1 /* 2 * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. 4 * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. 5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6 * 7 * This code is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 only, as 9 * published by the Free Software Foundation. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 * 25 */ 26 27 #include "precompiled.hpp" 28 #include "asm/assembler.hpp" 29 #include "asm/assembler.inline.hpp" 30 #include "code/compiledIC.hpp" 31 #include "compiler/disassembler.hpp" 32 #include "gc/shared/barrierSet.hpp" 33 #include "gc/shared/barrierSetAssembler.hpp" 34 #include "gc/shared/cardTable.hpp" 35 #include "gc/shared/cardTableBarrierSet.hpp" 36 #include "gc/shared/collectedHeap.hpp" 37 #include "interpreter/bytecodeHistogram.hpp" 38 #include "interpreter/interpreter.hpp" 39 #include "interpreter/interpreterRuntime.hpp" 40 #include "memory/resourceArea.hpp" 41 #include "memory/universe.hpp" 42 #include "oops/accessDecorators.hpp" 43 #include "oops/compressedKlass.inline.hpp" 44 #include "oops/compressedOops.inline.hpp" 45 #include "oops/klass.inline.hpp" 46 #include "oops/oop.hpp" 47 #include "runtime/interfaceSupport.inline.hpp" 48 #include "runtime/javaThread.hpp" 49 #include "runtime/jniHandles.inline.hpp" 50 #include "runtime/sharedRuntime.hpp" 51 #include "runtime/stubRoutines.hpp" 52 #include "utilities/globalDefinitions.hpp" 53 #include "utilities/powerOfTwo.hpp" 54 #ifdef COMPILER2 55 #include "opto/compile.hpp" 56 #include "opto/node.hpp" 57 #include "opto/output.hpp" 58 #endif 59 60 #ifdef PRODUCT 61 #define BLOCK_COMMENT(str) /* nothing */ 62 #else 63 #define BLOCK_COMMENT(str) block_comment(str) 64 #endif 65 #define STOP(str) stop(str); 66 #define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") 67 68 69 70 Register MacroAssembler::extract_rs1(address instr) { 71 assert_cond(instr != nullptr); 72 return as_Register(Assembler::extract(Assembler::ld_instr(instr), 19, 15)); 73 } 74 75 Register MacroAssembler::extract_rs2(address instr) { 76 assert_cond(instr != nullptr); 77 return as_Register(Assembler::extract(Assembler::ld_instr(instr), 24, 20)); 78 } 79 80 Register MacroAssembler::extract_rd(address instr) { 81 assert_cond(instr != nullptr); 82 return as_Register(Assembler::extract(Assembler::ld_instr(instr), 11, 7)); 83 } 84 85 
uint32_t MacroAssembler::extract_opcode(address instr) { 86 assert_cond(instr != nullptr); 87 return Assembler::extract(Assembler::ld_instr(instr), 6, 0); 88 } 89 90 uint32_t MacroAssembler::extract_funct3(address instr) { 91 assert_cond(instr != nullptr); 92 return Assembler::extract(Assembler::ld_instr(instr), 14, 12); 93 } 94 95 bool MacroAssembler::is_pc_relative_at(address instr) { 96 // auipc + jalr 97 // auipc + addi 98 // auipc + load 99 // auipc + fload_load 100 return (is_auipc_at(instr)) && 101 (is_addi_at(instr + instruction_size) || 102 is_jalr_at(instr + instruction_size) || 103 is_load_at(instr + instruction_size) || 104 is_float_load_at(instr + instruction_size)) && 105 check_pc_relative_data_dependency(instr); 106 } 107 108 // ie:ld(Rd, Label) 109 bool MacroAssembler::is_load_pc_relative_at(address instr) { 110 return is_auipc_at(instr) && // auipc 111 is_ld_at(instr + instruction_size) && // ld 112 check_load_pc_relative_data_dependency(instr); 113 } 114 115 bool MacroAssembler::is_movptr1_at(address instr) { 116 return is_lui_at(instr) && // Lui 117 is_addi_at(instr + instruction_size) && // Addi 118 is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 119 is_addi_at(instr + instruction_size * 3) && // Addi 120 is_slli_shift_at(instr + instruction_size * 4, 6) && // Slli Rd, Rs, 6 121 (is_addi_at(instr + instruction_size * 5) || 122 is_jalr_at(instr + instruction_size * 5) || 123 is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load 124 check_movptr1_data_dependency(instr); 125 } 126 127 bool MacroAssembler::is_movptr2_at(address instr) { 128 return is_lui_at(instr) && // lui 129 is_lui_at(instr + instruction_size) && // lui 130 is_slli_shift_at(instr + instruction_size * 2, 18) && // slli Rd, Rs, 18 131 is_add_at(instr + instruction_size * 3) && 132 (is_addi_at(instr + instruction_size * 4) || 133 is_jalr_at(instr + instruction_size * 4) || 134 is_load_at(instr + instruction_size * 4)) && // Addi/Jalr/Load 135 
check_movptr2_data_dependency(instr); 136 } 137 138 bool MacroAssembler::is_li16u_at(address instr) { 139 return is_lui_at(instr) && // lui 140 is_srli_at(instr + instruction_size) && // srli 141 check_li16u_data_dependency(instr); 142 } 143 144 bool MacroAssembler::is_li32_at(address instr) { 145 return is_lui_at(instr) && // lui 146 is_addiw_at(instr + instruction_size) && // addiw 147 check_li32_data_dependency(instr); 148 } 149 150 bool MacroAssembler::is_lwu_to_zr(address instr) { 151 assert_cond(instr != nullptr); 152 return (extract_opcode(instr) == 0b0000011 && 153 extract_funct3(instr) == 0b110 && 154 extract_rd(instr) == zr); // zr 155 } 156 157 uint32_t MacroAssembler::get_membar_kind(address addr) { 158 assert_cond(addr != nullptr); 159 assert(is_membar(addr), "no membar found"); 160 161 uint32_t insn = Bytes::get_native_u4(addr); 162 163 uint32_t predecessor = Assembler::extract(insn, 27, 24); 164 uint32_t successor = Assembler::extract(insn, 23, 20); 165 166 return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor); 167 } 168 169 void MacroAssembler::set_membar_kind(address addr, uint32_t order_kind) { 170 assert_cond(addr != nullptr); 171 assert(is_membar(addr), "no membar found"); 172 173 uint32_t predecessor = 0; 174 uint32_t successor = 0; 175 176 MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor); 177 178 uint32_t insn = Bytes::get_native_u4(addr); 179 address pInsn = (address) &insn; 180 Assembler::patch(pInsn, 27, 24, predecessor); 181 Assembler::patch(pInsn, 23, 20, successor); 182 183 address membar = addr; 184 Assembler::sd_instr(membar, insn); 185 } 186 187 188 static void pass_arg0(MacroAssembler* masm, Register arg) { 189 if (c_rarg0 != arg) { 190 masm->mv(c_rarg0, arg); 191 } 192 } 193 194 static void pass_arg1(MacroAssembler* masm, Register arg) { 195 if (c_rarg1 != arg) { 196 masm->mv(c_rarg1, arg); 197 } 198 } 199 200 static void pass_arg2(MacroAssembler* masm, Register arg) { 201 if (c_rarg2 != 
arg) { 202 masm->mv(c_rarg2, arg); 203 } 204 } 205 206 static void pass_arg3(MacroAssembler* masm, Register arg) { 207 if (c_rarg3 != arg) { 208 masm->mv(c_rarg3, arg); 209 } 210 } 211 212 void MacroAssembler::push_cont_fastpath(Register java_thread) { 213 if (!Continuations::enabled()) return; 214 Label done; 215 ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset())); 216 bleu(sp, t0, done); 217 sd(sp, Address(java_thread, JavaThread::cont_fastpath_offset())); 218 bind(done); 219 } 220 221 void MacroAssembler::pop_cont_fastpath(Register java_thread) { 222 if (!Continuations::enabled()) return; 223 Label done; 224 ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset())); 225 bltu(sp, t0, done); 226 sd(zr, Address(java_thread, JavaThread::cont_fastpath_offset())); 227 bind(done); 228 } 229 230 void MacroAssembler::inc_held_monitor_count(Register tmp) { 231 Address dst = Address(xthread, JavaThread::held_monitor_count_offset()); 232 ld(tmp, dst); 233 addi(tmp, tmp, 1); 234 sd(tmp, dst); 235 #ifdef ASSERT 236 Label ok; 237 test_bit(tmp, tmp, 63); 238 beqz(tmp, ok); 239 STOP("assert(held monitor count overflow)"); 240 should_not_reach_here(); 241 bind(ok); 242 #endif 243 } 244 245 void MacroAssembler::dec_held_monitor_count(Register tmp) { 246 Address dst = Address(xthread, JavaThread::held_monitor_count_offset()); 247 ld(tmp, dst); 248 addi(tmp, tmp, -1); 249 sd(tmp, dst); 250 #ifdef ASSERT 251 Label ok; 252 test_bit(tmp, tmp, 63); 253 beqz(tmp, ok); 254 STOP("assert(held monitor count underflow)"); 255 should_not_reach_here(); 256 bind(ok); 257 #endif 258 } 259 260 int MacroAssembler::align(int modulus, int extra_offset) { 261 CompressibleRegion cr(this); 262 intptr_t before = offset(); 263 while ((offset() + extra_offset) % modulus != 0) { nop(); } 264 return (int)(offset() - before); 265 } 266 267 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 268 
call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); 269 } 270 271 // Implementation of call_VM versions 272 273 void MacroAssembler::call_VM(Register oop_result, 274 address entry_point, 275 bool check_exceptions) { 276 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 277 } 278 279 void MacroAssembler::call_VM(Register oop_result, 280 address entry_point, 281 Register arg_1, 282 bool check_exceptions) { 283 pass_arg1(this, arg_1); 284 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 285 } 286 287 void MacroAssembler::call_VM(Register oop_result, 288 address entry_point, 289 Register arg_1, 290 Register arg_2, 291 bool check_exceptions) { 292 assert_different_registers(arg_1, c_rarg2); 293 pass_arg2(this, arg_2); 294 pass_arg1(this, arg_1); 295 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 296 } 297 298 void MacroAssembler::call_VM(Register oop_result, 299 address entry_point, 300 Register arg_1, 301 Register arg_2, 302 Register arg_3, 303 bool check_exceptions) { 304 assert_different_registers(arg_1, c_rarg2, c_rarg3); 305 assert_different_registers(arg_2, c_rarg3); 306 pass_arg3(this, arg_3); 307 308 pass_arg2(this, arg_2); 309 310 pass_arg1(this, arg_1); 311 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 312 } 313 314 void MacroAssembler::call_VM(Register oop_result, 315 Register last_java_sp, 316 address entry_point, 317 int number_of_arguments, 318 bool check_exceptions) { 319 call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 320 } 321 322 void MacroAssembler::call_VM(Register oop_result, 323 Register last_java_sp, 324 address entry_point, 325 Register arg_1, 326 bool check_exceptions) { 327 pass_arg1(this, arg_1); 328 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 329 } 330 331 void MacroAssembler::call_VM(Register oop_result, 332 Register last_java_sp, 333 address entry_point, 334 Register 
arg_1, 335 Register arg_2, 336 bool check_exceptions) { 337 338 assert_different_registers(arg_1, c_rarg2); 339 pass_arg2(this, arg_2); 340 pass_arg1(this, arg_1); 341 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 342 } 343 344 void MacroAssembler::call_VM(Register oop_result, 345 Register last_java_sp, 346 address entry_point, 347 Register arg_1, 348 Register arg_2, 349 Register arg_3, 350 bool check_exceptions) { 351 assert_different_registers(arg_1, c_rarg2, c_rarg3); 352 assert_different_registers(arg_2, c_rarg3); 353 pass_arg3(this, arg_3); 354 pass_arg2(this, arg_2); 355 pass_arg1(this, arg_1); 356 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 357 } 358 359 void MacroAssembler::post_call_nop() { 360 if (!Continuations::enabled()) { 361 return; 362 } 363 relocate(post_call_nop_Relocation::spec(), [&] { 364 InlineSkippedInstructionsCounter skipCounter(this); 365 nop(); 366 li32(zr, 0); 367 }); 368 } 369 370 // these are no-ops overridden by InterpreterMacroAssembler 371 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} 372 void MacroAssembler::check_and_handle_popframe(Register java_thread) {} 373 374 // Calls to C land 375 // 376 // When entering C land, the fp, & esp of the last Java frame have to be recorded 377 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp 378 // has to be reset to 0. This is required to allow proper stack traversal. 
379 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 380 Register last_java_fp, 381 Register last_java_pc) { 382 383 if (last_java_pc->is_valid()) { 384 sd(last_java_pc, Address(xthread, 385 JavaThread::frame_anchor_offset() + 386 JavaFrameAnchor::last_Java_pc_offset())); 387 } 388 389 // determine last_java_sp register 390 if (!last_java_sp->is_valid()) { 391 last_java_sp = esp; 392 } 393 394 sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); 395 396 // last_java_fp is optional 397 if (last_java_fp->is_valid()) { 398 sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); 399 } 400 } 401 402 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 403 Register last_java_fp, 404 address last_java_pc, 405 Register tmp) { 406 assert(last_java_pc != nullptr, "must provide a valid PC"); 407 408 la(tmp, last_java_pc); 409 sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); 410 411 set_last_Java_frame(last_java_sp, last_java_fp, noreg); 412 } 413 414 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 415 Register last_java_fp, 416 Label &L, 417 Register tmp) { 418 if (L.is_bound()) { 419 set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); 420 } else { 421 L.add_patch_at(code(), locator()); 422 IncompressibleRegion ir(this); // the label address will be patched back. 
423 set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); 424 } 425 } 426 427 void MacroAssembler::reset_last_Java_frame(bool clear_fp) { 428 // we must set sp to zero to clear frame 429 sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); 430 431 // must clear fp, so that compiled frames are not confused; it is 432 // possible that we need it only for debugging 433 if (clear_fp) { 434 sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); 435 } 436 437 // Always clear the pc because it could have been set by make_walkable() 438 sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); 439 } 440 441 void MacroAssembler::call_VM_base(Register oop_result, 442 Register java_thread, 443 Register last_java_sp, 444 address entry_point, 445 int number_of_arguments, 446 bool check_exceptions) { 447 // determine java_thread register 448 if (!java_thread->is_valid()) { 449 java_thread = xthread; 450 } 451 // determine last_java_sp register 452 if (!last_java_sp->is_valid()) { 453 last_java_sp = esp; 454 } 455 456 // debugging support 457 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 458 assert(java_thread == xthread, "unexpected register"); 459 460 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 461 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 462 463 // push java thread (becomes first argument of C function) 464 mv(c_rarg0, java_thread); 465 466 // set last Java frame before call 467 assert(last_java_sp != fp, "can't use fp"); 468 469 Label l; 470 set_last_Java_frame(last_java_sp, fp, l, t0); 471 472 // do the call, remove parameters 473 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); 474 475 // reset last Java frame 476 // Only interpreter should have to clear fp 477 reset_last_Java_frame(true); 478 479 // C++ interp handles this in the interpreter 480 
check_and_handle_popframe(java_thread); 481 check_and_handle_earlyret(java_thread); 482 483 if (check_exceptions) { 484 // check for pending exceptions (java_thread is set upon return) 485 ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); 486 Label ok; 487 beqz(t0, ok); 488 RuntimeAddress target(StubRoutines::forward_exception_entry()); 489 relocate(target.rspec(), [&] { 490 int32_t offset; 491 la(t0, target.target(), offset); 492 jr(t0, offset); 493 }); 494 bind(ok); 495 } 496 497 // get oop result if there is one and reset the value in the thread 498 if (oop_result->is_valid()) { 499 get_vm_result(oop_result, java_thread); 500 } 501 } 502 503 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { 504 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); 505 sd(zr, Address(java_thread, JavaThread::vm_result_offset())); 506 verify_oop_msg(oop_result, "broken oop in call_VM_base"); 507 } 508 509 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { 510 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); 511 sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); 512 } 513 514 void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { 515 assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required"); 516 assert_different_registers(klass, xthread, tmp); 517 518 Label L_fallthrough, L_tmp; 519 if (L_fast_path == nullptr) { 520 L_fast_path = &L_fallthrough; 521 } else if (L_slow_path == nullptr) { 522 L_slow_path = &L_fallthrough; 523 } 524 525 // Fast path check: class is fully initialized 526 lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); 527 sub(tmp, tmp, InstanceKlass::fully_initialized); 528 beqz(tmp, *L_fast_path); 529 530 // Fast path check: current thread is initializer thread 531 ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); 532 
533 if (L_slow_path == &L_fallthrough) { 534 beq(xthread, tmp, *L_fast_path); 535 bind(*L_slow_path); 536 } else if (L_fast_path == &L_fallthrough) { 537 bne(xthread, tmp, *L_slow_path); 538 bind(*L_fast_path); 539 } else { 540 Unimplemented(); 541 } 542 } 543 544 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) { 545 if (!VerifyOops) { return; } 546 547 // Pass register number to verify_oop_subroutine 548 const char* b = nullptr; 549 { 550 ResourceMark rm; 551 stringStream ss; 552 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line); 553 b = code_string(ss.as_string()); 554 } 555 BLOCK_COMMENT("verify_oop {"); 556 557 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 558 559 mv(c_rarg0, reg); // c_rarg0 : x10 560 { 561 // The length of the instruction sequence emitted should not depend 562 // on the address of the char buffer so that the size of mach nodes for 563 // scratch emit and normal emit matches. 564 IncompressibleRegion ir(this); // Fixed length 565 movptr(t0, (address) b); 566 } 567 568 // call indirectly to solve generation ordering problem 569 RuntimeAddress target(StubRoutines::verify_oop_subroutine_entry_address()); 570 relocate(target.rspec(), [&] { 571 int32_t offset; 572 la(t1, target.target(), offset); 573 ld(t1, Address(t1, offset)); 574 }); 575 jalr(t1); 576 577 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 578 579 BLOCK_COMMENT("} verify_oop"); 580 } 581 582 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { 583 if (!VerifyOops) { 584 return; 585 } 586 587 const char* b = nullptr; 588 { 589 ResourceMark rm; 590 stringStream ss; 591 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line); 592 b = code_string(ss.as_string()); 593 } 594 BLOCK_COMMENT("verify_oop_addr {"); 595 596 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 597 598 if (addr.uses(sp)) { 599 la(x10, addr); 600 ld(x10, Address(x10, 4 * wordSize)); 601 } else { 602 ld(x10, addr); 603 } 
604 605 { 606 // The length of the instruction sequence emitted should not depend 607 // on the address of the char buffer so that the size of mach nodes for 608 // scratch emit and normal emit matches. 609 IncompressibleRegion ir(this); // Fixed length 610 movptr(t0, (address) b); 611 } 612 613 // call indirectly to solve generation ordering problem 614 RuntimeAddress target(StubRoutines::verify_oop_subroutine_entry_address()); 615 relocate(target.rspec(), [&] { 616 int32_t offset; 617 la(t1, target.target(), offset); 618 ld(t1, Address(t1, offset)); 619 }); 620 jalr(t1); 621 622 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 623 624 BLOCK_COMMENT("} verify_oop_addr"); 625 } 626 627 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 628 int extra_slot_offset) { 629 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 630 int stackElementSize = Interpreter::stackElementSize; 631 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); 632 #ifdef ASSERT 633 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); 634 assert(offset1 - offset == stackElementSize, "correct arithmetic"); 635 #endif 636 if (arg_slot.is_constant()) { 637 return Address(esp, arg_slot.as_constant() * stackElementSize + offset); 638 } else { 639 assert_different_registers(t0, arg_slot.as_register()); 640 shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); 641 return Address(t0, offset); 642 } 643 } 644 645 #ifndef PRODUCT 646 extern "C" void findpc(intptr_t x); 647 #endif 648 649 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) 650 { 651 // In order to get locks to work, we need to fake a in_VM state 652 if (ShowMessageBoxOnError) { 653 JavaThread* thread = JavaThread::current(); 654 JavaThreadState saved_state = thread->thread_state(); 655 thread->set_thread_state(_thread_in_vm); 656 #ifndef PRODUCT 657 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 658 ttyLocker ttyl; 659 
BytecodeCounter::print(); 660 } 661 #endif 662 if (os::message_box(msg, "Execution stopped, print registers?")) { 663 ttyLocker ttyl; 664 tty->print_cr(" pc = 0x%016lx", pc); 665 #ifndef PRODUCT 666 tty->cr(); 667 findpc(pc); 668 tty->cr(); 669 #endif 670 tty->print_cr(" x0 = 0x%016lx", regs[0]); 671 tty->print_cr(" x1 = 0x%016lx", regs[1]); 672 tty->print_cr(" x2 = 0x%016lx", regs[2]); 673 tty->print_cr(" x3 = 0x%016lx", regs[3]); 674 tty->print_cr(" x4 = 0x%016lx", regs[4]); 675 tty->print_cr(" x5 = 0x%016lx", regs[5]); 676 tty->print_cr(" x6 = 0x%016lx", regs[6]); 677 tty->print_cr(" x7 = 0x%016lx", regs[7]); 678 tty->print_cr(" x8 = 0x%016lx", regs[8]); 679 tty->print_cr(" x9 = 0x%016lx", regs[9]); 680 tty->print_cr("x10 = 0x%016lx", regs[10]); 681 tty->print_cr("x11 = 0x%016lx", regs[11]); 682 tty->print_cr("x12 = 0x%016lx", regs[12]); 683 tty->print_cr("x13 = 0x%016lx", regs[13]); 684 tty->print_cr("x14 = 0x%016lx", regs[14]); 685 tty->print_cr("x15 = 0x%016lx", regs[15]); 686 tty->print_cr("x16 = 0x%016lx", regs[16]); 687 tty->print_cr("x17 = 0x%016lx", regs[17]); 688 tty->print_cr("x18 = 0x%016lx", regs[18]); 689 tty->print_cr("x19 = 0x%016lx", regs[19]); 690 tty->print_cr("x20 = 0x%016lx", regs[20]); 691 tty->print_cr("x21 = 0x%016lx", regs[21]); 692 tty->print_cr("x22 = 0x%016lx", regs[22]); 693 tty->print_cr("x23 = 0x%016lx", regs[23]); 694 tty->print_cr("x24 = 0x%016lx", regs[24]); 695 tty->print_cr("x25 = 0x%016lx", regs[25]); 696 tty->print_cr("x26 = 0x%016lx", regs[26]); 697 tty->print_cr("x27 = 0x%016lx", regs[27]); 698 tty->print_cr("x28 = 0x%016lx", regs[28]); 699 tty->print_cr("x30 = 0x%016lx", regs[30]); 700 tty->print_cr("x31 = 0x%016lx", regs[31]); 701 BREAKPOINT; 702 } 703 } 704 fatal("DEBUG MESSAGE: %s", msg); 705 } 706 707 void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) { 708 assert_different_registers(value, tmp1, tmp2); 709 Label done, tagged, weak_tagged; 710 711 beqz(value, done); // Use null as-is. 
712 // Test for tag. 713 andi(tmp1, value, JNIHandles::tag_mask); 714 bnez(tmp1, tagged); 715 716 // Resolve local handle 717 access_load_at(T_OBJECT, IN_NATIVE | AS_RAW, value, Address(value, 0), tmp1, tmp2); 718 verify_oop(value); 719 j(done); 720 721 bind(tagged); 722 // Test for jweak tag. 723 STATIC_ASSERT(JNIHandles::TypeTag::weak_global == 0b1); 724 test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::weak_global)); 725 bnez(tmp1, weak_tagged); 726 727 // Resolve global handle 728 access_load_at(T_OBJECT, IN_NATIVE, value, 729 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2); 730 verify_oop(value); 731 j(done); 732 733 bind(weak_tagged); 734 // Resolve jweak. 735 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, 736 Address(value, -JNIHandles::TypeTag::weak_global), tmp1, tmp2); 737 verify_oop(value); 738 739 bind(done); 740 } 741 742 void MacroAssembler::resolve_global_jobject(Register value, Register tmp1, Register tmp2) { 743 assert_different_registers(value, tmp1, tmp2); 744 Label done; 745 746 beqz(value, done); // Use null as-is. 747 748 #ifdef ASSERT 749 { 750 STATIC_ASSERT(JNIHandles::TypeTag::global == 0b10); 751 Label valid_global_tag; 752 test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::global)); // Test for global tag. 
753 bnez(tmp1, valid_global_tag); 754 stop("non global jobject using resolve_global_jobject"); 755 bind(valid_global_tag); 756 } 757 #endif 758 759 // Resolve global handle 760 access_load_at(T_OBJECT, IN_NATIVE, value, 761 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2); 762 verify_oop(value); 763 764 bind(done); 765 } 766 767 void MacroAssembler::stop(const char* msg) { 768 BLOCK_COMMENT(msg); 769 illegal_instruction(Assembler::csr::time); 770 emit_int64((uintptr_t)msg); 771 } 772 773 void MacroAssembler::unimplemented(const char* what) { 774 const char* buf = nullptr; 775 { 776 ResourceMark rm; 777 stringStream ss; 778 ss.print("unimplemented: %s", what); 779 buf = code_string(ss.as_string()); 780 } 781 stop(buf); 782 } 783 784 void MacroAssembler::emit_static_call_stub() { 785 IncompressibleRegion ir(this); // Fixed length: see CompiledDirectCall::to_interp_stub_size(). 786 // CompiledDirectCall::set_to_interpreted knows the 787 // exact layout of this stub. 788 789 mov_metadata(xmethod, (Metadata*)nullptr); 790 791 // Jump to the entry point of the c2i stub. 
792 int32_t offset = 0; 793 movptr(t0, 0, offset, t1); // lui + lui + slli + add 794 jr(t0, offset); 795 } 796 797 void MacroAssembler::call_VM_leaf_base(address entry_point, 798 int number_of_arguments, 799 Label *retaddr) { 800 int32_t offset = 0; 801 push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp 802 803 mv(t0, entry_point, offset); 804 jalr(t0, offset); 805 if (retaddr != nullptr) { 806 bind(*retaddr); 807 } 808 809 Label not_preempted; 810 if (entry_point == CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter)) { 811 ld(t0, Address(xthread, JavaThread::preempt_alternate_return_offset())); 812 beqz(t0, not_preempted); 813 sd(zr, Address(xthread, JavaThread::preempt_alternate_return_offset())); 814 jr(t0); 815 } 816 bind(not_preempted); 817 818 pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp 819 } 820 821 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { 822 call_VM_leaf_base(entry_point, number_of_arguments); 823 } 824 825 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { 826 pass_arg0(this, arg_0); 827 call_VM_leaf_base(entry_point, 1); 828 } 829 830 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 831 assert_different_registers(arg_1, c_rarg0); 832 pass_arg0(this, arg_0); 833 pass_arg1(this, arg_1); 834 call_VM_leaf_base(entry_point, 2); 835 } 836 837 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, 838 Register arg_1, Register arg_2) { 839 assert_different_registers(arg_1, c_rarg0); 840 assert_different_registers(arg_2, c_rarg0, c_rarg1); 841 pass_arg0(this, arg_0); 842 pass_arg1(this, arg_1); 843 pass_arg2(this, arg_2); 844 call_VM_leaf_base(entry_point, 3); 845 } 846 847 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { 848 pass_arg0(this, arg_0); 849 MacroAssembler::call_VM_leaf_base(entry_point, 1); 850 } 851 852 void 
MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 853 854 assert_different_registers(arg_0, c_rarg1); 855 pass_arg1(this, arg_1); 856 pass_arg0(this, arg_0); 857 MacroAssembler::call_VM_leaf_base(entry_point, 2); 858 } 859 860 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 861 assert_different_registers(arg_0, c_rarg1, c_rarg2); 862 assert_different_registers(arg_1, c_rarg2); 863 pass_arg2(this, arg_2); 864 pass_arg1(this, arg_1); 865 pass_arg0(this, arg_0); 866 MacroAssembler::call_VM_leaf_base(entry_point, 3); 867 } 868 869 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { 870 assert_different_registers(arg_0, c_rarg1, c_rarg2, c_rarg3); 871 assert_different_registers(arg_1, c_rarg2, c_rarg3); 872 assert_different_registers(arg_2, c_rarg3); 873 874 pass_arg3(this, arg_3); 875 pass_arg2(this, arg_2); 876 pass_arg1(this, arg_1); 877 pass_arg0(this, arg_0); 878 MacroAssembler::call_VM_leaf_base(entry_point, 4); 879 } 880 881 void MacroAssembler::la(Register Rd, const address addr) { 882 int32_t offset; 883 la(Rd, addr, offset); 884 addi(Rd, Rd, offset); 885 } 886 887 void MacroAssembler::la(Register Rd, const address addr, int32_t &offset) { 888 if (is_32bit_offset_from_codecache((int64_t)addr)) { 889 int64_t distance = addr - pc(); 890 assert(is_valid_32bit_offset(distance), "Must be"); 891 auipc(Rd, (int32_t)distance + 0x800); 892 offset = ((int32_t)distance << 20) >> 20; 893 } else { 894 assert(!CodeCache::contains(addr), "Must be"); 895 movptr(Rd, addr, offset); 896 } 897 } 898 899 void MacroAssembler::la(Register Rd, const Address &adr) { 900 switch (adr.getMode()) { 901 case Address::literal: { 902 relocInfo::relocType rtype = adr.rspec().reloc()->type(); 903 if (rtype == relocInfo::none) { 904 mv(Rd, (intptr_t)(adr.target())); 905 } else { 906 relocate(adr.rspec(), [&] { 907 
movptr(Rd, adr.target());
        });
      }
      break;
    }
    case Address::base_plus_offset: {
      Address new_adr = legitimize_address(Rd, adr);
      if (!(new_adr.base() == Rd && new_adr.offset() == 0)) {
        addi(Rd, new_adr.base(), new_adr.offset());
      }
      break;
    }
    default:
      ShouldNotReachHere();
  }
}

// Load the address of a label into Rd. Delegates to wrap_label(), which
// registers a patch site when the label is not bound yet.
void MacroAssembler::la(Register Rd, Label &label) {
  IncompressibleRegion ir(this);   // the label address may be patched back.
  wrap_label(Rd, label, &MacroAssembler::la);
}

// Load an unsigned 16-bit immediate with a fixed two-instruction sequence:
// lui with the immediate shifted left by 12, then srli by 12.
void MacroAssembler::li16u(Register Rd, uint16_t imm) {
  lui(Rd, (uint32_t)imm << 12);
  srli(Rd, Rd, 12);
}

// Load a 32-bit immediate with a fixed two-instruction sequence (lui + addiw).
// The immediate is split into a sign-extended low 12-bit part and the
// remainder, so that remainder + low part == imm.
void MacroAssembler::li32(Register Rd, int32_t imm) {
  // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit
  int64_t upper = imm, lower = imm;
  lower = (imm << 20) >> 20;
  upper -= lower;
  upper = (int32_t)upper;
  // lui Rd, imm[31:12] + imm[11]
  lui(Rd, upper);
  addiw(Rd, Rd, lower);
}

// Load an arbitrary 64-bit immediate into Rd using a variable-length sequence.
//  - If compression is enabled and imm fits in a signed 6-bit field
//    (and Rd != x0), a single c.li is emitted.
//  - If imm is not representable as a 32-bit integer, the upper part is loaded
//    by a recursive call to li(), shifted into place with slli, and the low
//    12-bit part (if non-zero) is added with addi.
//  - Otherwise lui and/or addiw are emitted, skipping whichever is not needed.
void MacroAssembler::li(Register Rd, int64_t imm) {
  // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff
  // li -> c.li
  if (do_compress() && (is_simm6(imm) && Rd != x0)) {
    c_li(Rd, imm);
    return;
  }

  int shift = 12;
  int64_t upper = imm, lower = imm;
  // Split imm to a lower 12-bit sign-extended part and the remainder,
  // because addi will sign-extend the lower imm.
  lower = ((int32_t)imm << 20) >> 20;
  upper -= lower;

  // Test whether imm is a 32-bit integer.
  if (!(((imm) & ~(int64_t)0x7fffffff) == 0 ||
        (((imm) & ~(int64_t)0x7fffffff) == ~(int64_t)0x7fffffff))) {
    // Skip the zero bits above bit 11 so the recursive call loads a shorter value.
    while (((upper >> shift) & 1) == 0) { shift++; }
    upper >>= shift;
    li(Rd, upper);
    slli(Rd, Rd, shift);
    if (lower != 0) {
      addi(Rd, Rd, lower);
    }
  } else {
    // 32-bit integer
    Register hi_Rd = zr;
    if (upper != 0) {
      lui(Rd, (int32_t)upper);
      hi_Rd = Rd;
    }
    // Emit addiw when there is a low part to add, or when no lui was emitted
    // (in which case addiw from zr is the only instruction that sets Rd).
    if (lower != 0 || hi_Rd == zr) {
      addiw(Rd, hi_Rd, lower);
    }
  }
}

// Emit auipc + ld + jalr: load a 64-bit value from `source` (addressed
// relative to the current pc) into temp, then jalr through temp.
// The pc-relative distance must fit in a signed 32-bit value.
void MacroAssembler::load_link_jump(const address source, Register temp) {
  assert(temp != noreg && temp != x0, "expecting a register");
  assert_cond(source != nullptr);
  int64_t distance = source - pc();
  assert(is_simm32(distance), "Must be");
  auipc(temp, (int32_t)distance + 0x800);
  ld(temp, Address(temp, ((int32_t)distance << 20) >> 20));
  jalr(temp);
}

// Emit a single jal to `dest` with x1 as the link register.
// The distance must be even and fit in a signed 21-bit value.
// NOTE: `temp` is not used by this body.
void MacroAssembler::jump_link(const address dest, Register temp) {
  assert(UseTrampolines, "Must be");
  assert_cond(dest != nullptr);
  int64_t distance = dest - pc();
  assert(is_simm21(distance), "Must be");
  assert((distance % 2) == 0, "Must be");
  jal(x1, distance);
}

// Jump (no link) to `dest`, which must lie inside the code cache.
// Uses a single jal when the distance is even and fits in a signed 21-bit
// value; otherwise materializes the address in `temp` and jumps through it.
void MacroAssembler::j(const address dest, Register temp) {
  assert(CodeCache::contains(dest), "Must be");
  assert_cond(dest != nullptr);
  int64_t distance = dest - pc();

  // We can't patch C, i.e. if Label wasn't bound we need to patch this jump.
  IncompressibleRegion ir(this);
  if (is_simm21(distance) && ((distance % 2) == 0)) {
    Assembler::jal(x0, distance);
  } else {
    assert(temp != noreg && temp != x0, "expecting a register");
    int32_t offset = 0;
    la(temp, dest, offset);
    jr(temp, offset);
  }
}

// Jump (no link) to the location described by `adr`.
//  - literal: relocates and jumps to the literal target.
//  - base_plus_offset: the sign-extended low 12 bits of the offset are carried
//    by the final jr; the rest of the address is computed into `temp`.
void MacroAssembler::j(const Address &adr, Register temp) {
  switch (adr.getMode()) {
    case Address::literal: {
      relocate(adr.rspec(), [&] {
        j(adr.target(), temp);
      });
      break;
    }
    case Address::base_plus_offset: {
      int32_t offset = ((int32_t)adr.offset() << 20) >> 20;
      la(temp, Address(adr.base(), adr.offset() - offset));
      jr(temp, offset);
      break;
    }
    default:
      ShouldNotReachHere();
  }
}

// Jump (no link) to a label. For an unbound label a patch site is recorded
// and a jump to the current pc is emitted as a placeholder.
void MacroAssembler::j(Label &lab, Register temp) {
  assert_different_registers(x0, temp);
  if (lab.is_bound()) {
    MacroAssembler::j(target(lab), temp);
  } else {
    lab.add_patch_at(code(), locator());
    MacroAssembler::j(pc(), temp);
  }
}

// Register-indirect jump without link: jalr with x0 as the destination register.
void MacroAssembler::jr(Register Rd, int32_t offset) {
  assert(Rd != noreg, "expecting a register");
  Assembler::jalr(x0, Rd, offset);
}

// Call `dest`: materialize the address in `temp` (la yields a residual
// offset) and jalr through it with that offset.
void MacroAssembler::call(const address dest, Register temp) {
  assert_cond(dest != nullptr);
  assert(temp != noreg, "expecting a register");
  int32_t offset = 0;
  la(temp, dest, offset);
  jalr(temp, offset);
}

// Register-indirect jump with link: jalr with x1 as the destination register.
void MacroAssembler::jalr(Register Rs, int32_t offset) {
  assert(Rs != noreg, "expecting a register");
  Assembler::jalr(x1, Rs, offset);
}

// Call a runtime address. If `dest` belongs to a code blob known to the code
// cache, a far_call is used; otherwise the address is materialized in `tmp`
// under a relocation and called via jalr.
void MacroAssembler::rt_call(address dest, Register tmp) {
  CodeBlob *cb = CodeCache::find_blob(dest);
  RuntimeAddress target(dest);
  if (cb) {
    far_call(target, tmp);
  } else {
    relocate(target.rspec(), [&] {
      int32_t offset;  // written by la() through its reference parameter
      la(tmp, target.target(), offset);
      jalr(tmp, offset);
    });
  }
}

// Invoke `insn` (a member function taking a register and an address) against
// a label. For an unbound label a patch site is recorded and the current pc
// is passed as a placeholder target.
void MacroAssembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) {
  if (L.is_bound()) {
    (this->*insn)(Rt, target(L));
  } else {
    L.add_patch_at(code(), locator());
    (this->*insn)(Rt, pc());
  }
}

// Emit a compare-and-branch against a label.
//  - is_far: emit the negated branch (`neg_insn`) over an unconditional jump
//    to L, so the conditional branch itself only has to skip the jump.
//  - otherwise: emit `insn` directly, recording a patch site when L is unbound.
void MacroAssembler::wrap_label(Register r1, Register r2, Label &L,
                                compare_and_branch_insn insn,
                                compare_and_branch_label_insn neg_insn, bool is_far) {
  if (is_far) {
    Label done;
    (this->*neg_insn)(r1, r2, done, /* is_far */ false);
    j(L);
    bind(done);
  } else {
    if (L.is_bound()) {
      (this->*insn)(r1, r2, target(L));
    } else {
      L.add_patch_at(code(), locator());
      (this->*insn)(r1, r2, pc());
    }
  }
}

// Label forms of the six base compare-and-branch instructions. Each is paired
// with its negation, which wrap_label() uses for the far form.
#define INSN(NAME, NEG_INSN)                                                              \
  void MacroAssembler::NAME(Register Rs1, Register Rs2, Label &L, bool is_far) {          \
    wrap_label(Rs1, Rs2, L, &MacroAssembler::NAME, &MacroAssembler::NEG_INSN, is_far);    \
  }

INSN(beq,  bne);
INSN(bne,  beq);
INSN(blt,  bge);
INSN(bge,  blt);
INSN(bltu, bgeu);
INSN(bgeu, bltu);

#undef INSN

// Compare-with-zero forms (NAME##z): the second operand is fixed to zr.
#define INSN(NAME)                                                                \
  void MacroAssembler::NAME##z(Register Rs, const address dest) {                 \
    NAME(Rs, zr, dest);                                                           \
  }                                                                               \
  void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) {              \
    NAME(Rs, zr, l, is_far);                                                      \
  }                                                                               \

INSN(beq);
INSN(bne);
INSN(blt);
INSN(ble);
INSN(bge);
INSN(bgt);

#undef INSN

// Branches implemented by swapping the operands of another branch
// (e.g. bgt(Rs, Rt) is emitted as blt(Rt, Rs)).
#define INSN(NAME, NEG_INSN)                                                      \
  void MacroAssembler::NAME(Register Rs, Register Rt, const address dest) {       \
    NEG_INSN(Rt, Rs, dest);                                                       \
  }                                                                               \
  void MacroAssembler::NAME(Register Rs, Register Rt, Label &l, bool is_far) {    \
    NEG_INSN(Rt, Rs, l, is_far);                                                  \
  }

INSN(bgt,  blt);
INSN(ble,  bge);
INSN(bgtu, bltu);
INSN(bleu, bgeu);

#undef INSN

// Float compare branch instructions

// Equality / inequality: the compare result is written to t0 (which is
// clobbered) and then tested with BRANCH. The is_unordered argument is
// accepted but not consulted by these two variants.
#define INSN(NAME, FLOATCMP, BRANCH)                                                                                    \
  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) {   \
    FLOATCMP##_s(t0, Rs1, Rs2);                                                                                         \
    BRANCH(t0, l, is_far);                                                                                              \
  }                                                                                                                     \
  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) {  \
    FLOATCMP##_d(t0, Rs1, Rs2);                                                                                         \
    BRANCH(t0, l, is_far);                                                                                              \
  }

INSN(beq, feq, bnez);
INSN(bne, feq, beqz);

#undef INSN


// Ordered comparisons (ble / blt). t0 is clobbered.
//  - is_unordered: evaluate the complementary compare with swapped operands
//    (FLOATCMP2) and branch when it yields zero.
//  - otherwise: evaluate FLOATCMP1 directly and branch when it yields non-zero.
#define INSN(NAME, FLOATCMP1, FLOATCMP2)                                              \
  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,   \
                                    bool is_far, bool is_unordered) {                 \
    if (is_unordered) {                                                               \
      /* jump if either source is NaN or condition is expected */                     \
      FLOATCMP2##_s(t0, Rs2, Rs1);                                                    \
      beqz(t0, l, is_far);                                                            \
    } else {                                                                          \
      /* jump if no NaN in source and condition is expected */                        \
      FLOATCMP1##_s(t0, Rs1, Rs2);                                                    \
      bnez(t0, l, is_far);                                                            \
    }                                                                                 \
  }                                                                                   \
  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,  \
                                     bool is_far, bool is_unordered) {                \
    if (is_unordered) {                                                               \
      /* jump if either source is NaN or condition is expected */                     \
      FLOATCMP2##_d(t0, Rs2, Rs1);                                                    \
      beqz(t0, l, is_far);                                                            \
    } else {                                                                          \
      /* jump if no NaN in source and condition is expected */                        \
      FLOATCMP1##_d(t0, Rs1, Rs2);                                                    \
      bnez(t0, l, is_far);                                                            \
    }                                                                                 \
  }

INSN(ble, fle, flt);
INSN(blt, flt, fle);

#undef INSN

// bgt / bge are emitted as blt / ble with the two sources swapped.
#define INSN(NAME, CMP)                                                               \
  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,   \
                                    bool is_far, bool is_unordered) {                 \
    float_##CMP(Rs2, Rs1, l, is_far, is_unordered);                                   \
  }                                                                                   \
  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,  \
                                     bool is_far, bool is_unordered) {                \
    double_##CMP(Rs2, Rs1, l, is_far, is_unordered);                                  \
  }

INSN(bgt, blt);
INSN(bge, ble);

#undef INSN

// CSR read pseudo-instructions: each reads the named CSR into Rd via csrr().
#define INSN(NAME, CSR)                       \
  void MacroAssembler::NAME(Register Rd) {    \
    csrr(Rd, CSR);                            \
  }

INSN(rdinstret, CSR_INSTRET);
INSN(rdcycle, CSR_CYCLE);
INSN(rdtime, CSR_TIME);
INSN(frcsr, CSR_FCSR);
INSN(frrm, CSR_FRM);
INSN(frflags, CSR_FFLAGS);

#undef INSN

// Read a CSR into Rd: emitted as csrrs with x0 as the source register.
void MacroAssembler::csrr(Register Rd, unsigned csr) {
  csrrs(Rd, csr, x0);
}

// CSR write/set/clear from a register, discarding the old value (Rd = x0).
#define INSN(NAME, OPFUN)                                      \
  void MacroAssembler::NAME(unsigned csr, Register Rs) {       \
    OPFUN(x0, csr, Rs);                                        \
  }

INSN(csrw, csrrw);
INSN(csrs, csrrs);
INSN(csrc, csrrc);

#undef INSN

// CSR write/set/clear from an immediate, discarding the old value (Rd = x0).
#define INSN(NAME, OPFUN)                                      \
  void MacroAssembler::NAME(unsigned csr, unsigned imm) {      \
    OPFUN(x0, csr, imm);                                       \
  }

INSN(csrwi, csrrwi);
INSN(csrsi, csrrsi);
INSN(csrci, csrrci);

#undef INSN

// Floating-point CSR swaps: write Rs to the CSR and return the old value in Rd.
#define INSN(NAME, CSR)                                      \
  void MacroAssembler::NAME(Register Rd, Register Rs) {      \
    csrrw(Rd, CSR, Rs);                                      \
  }

INSN(fscsr, CSR_FCSR);
INSN(fsrm, CSR_FRM);
INSN(fsflags, CSR_FFLAGS);

#undef INSN

// One-operand forms of the swaps above: the old value is discarded (Rd = x0).
#define INSN(NAME)                              \
  void MacroAssembler::NAME(Register Rs) {      \
    NAME(x0, Rs);                               \
  }

INSN(fscsr);
INSN(fsrm);
INSN(fsflags);

#undef INSN

// Write an immediate rounding mode to FRM, returning the old value in Rd.
// Immediates >= 5 are rejected by the guarantee below.
void MacroAssembler::fsrmi(Register Rd, unsigned imm) {
  guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register");
  csrrwi(Rd, CSR_FRM, imm);
}

// Write an immediate to FFLAGS, returning the old value in Rd.
void MacroAssembler::fsflagsi(Register Rd, unsigned imm) {
   csrrwi(Rd, CSR_FFLAGS, imm);
}

// One-operand forms of fsrmi / fsflagsi: the old value is discarded (Rd = x0).
#define INSN(NAME)                             \
  void MacroAssembler::NAME(unsigned imm) {    \
    NAME(x0, imm);                             \
  }

INSN(fsrmi);
INSN(fsflagsi);

#undef INSN

// When RestoreMXCSROnJNICalls is set, emit code that reads FRM into `tmp`
// and resets it to round-to-nearest (rne) if it holds any other value.
void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp) {
  if (RestoreMXCSROnJNICalls) {
    Label skip_fsrmi;
    frrm(tmp);
    // Set FRM to the state we need. We do want Round to Nearest.
    // We don't want non-IEEE rounding modes.
    guarantee(RoundingMode::rne == 0, "must be");
    beqz(tmp, skip_fsrmi);        // Only reset FRM if it's wrong
    fsrmi(RoundingMode::rne);
    bind(skip_fsrmi);
  }
}

// Push a single register onto the stack addressed by esp.
void MacroAssembler::push_reg(Register Rs)
{
  addi(esp, esp, 0 - wordSize);
  sd(Rs, Address(esp, 0));
}

// Pop a single register from the stack addressed by esp.
void MacroAssembler::pop_reg(Register Rd)
{
  ld(Rd, Address(esp, 0));
  addi(esp, esp, wordSize);
}

// Expand a 32-bit register bitset into a list of register numbers.
// Bit i set means register i is included. Numbers are written to `regs`
// from the highest set bit down to the lowest; the return value is the
// number of entries written (at most 32).
int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) {
  int count = 0;
  // Scan bitset from bit 31 downwards to accumulate registers
  for (int reg = 31; reg >= 0; reg--) {
    if ((1U << 31) & bitset) {
      regs[count++] = reg;
    }
    bitset <<= 1;
  }
  return count;
}

// Push integer registers in the bitset supplied. Don't push sp.
// Return the number of words pushed
int MacroAssembler::push_reg(unsigned int bitset, Register stack) {
  DEBUG_ONLY(int words_pushed = 0;)
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);
  // reserve one slot to align for odd count
  int offset = is_even(count) ? 0 : wordSize;

  if (count) {
    addi(stack, stack, -count * wordSize - offset);
  }
  // regs[] is in descending register order; the last entry (lowest register
  // number) is stored at the lowest address above the alignment slot.
  for (int i = count - 1; i >= 0; i--) {
    sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
    DEBUG_ONLY(words_pushed++;)
  }

  assert(words_pushed == count, "oops, pushed != count");

  return count;
}

// Pop integer registers in the bitset supplied, using the same layout as
// push_reg(bitset, stack). Return the number of words popped.
int MacroAssembler::pop_reg(unsigned int bitset, Register stack) {
  DEBUG_ONLY(int words_popped = 0;)
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);
  // reserve one slot to align for odd count
  int offset = is_even(count) ? 0 : wordSize;

  for (int i = count - 1; i >= 0; i--) {
    ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
    DEBUG_ONLY(words_popped++;)
  }

  if (count) {
    addi(stack, stack, count * wordSize + offset);
  }
  assert(words_popped == count, "oops, popped != count");

  return count;
}

// Push floating-point registers in the bitset supplied.
// Return the number of words pushed
int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
  DEBUG_ONLY(int words_pushed = 0;)
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);
  // Round the slot count up to an even number.
  int push_slots = count + (count & 1);

  if (count) {
    addi(stack, stack, -push_slots * wordSize);
  }

  for (int i = count - 1; i >= 0; i--) {
    fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize));
    DEBUG_ONLY(words_pushed++;)
  }

  assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);

  return count;
}

// Pop floating-point registers in the bitset supplied, using the same layout
// as push_fp. Return the number of words popped.
int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
  DEBUG_ONLY(int words_popped = 0;)
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);
  // Round the slot count up to an even number.
  int pop_slots = count + (count & 1);

  for (int i = count - 1; i >= 0; i--) {
    fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize));
    DEBUG_ONLY(words_popped++;)
  }

  if (count) {
    addi(stack, stack, pop_slots * wordSize);
  }

  assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count);

  return count;
}

// Masks used by the CRC32 emitters below.
static const int64_t right_32_bits = right_n_bits(32);
static const int64_t right_8_bits = right_n_bits(8);

/**
 * Emits code to update CRC-32 with a byte value according to constants in table
 *
 * @param [in,out]crc   Register containing the crc.
 * @param [in]val       Register containing the byte to fold into the CRC.
 *                      It is overwritten by the emitted code.
 * @param [in]table     Register containing the table of crc constants.
 *
 * uint32_t crc;
 * val = crc_table[(val ^ crc) & 0xFF];
 * crc = val ^ (crc >> 8);
 *
 */
void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
  assert_different_registers(crc, val, table);

  xorr(val, val, crc);
  andi(val, val, right_8_bits);
  shadd(val, val, table, val, 2);
  lwu(val, Address(val));
  srli(crc, crc, 8);
  xorr(crc, val, crc);
}

/**
 * Emits code to update CRC-32 with a 32-bit value according to tables 0 to 3
 *
 * @param [in,out]crc   Register containing the crc.
 * @param [in]v         Register containing the 32-bit to fold into the CRC.
 *                      It is overwritten by the emitted code.
 * @param tmp1,tmp2,tmp3 Scratch registers, overwritten by the emitted code.
 * @param [in]table0    Register containing table 0 of crc constants.
 * @param [in]table1    Register containing table 1 of crc constants.
 * @param [in]table2    Register containing table 2 of crc constants.
 * @param [in]table3    Register containing table 3 of crc constants.
 * @param [in]upper     If true, v is first shifted right by 32 so that its
 *                      upper 32 bits are the ones folded into the CRC.
 *
 * uint32_t crc;
 *   v = crc ^ v
 *   crc = table3[v&0xff]^table2[(v>>8)&0xff]^table1[(v>>16)&0xff]^table0[v>>24]
 *
 */
void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp1, Register tmp2, Register tmp3,
        Register table0, Register table1, Register table2, Register table3, bool upper) {
  assert_different_registers(crc, v, tmp1, tmp2, tmp3, table0, table1, table2, table3);

  if (upper)
    srli(v, v, 32);
  xorr(v, v, crc);

  andi(tmp1, v, right_8_bits);
  shadd(tmp1, tmp1, table3, tmp2, 2);
  lwu(crc, Address(tmp1));

  slli(tmp1, v, 16);
  slli(tmp3, v, 8);

  srliw(tmp1, tmp1, 24);
  srliw(tmp3, tmp3, 24);

  shadd(tmp1, tmp1, table2, tmp1, 2);
  lwu(tmp2, Address(tmp1));

  shadd(tmp3, tmp3, table1, tmp3, 2);
  xorr(crc, crc, tmp2);

  lwu(tmp2, Address(tmp3));
  // It is more optimal to use 'srli' instead of 'srliw' for case when it is not necessary to clean upper bits
  if (upper)
    srli(tmp1, v, 24);
  else
    srliw(tmp1, v, 24);

  // no need to clear bits other than lowest two
  shadd(tmp1, tmp1, table0, tmp1, 2);
  xorr(crc, crc, tmp2);
  lwu(tmp2, Address(tmp1));
  xorr(crc, crc, tmp2);
}

/**
 * Emits the CRC-32 kernel: a 16x unrolled loop consuming 8 bytes per step,
 * followed by a 4-bytes-at-a-time loop and a byte-wise tail. The crc value
 * is inverted (andn with a 32-bit mask) on entry and again on exit.
 *
 * @param crc     register containing existing CRC (32-bit)
 * @param buf     register pointing to input byte buffer (byte*)
 * @param len     register containing number of bytes
 * @param table0  register that will contain the address of the CRC table
 *                (loaded from StubRoutines::crc_table_addr())
 * @param table1  register that will contain table0 + 1*256*sizeof(juint)
 * @param table2  register that will contain table0 + 2*256*sizeof(juint)
 * @param table3  register that will contain table2 + 1*256*sizeof(juint)
 * @param tmp1..tmp6 scratch registers
 */
void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
        Register table0, Register table1, Register table2, Register table3,
        Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6) {
  assert_different_registers(crc, buf, len, table0, table1, table2, table3, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
  Label L_by16_loop, L_unroll_loop, L_unroll_loop_entry, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit;

  const int64_t unroll = 16;
  const int64_t unroll_words = unroll*wordSize;
  mv(tmp5, right_32_bits);
  subw(len, len, unroll_words);
  andn(crc, tmp5, crc);

  const ExternalAddress table_addr = StubRoutines::crc_table_addr();
  la(table0, table_addr);
  add(table1, table0, 1*256*sizeof(juint), tmp1);
  add(table2, table0, 2*256*sizeof(juint), tmp1);
  add(table3, table2, 1*256*sizeof(juint), tmp1);

  // Dispatch on the remaining length: unrolled loop, 4-byte loop, byte tail, or done.
  bge(len, zr, L_unroll_loop_entry);
  addiw(len, len, unroll_words-4);
  bge(len, zr, L_by4_loop);
  addiw(len, len, 4);
  bgt(len, zr, L_by1_loop);
  j(L_exit);

  align(CodeEntryAlignment);
  bind(L_unroll_loop_entry);
    const Register buf_end = tmp3;
    add(buf_end, buf, len); // buf_end will be used as endpoint for loop below
    andi(len, len, unroll_words-1); // len = (len % unroll_words)
    sub(len, len, unroll_words); // Length after all iterations
  bind(L_unroll_loop);
    for (int i = 0; i < unroll; i++) {
      ld(tmp1, Address(buf, i*wordSize));
      // Fold the low 32 bits, then the high 32 bits, of the loaded doubleword.
      update_word_crc32(crc, tmp1, tmp2, tmp4, tmp6, table0, table1, table2, table3, false);
      update_word_crc32(crc, tmp1, tmp2, tmp4, tmp6, table0, table1, table2, table3, true);
    }

    addi(buf, buf, unroll_words);
    ble(buf, buf_end, L_unroll_loop);
    addiw(len, len, unroll_words-4);
    bge(len, zr, L_by4_loop);
    addiw(len, len, 4);
    bgt(len, zr, L_by1_loop);
    j(L_exit);

  bind(L_by4_loop);
    lwu(tmp1, Address(buf));
    update_word_crc32(crc, tmp1, tmp2, tmp4, tmp6, table0, table1, table2, table3, false);
    subw(len, len, 4);
    addi(buf, buf, 4);
    bge(len, zr, L_by4_loop);
    addiw(len, len, 4);
    ble(len, zr, L_exit);

  // Byte tail: one 32-bit load, then its bytes are folded one at a time
  // (lowest byte first) until len reaches zero.
  bind(L_by1_loop);
    subw(len, len, 1);
    lwu(tmp1, Address(buf));
    andi(tmp2, tmp1, right_8_bits);
    update_byte_crc32(crc, tmp2, table0);
    ble(len, zr, L_exit);

    subw(len, len, 1);
    srli(tmp2, tmp1, 8);
    andi(tmp2, tmp2, right_8_bits);
    update_byte_crc32(crc, tmp2, table0);
    ble(len, zr, L_exit);

    subw(len, len, 1);
    srli(tmp2, tmp1, 16);
    andi(tmp2, tmp2, right_8_bits);
    update_byte_crc32(crc, tmp2, table0);
    ble(len, zr, L_exit);

    srli(tmp2, tmp1, 24);
    andi(tmp2, tmp2, right_8_bits);
    update_byte_crc32(crc, tmp2, table0);

  bind(L_exit);
    andn(crc, tmp5, crc);
}

#ifdef COMPILER2
// Push vector registers in the bitset supplied.
// Return the number of words pushed
int MacroAssembler::push_v(unsigned int bitset, Register stack) {
  int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);

  // Scan bitset to accumulate registers
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);

  for (int i = 0; i < count; i++) {
    sub(stack, stack, vector_size_in_bytes);
    vs1r_v(as_VectorRegister(regs[i]), stack);
  }

  return count * vector_size_in_bytes / wordSize;
}

// Pop vector registers in the bitset supplied, in the reverse order of push_v.
// Return the number of words popped
int MacroAssembler::pop_v(unsigned int bitset, Register stack) {
  int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);

  // Scan bitset to accumulate registers
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);

  for (int i = count - 1; i >= 0; i--) {
    vl1r_v(as_VectorRegister(regs[i]), stack);
    add(stack, stack, vector_size_in_bytes);
  }

  return count * vector_size_in_bytes / wordSize;
}
#endif // COMPILER2

// Save the call-clobbered integer registers (minus `exclude`) and the
// call-clobbered float registers on the stack addressed by sp.
void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) {
  // Push integer registers x7, x10-x17, x28-x31.
  push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);

  // Push float registers f0-f7, f10-f17, f28-f31.
  // 8 + 8 + 4 = 20 registers, one word each.
  addi(sp, sp, - wordSize * 20);
  int offset = 0;
  for (int i = 0; i < 32; i++) {
    if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
      fsd(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
    }
  }
}

// Restore the registers saved by push_call_clobbered_registers_except();
// `exclude` must match the set passed to the push for the layouts to agree.
void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) {
  int offset = 0;
  for (int i = 0; i < 32; i++) {
    if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
      fld(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
    }
  }
  addi(sp, sp, wordSize * 20);

  pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
}

// Save integer registers x5-x31, all 32 float registers and, if
// save_vectors is set, all vector registers (stored in groups of eight).
// t0 is used as a scratch register when saving vectors.
void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) {
  // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
  push_reg(RegSet::range(x5, x31), sp);

  // float registers
  addi(sp, sp, - 32 * wordSize);
  for (int i = 0; i < 32; i++) {
    fsd(as_FloatRegister(i), Address(sp, i * wordSize));
  }

  // vector registers
  if (save_vectors) {
    sub(sp, sp, vector_size_in_bytes * VectorRegister::number_of_registers);
    vsetvli(t0, x0, Assembler::e64, Assembler::m8);
    for (int i = 0; i < VectorRegister::number_of_registers; i += 8) {
      add(t0, sp, vector_size_in_bytes * i);
      vse64_v(as_VectorRegister(i), t0);
    }
  }
}

// Restore the state saved by push_CPU_state(), in reverse order:
// vector registers (if restore_vectors), float registers, integer registers.
void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) {
  // vector registers
  if (restore_vectors) {
    vsetvli(t0, x0, Assembler::e64, Assembler::m8);
    for (int i = 0; i < VectorRegister::number_of_registers; i += 8) {
      vle64_v(as_VectorRegister(i), sp);
      add(sp, sp, vector_size_in_bytes * 8);
    }
  }

  // float registers
  for (int i = 0; i < 32; i++) {
    fld(as_FloatRegister(i), Address(sp, i * wordSize));
  }
  addi(sp, sp, 32 * wordSize);

  // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
  pop_reg(RegSet::range(x5, x31), sp);
}

// Patch the immediate fields of the jal at `branch` with `offset`.
// Returns the number of bytes patched (one instruction).
static int patch_offset_in_jal(address branch, int64_t offset) {
  assert(Assembler::is_simm21(offset) && ((offset % 2) == 0),
         "offset is too large to be patched in one jal instruction!\n");
  Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1);                       // offset[20]    ==> branch[31]
  Assembler::patch(branch, 30, 21, (offset >> 1)  & 0x3ff);                     // offset[10:1]  ==> branch[30:21]
  Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1);                       // offset[11]    ==> branch[20]
  Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff);                      // offset[19:12] ==> branch[19:12]
  return MacroAssembler::instruction_size;                                      // only one instruction
}

// Patch the immediate fields of the conditional branch at `branch` with
// `offset`. Returns the number of bytes patched (one instruction).
static int patch_offset_in_conditional_branch(address branch, int64_t offset) {
  assert(Assembler::is_simm13(offset) && ((offset % 2) == 0),
         "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n");
  Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1);                       // offset[12]    ==> branch[31]
  Assembler::patch(branch, 30, 25, (offset >> 5)  & 0x3f);                      // offset[10:5]  ==> branch[30:25]
  Assembler::patch(branch, 7,  7,  (offset >> 11) & 0x1);                       // offset[11]    ==> branch[7]
  Assembler::patch(branch, 11, 8,  (offset >> 1)  & 0xf);                       // offset[4:1]   ==> branch[11:8]
  return MacroAssembler::instruction_size;                                      // only one instruction
}

// Patch a two-instruction pc-relative sequence (auipc followed by
// addi/jalr/load) at `branch` with `offset`. 0x800 is added before taking
// the upper 20 bits; the low 12 bits go into the second instruction.
// Returns the number of bytes patched (two instructions).
static int patch_offset_in_pc_relative(address branch, int64_t offset) {
  const int PC_RELATIVE_INSTRUCTION_NUM = 2;                                    // auipc, addi/jalr/load
  Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff);         // Auipc.          offset[31:12]  ==> branch[31:12]
  Assembler::patch(branch + 4, 31, 20, offset & 0xfff);                         // Addi/Jalr/Load. offset[11:0]   ==> branch[31:20]
  return PC_RELATIVE_INSTRUCTION_NUM * MacroAssembler::instruction_size;
}

// Patch the immediates of a movptr1 sequence at `branch` so that it
// materializes `target`. The instructions at offsets 0, 4, 12 and 20 carry
// the immediate fields. Returns MacroAssembler::movptr1_instruction_size.
static int patch_addr_in_movptr1(address branch, address target) {
  int32_t lower = ((intptr_t)target << 35) >> 35;
  int64_t upper = ((intptr_t)target - lower) >> 29;
  Assembler::patch(branch + 0,  31, 12, upper & 0xfffff);                       // Lui.             target[48:29] + target[28] ==> branch[31:12]
  Assembler::patch(branch + 4,  31, 20, (lower >> 17) & 0xfff);                 // Addi.            target[28:17] ==> branch[31:20]
  Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff);                  // Addi.            target[16: 6] ==> branch[31:20]
  Assembler::patch(branch + 20, 31, 20, lower & 0x3f);                          // Addi/Jalr/Load.  target[ 5: 0] ==> branch[31:20]
  return MacroAssembler::movptr1_instruction_size;
}

// Patch the immediates of a movptr2 sequence at `instruction_address` so that
// it materializes `target` (which must be below 2^48). Instructions 0, 1 and
// 4 of the sequence carry the immediate fields.
// Returns MacroAssembler::movptr2_instruction_size.
static int patch_addr_in_movptr2(address instruction_address, address target) {
  uintptr_t addr = (uintptr_t)target;

  assert(addr < (1ull << 48), "48-bit overflow in address constant");
  unsigned int upper18 = (addr >> 30ull);
  int lower30 = (addr & 0x3fffffffu);
  int low12 = (lower30 << 20) >> 20;
  int mid18 = ((lower30 - low12) >> 12);

  Assembler::patch(instruction_address + (MacroAssembler::instruction_size * 0), 31, 12, (upper18 & 0xfffff)); // Lui
  Assembler::patch(instruction_address + (MacroAssembler::instruction_size * 1), 31, 12, (mid18   & 0xfffff)); // Lui
                                                                                                                  // Slli
                                                                                                                  // Add
  Assembler::patch(instruction_address + (MacroAssembler::instruction_size * 4), 31, 20, low12 & 0xfff);      // Addi/Jalr/Load

  assert(MacroAssembler::target_addr_for_insn(instruction_address) == target, "Must be");

  return MacroAssembler::movptr2_instruction_size;
}

// Patch the immediate of a li16u sequence. Only the leading lui is rewritten.
static int patch_imm_in_li16u(address branch, uint16_t target) {
  Assembler::patch(branch, 31, 12, target); // patch lui only
  return MacroAssembler::instruction_size;
}

// Patch the immediates of a li32 sequence (lui + addiw) at `branch` so that
// it materializes `target`. Returns the number of bytes patched.
int MacroAssembler::patch_imm_in_li32(address branch, int32_t target) {
  const int LI32_INSTRUCTIONS_NUM = 2;                                          // lui + addiw
  int64_t upper = (intptr_t)target;
  int32_t lower = (((int32_t)target) << 20) >> 20;
  upper -= lower;
  upper = (int32_t)upper;
  Assembler::patch(branch + 0,  31, 12, (upper >> 12) & 0xfffff);               // Lui.
  Assembler::patch(branch + 4,  31, 20, lower & 0xfff);                         // Addiw.
  return LI32_INSTRUCTIONS_NUM * MacroAssembler::instruction_size;
}

// Decode the sign-extended branch offset stored in the jal at `insn_addr`.
static long get_offset_of_jal(address insn_addr) {
  assert_cond(insn_addr != nullptr);
  long offset = 0;
  unsigned insn = Assembler::ld_instr(insn_addr);
  long val = (long)Assembler::sextract(insn, 31, 12);
  offset |= ((val >> 19) & 0x1) << 20;
  offset |= (val & 0xff) << 12;
  offset |= ((val >> 8) & 0x1) << 11;
  offset |= ((val >> 9) & 0x3ff) << 1;
  offset = (offset << 43) >> 43;   // sign-extend from bit 20
  return offset;
}

// Decode the sign-extended branch offset stored in the conditional branch
// at `insn_addr`.
static long get_offset_of_conditional_branch(address insn_addr) {
  long offset = 0;
  assert_cond(insn_addr != nullptr);
  unsigned insn = Assembler::ld_instr(insn_addr);
  offset = (long)Assembler::sextract(insn, 31, 31);
  offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11);
  offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5);
  offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1);
  offset = (offset << 41) >> 41;   // sign-extend from bit 22
  return offset;
}

// Decode the offset encoded by a two-instruction pc-relative sequence
// (auipc followed by addi/jalr/load) at `insn_addr`.
static long get_offset_of_pc_relative(address insn_addr) {
  long offset = 0;
  assert_cond(insn_addr != nullptr);
  offset = ((long)(Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12))) << 12;                               // Auipc.
  offset += ((long)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20));                                  // Addi/Jalr/Load.
  offset = (offset << 32) >> 32;   // sign-extend from bit 31
  return offset;
}

// Reassemble the address materialized by the movptr1 sequence at `insn_addr`
// from the immediates of the instructions at offsets 0, 4, 12 and 20.
static address get_target_of_movptr1(address insn_addr) {
  assert_cond(insn_addr != nullptr);
  intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 29; // Lui.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 17;                 // Addi.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 6;                 // Addi.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20));                      // Addi/Jalr/Load.
  return (address) target_address;
}

// Reassemble the address materialized by the movptr2 sequence at `insn_addr`
// from the immediates of instructions 0, 1 and 4 of the sequence.
static address get_target_of_movptr2(address insn_addr) {
  assert_cond(insn_addr != nullptr);
  int32_t upper18 = ((Assembler::sextract(Assembler::ld_instr(insn_addr + MacroAssembler::instruction_size * 0), 31, 12)) & 0xfffff); // Lui
  int32_t mid18   = ((Assembler::sextract(Assembler::ld_instr(insn_addr + MacroAssembler::instruction_size * 1), 31, 12)) & 0xfffff); // Lui
                                                                                                                       // 2                              // Slli
                                                                                                                       // 3                              // Add
  int32_t low12  = ((Assembler::sextract(Assembler::ld_instr(insn_addr + MacroAssembler::instruction_size * 4), 31, 20))); // Addi/Jalr/Load.
  address ret = (address)(((intptr_t)upper18<<30ll) + ((intptr_t)mid18<<12ll) + low12);
  return ret;
}

// Reassemble the value materialized by the li32 sequence (lui + addiw)
// at `insn_addr`, returned as an address.
address MacroAssembler::get_target_of_li32(address insn_addr) {
  assert_cond(insn_addr != nullptr);
  intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 12; // Lui.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20));                       // Addiw.
  return (address)target_address;
}

// Patch any kind of instruction; there may be several instructions.
// Return the total length (in bytes) of the instructions.
// Dispatches on the instruction pattern found at `instruction_address` and
// rewrites its immediate(s) so that it refers to `target`. For jal,
// conditional branches and pc-relative sequences the pc-relative offset is
// patched; for movptr1/movptr2 the absolute address; for li32/li16u the
// target value truncated to 32/16 bits. An unrecognized pattern is fatal.
int MacroAssembler::pd_patch_instruction_size(address instruction_address, address target) {
  assert_cond(instruction_address != nullptr);
  int64_t offset = target - instruction_address;
  if (MacroAssembler::is_jal_at(instruction_address)) {                         // jal
    return patch_offset_in_jal(instruction_address, offset);
  } else if (MacroAssembler::is_branch_at(instruction_address)) {               // beq/bge/bgeu/blt/bltu/bne
    return patch_offset_in_conditional_branch(instruction_address, offset);
  } else if (MacroAssembler::is_pc_relative_at(instruction_address)) {          // auipc, addi/jalr/load
    return patch_offset_in_pc_relative(instruction_address, offset);
  } else if (MacroAssembler::is_movptr1_at(instruction_address)) {              // movptr1
    return patch_addr_in_movptr1(instruction_address, target);
  } else if (MacroAssembler::is_movptr2_at(instruction_address)) {              // movptr2
    return patch_addr_in_movptr2(instruction_address, target);
  } else if (MacroAssembler::is_li32_at(instruction_address)) {                 // li32
    int64_t imm = (intptr_t)target;
    return patch_imm_in_li32(instruction_address, (int32_t)imm);
  } else if (MacroAssembler::is_li16u_at(instruction_address)) {                // li16u
    int64_t imm = (intptr_t)target;
    return patch_imm_in_li16u(instruction_address, (uint16_t)imm);
  } else {
#ifdef ASSERT
    tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n",
                  Assembler::ld_instr(instruction_address), p2i(instruction_address));
    Disassembler::decode(instruction_address - 16, instruction_address + 16);
#endif
    ShouldNotReachHere();
    return -1;
  }
}

// Decode the target encoded by the instruction (sequence) at `insn_addr`.
// For jal, conditional branches and pc-relative sequences the result is
// insn_addr plus the decoded offset; for movptr1/movptr2/li32 the decoded
// value is returned directly. An unrecognized pattern is fatal.
// NOTE(review): unlike pd_patch_instruction_size(), there is no li16u case
// here - confirm that this is intentional.
address MacroAssembler::target_addr_for_insn(address insn_addr) {
  long offset = 0;
  assert_cond(insn_addr != nullptr);
  if (MacroAssembler::is_jal_at(insn_addr)) {                     // jal
    offset = get_offset_of_jal(insn_addr);
  } else if (MacroAssembler::is_branch_at(insn_addr)) {           // beq/bge/bgeu/blt/bltu/bne
    offset = get_offset_of_conditional_branch(insn_addr);
  } else if (MacroAssembler::is_pc_relative_at(insn_addr)) {      // auipc, addi/jalr/load
    offset = get_offset_of_pc_relative(insn_addr);
  } else if (MacroAssembler::is_movptr1_at(insn_addr)) {          // movptr1
    return get_target_of_movptr1(insn_addr);
  } else if (MacroAssembler::is_movptr2_at(insn_addr)) {          // movptr2
    return get_target_of_movptr2(insn_addr);
  } else if (MacroAssembler::is_li32_at(insn_addr)) {             // li32
    return get_target_of_li32(insn_addr);
  } else {
    ShouldNotReachHere();
  }
  return address(((uintptr_t)insn_addr + offset));
}

// Patch the oop constant embedded at `insn_addr` with `o`.
// Returns the number of bytes patched; an unrecognized pattern is fatal.
int MacroAssembler::patch_oop(address insn_addr, address o) {
  // OOPs are either narrow (32 bits) or wide (48 bits).  We encode
  // narrow OOPs by setting the upper 16 bits in the first
  // instruction.
  if (MacroAssembler::is_li32_at(insn_addr)) {
    // Move narrow OOP
    uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o));
    return patch_imm_in_li32(insn_addr, (int32_t)n);
  } else if (MacroAssembler::is_movptr1_at(insn_addr)) {
    // Move wide OOP
    return patch_addr_in_movptr1(insn_addr, o);
  } else if (MacroAssembler::is_movptr2_at(insn_addr)) {
    // Move wide OOP
    return patch_addr_in_movptr2(insn_addr, o);
  }
  ShouldNotReachHere();
  return -1;
}

// Reload xheapbase when compressed oops are in use. Once the universe is
// fully initialized the base is emitted as an immediate; before that it is
// loaded from CompressedOops::ptrs_base_addr() at run time.
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops) {
    if (Universe::is_fully_initialized()) {
      mv(xheapbase, CompressedOops::ptrs_base());
    } else {
      ExternalAddress target(CompressedOops::ptrs_base_addr());
      relocate(target.rspec(), [&] {
        int32_t offset;  // written by la() through its reference parameter
        la(xheapbase, target.target(), offset);
        ld(xheapbase, Address(xheapbase, offset));
      });
    }
  }
}

// Materialize the full address `addr` in Rd: emits the movptr sequence and
// then adds the residual offset with a trailing addi.
void MacroAssembler::movptr(Register Rd, address addr, Register temp) {
  int offset = 0;
  movptr(Rd, addr, offset, temp);
  addi(Rd, Rd, offset);
}

// Materialize `addr` in Rd except for a small residual returned in `offset`,
// which the caller folds into a following addi/jalr/ld.
// Uses movptr1 when no temp register is supplied (temp == noreg), otherwise
// movptr2. The address must be below 2^48.
void MacroAssembler::movptr(Register Rd, address addr, int32_t &offset, Register temp) {
  uint64_t uimm64 = (uint64_t)addr;
#ifndef PRODUCT
  {
    char buffer[64];
    snprintf(buffer, sizeof(buffer), "0x%" PRIx64, uimm64);
    block_comment(buffer);
  }
#endif
  assert(uimm64 < (1ull << 48), "48-bit overflow in address constant");

  if (temp == noreg) {
    movptr1(Rd, uimm64, offset);
  } else {
    movptr2(Rd, uimm64, offset, temp);
  }
}

// Single-register movptr: lui + addi + slli + addi + slli. The low 6 bits
// of imm64 are not emitted; they are returned in `offset`.
void MacroAssembler::movptr1(Register Rd, uint64_t imm64, int32_t &offset) {
  // Load upper 31 bits
  //
  // In case of 11th bit of `lower` is 0, it's straightforward to understand.
  // In case of 11th bit of `lower` is 1, it's a bit tricky, to help understand,
  // imagine divide both `upper` and `lower` into 2 parts respectively, i.e.
  // [upper_20, upper_12], [lower_20, lower_12], they are the same just before
  // `lower = (lower << 52) >> 52;`.
  // After `upper -= lower;`,
  //    upper_20' = upper_20 - (-1) == upper_20 + 1
  //    upper_12 = 0x000
  // After `lui(Rd, upper);`, `Rd` = upper_20' << 12
  // Also divide `Rd` into 2 parts [Rd_20, Rd_12],
  //    Rd_20 == upper_20'
  //    Rd_12 == 0x000
  // After `addi(Rd, Rd, lower);`,
  //    Rd_20 = upper_20' + (-1) == upper_20 + 1 - 1 = upper_20
  //    Rd_12 = lower_12
  // So, finally Rd == [upper_20, lower_12]
  int64_t imm = imm64 >> 17;
  int64_t upper = imm, lower = imm;
  lower = (lower << 52) >> 52;
  upper -= lower;
  upper = (int32_t)upper;
  lui(Rd, upper);
  addi(Rd, Rd, lower);

  // Load the rest 17 bits.
  slli(Rd, Rd, 11);
  addi(Rd, Rd, (imm64 >> 6) & 0x7ff);
  slli(Rd, Rd, 6);

  // This offset will be used by following jalr/ld.
  offset = imm64 & 0x3f;
}

// Two-register movptr: lui(tmp) + lui(Rd) + slli(tmp) + add. The
// sign-extended low 12 bits of addr are not emitted; they are returned in
// `offset`. `tmp` is clobbered and must differ from Rd.
void MacroAssembler::movptr2(Register Rd, uint64_t addr, int32_t &offset, Register tmp) {
  assert_different_registers(Rd, tmp, noreg);

  // addr: [upper18, lower30[mid18, lower12]]

  int64_t upper18 = addr >> 18;
  lui(tmp, upper18);

  int64_t lower30 = addr & 0x3fffffff;
  int64_t mid18 = lower30, lower12 = lower30;
  lower12 = (lower12 << 52) >> 52;
  // For this tricky part (`mid18 -= lower12;` + `offset = lower12;`),
  // please refer to movptr1 above.
  mid18 -= (int32_t)lower12;
  lui(Rd, mid18);

  slli(tmp, tmp, 18);
  add(Rd, Rd, tmp);

  offset = lower12;
}

// Rd = Rn + increment. Uses a single addi when the increment fits in a
// signed 12-bit immediate; otherwise loads it into `temp` (which must
// differ from Rn) and emits a register add.
void MacroAssembler::add(Register Rd, Register Rn, int64_t increment, Register temp) {
  if (is_simm12(increment)) {
    addi(Rd, Rn, increment);
  } else {
    assert_different_registers(Rn, temp);
    li(temp, increment);
    add(Rd, Rn, temp);
  }
}

// Word variant of add(): uses addiw / addw.
void MacroAssembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) {
  if (is_simm12(increment)) {
    addiw(Rd, Rn, increment);
  } else {
    assert_different_registers(Rn, temp);
    li(temp, increment);
    addw(Rd, Rn, temp);
  }
}

// Rd = Rn - decrement. Uses a single addi with the negated value when that
// fits in a signed 12-bit immediate; otherwise loads the decrement into
// `temp` (which must differ from Rn) and emits a register sub.
void MacroAssembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) {
  if (is_simm12(-decrement)) {
    addi(Rd, Rn, -decrement);
  } else {
    assert_different_registers(Rn, temp);
    li(temp, decrement);
    sub(Rd, Rn, temp);
  }
}

// Word variant of sub(): uses addiw / subw.
void MacroAssembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) {
  if (is_simm12(-decrement)) {
    addiw(Rd, Rn, -decrement);
  } else {
    assert_different_registers(Rn, temp);
    li(temp, decrement);
    subw(Rd, Rn, temp);
  }
}

// Bitwise AND of Rs1 and Rs2 into Rd, followed by sign_extend(Rd, Rd, 32).
void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) {
  andr(Rd, Rs1, Rs2);
  sign_extend(Rd, Rd, 32);
}

void
MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { 2038 orr(Rd, Rs1, Rs2); 2039 sign_extend(Rd, Rd, 32); 2040 } 2041 2042 void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { 2043 xorr(Rd, Rs1, Rs2); 2044 sign_extend(Rd, Rd, 32); 2045 } 2046 2047 // Rd = Rs1 & (~Rd2) 2048 void MacroAssembler::andn(Register Rd, Register Rs1, Register Rs2) { 2049 if (UseZbb) { 2050 Assembler::andn(Rd, Rs1, Rs2); 2051 return; 2052 } 2053 2054 notr(Rd, Rs2); 2055 andr(Rd, Rs1, Rd); 2056 } 2057 2058 // Rd = Rs1 | (~Rd2) 2059 void MacroAssembler::orn(Register Rd, Register Rs1, Register Rs2) { 2060 if (UseZbb) { 2061 Assembler::orn(Rd, Rs1, Rs2); 2062 return; 2063 } 2064 2065 notr(Rd, Rs2); 2066 orr(Rd, Rs1, Rd); 2067 } 2068 2069 // Note: load_unsigned_short used to be called load_unsigned_word. 2070 int MacroAssembler::load_unsigned_short(Register dst, Address src) { 2071 int off = offset(); 2072 lhu(dst, src); 2073 return off; 2074 } 2075 2076 int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 2077 int off = offset(); 2078 lbu(dst, src); 2079 return off; 2080 } 2081 2082 int MacroAssembler::load_signed_short(Register dst, Address src) { 2083 int off = offset(); 2084 lh(dst, src); 2085 return off; 2086 } 2087 2088 int MacroAssembler::load_signed_byte(Register dst, Address src) { 2089 int off = offset(); 2090 lb(dst, src); 2091 return off; 2092 } 2093 2094 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 2095 switch (size_in_bytes) { 2096 case 8: ld(dst, src); break; 2097 case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; 2098 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 2099 case 1: is_signed ? 
load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 2100 default: ShouldNotReachHere(); 2101 } 2102 } 2103 2104 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes) { 2105 switch (size_in_bytes) { 2106 case 8: sd(src, dst); break; 2107 case 4: sw(src, dst); break; 2108 case 2: sh(src, dst); break; 2109 case 1: sb(src, dst); break; 2110 default: ShouldNotReachHere(); 2111 } 2112 } 2113 2114 // granularity is 1 OR 2 bytes per load. dst and src.base() allowed to be the same register 2115 void MacroAssembler::load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) { 2116 if (granularity != 1 && granularity != 2) { 2117 ShouldNotReachHere(); 2118 } 2119 if (AvoidUnalignedAccesses && (granularity != 2)) { 2120 assert_different_registers(dst, tmp); 2121 assert_different_registers(tmp, src.base()); 2122 is_signed ? lb(tmp, Address(src.base(), src.offset() + 1)) : lbu(tmp, Address(src.base(), src.offset() + 1)); 2123 slli(tmp, tmp, 8); 2124 lbu(dst, src); 2125 add(dst, dst, tmp); 2126 } else { 2127 is_signed ? lh(dst, src) : lhu(dst, src); 2128 } 2129 } 2130 2131 // granularity is 1, 2 OR 4 bytes per load, if granularity 2 or 4 then dst and src.base() allowed to be the same register 2132 void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) { 2133 if (AvoidUnalignedAccesses && (granularity != 4)) { 2134 switch(granularity) { 2135 case 1: 2136 assert_different_registers(dst, tmp, src.base()); 2137 lbu(dst, src); 2138 lbu(tmp, Address(src.base(), src.offset() + 1)); 2139 slli(tmp, tmp, 8); 2140 add(dst, dst, tmp); 2141 lbu(tmp, Address(src.base(), src.offset() + 2)); 2142 slli(tmp, tmp, 16); 2143 add(dst, dst, tmp); 2144 is_signed ? 
lb(tmp, Address(src.base(), src.offset() + 3)) : lbu(tmp, Address(src.base(), src.offset() + 3)); 2145 slli(tmp, tmp, 24); 2146 add(dst, dst, tmp); 2147 break; 2148 case 2: 2149 assert_different_registers(dst, tmp); 2150 assert_different_registers(tmp, src.base()); 2151 is_signed ? lh(tmp, Address(src.base(), src.offset() + 2)) : lhu(tmp, Address(src.base(), src.offset() + 2)); 2152 slli(tmp, tmp, 16); 2153 lhu(dst, src); 2154 add(dst, dst, tmp); 2155 break; 2156 default: 2157 ShouldNotReachHere(); 2158 } 2159 } else { 2160 is_signed ? lw(dst, src) : lwu(dst, src); 2161 } 2162 } 2163 2164 // granularity is 1, 2, 4 or 8 bytes per load, if granularity 4 or 8 then dst and src.base() allowed to be same register 2165 void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tmp, int granularity) { 2166 if (AvoidUnalignedAccesses && (granularity != 8)) { 2167 switch(granularity){ 2168 case 1: 2169 assert_different_registers(dst, tmp, src.base()); 2170 lbu(dst, src); 2171 lbu(tmp, Address(src.base(), src.offset() + 1)); 2172 slli(tmp, tmp, 8); 2173 add(dst, dst, tmp); 2174 lbu(tmp, Address(src.base(), src.offset() + 2)); 2175 slli(tmp, tmp, 16); 2176 add(dst, dst, tmp); 2177 lbu(tmp, Address(src.base(), src.offset() + 3)); 2178 slli(tmp, tmp, 24); 2179 add(dst, dst, tmp); 2180 lbu(tmp, Address(src.base(), src.offset() + 4)); 2181 slli(tmp, tmp, 32); 2182 add(dst, dst, tmp); 2183 lbu(tmp, Address(src.base(), src.offset() + 5)); 2184 slli(tmp, tmp, 40); 2185 add(dst, dst, tmp); 2186 lbu(tmp, Address(src.base(), src.offset() + 6)); 2187 slli(tmp, tmp, 48); 2188 add(dst, dst, tmp); 2189 lbu(tmp, Address(src.base(), src.offset() + 7)); 2190 slli(tmp, tmp, 56); 2191 add(dst, dst, tmp); 2192 break; 2193 case 2: 2194 assert_different_registers(dst, tmp, src.base()); 2195 lhu(dst, src); 2196 lhu(tmp, Address(src.base(), src.offset() + 2)); 2197 slli(tmp, tmp, 16); 2198 add(dst, dst, tmp); 2199 lhu(tmp, Address(src.base(), src.offset() + 4)); 2200 slli(tmp, tmp, 
32); 2201 add(dst, dst, tmp); 2202 lhu(tmp, Address(src.base(), src.offset() + 6)); 2203 slli(tmp, tmp, 48); 2204 add(dst, dst, tmp); 2205 break; 2206 case 4: 2207 assert_different_registers(dst, tmp); 2208 assert_different_registers(tmp, src.base()); 2209 lwu(tmp, Address(src.base(), src.offset() + 4)); 2210 slli(tmp, tmp, 32); 2211 lwu(dst, src); 2212 add(dst, dst, tmp); 2213 break; 2214 default: 2215 ShouldNotReachHere(); 2216 } 2217 } else { 2218 ld(dst, src); 2219 } 2220 } 2221 2222 2223 // reverse bytes in halfword in lower 16 bits and sign-extend 2224 // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) 2225 void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { 2226 if (UseZbb) { 2227 rev8(Rd, Rs); 2228 srai(Rd, Rd, 48); 2229 return; 2230 } 2231 assert_different_registers(Rs, tmp); 2232 assert_different_registers(Rd, tmp); 2233 srli(tmp, Rs, 8); 2234 andi(tmp, tmp, 0xFF); 2235 slli(Rd, Rs, 56); 2236 srai(Rd, Rd, 48); // sign-extend 2237 orr(Rd, Rd, tmp); 2238 } 2239 2240 // reverse bytes in lower word and sign-extend 2241 // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) 2242 void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2243 if (UseZbb) { 2244 rev8(Rd, Rs); 2245 srai(Rd, Rd, 32); 2246 return; 2247 } 2248 assert_different_registers(Rs, tmp1, tmp2); 2249 assert_different_registers(Rd, tmp1, tmp2); 2250 revb_h_w_u(Rd, Rs, tmp1, tmp2); 2251 slli(tmp2, Rd, 48); 2252 srai(tmp2, tmp2, 32); // sign-extend 2253 srli(Rd, Rd, 16); 2254 orr(Rd, Rd, tmp2); 2255 } 2256 2257 // reverse bytes in halfword in lower 16 bits and zero-extend 2258 // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 2259 void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { 2260 if (UseZbb) { 2261 rev8(Rd, Rs); 2262 srli(Rd, Rd, 48); 2263 return; 2264 } 2265 assert_different_registers(Rs, tmp); 2266 assert_different_registers(Rd, tmp); 2267 srli(tmp, Rs, 8); 2268 andi(tmp, tmp, 0xFF); 
2269 andi(Rd, Rs, 0xFF); 2270 slli(Rd, Rd, 8); 2271 orr(Rd, Rd, tmp); 2272 } 2273 2274 // reverse bytes in halfwords in lower 32 bits and zero-extend 2275 // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 2276 void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2277 if (UseZbb) { 2278 rev8(Rd, Rs); 2279 rori(Rd, Rd, 32); 2280 roriw(Rd, Rd, 16); 2281 zero_extend(Rd, Rd, 32); 2282 return; 2283 } 2284 assert_different_registers(Rs, tmp1, tmp2); 2285 assert_different_registers(Rd, tmp1, tmp2); 2286 srli(tmp2, Rs, 16); 2287 revb_h_h_u(tmp2, tmp2, tmp1); 2288 revb_h_h_u(Rd, Rs, tmp1); 2289 slli(tmp2, tmp2, 16); 2290 orr(Rd, Rd, tmp2); 2291 } 2292 2293 // This method is only used for revb_h 2294 // Rd = Rs[47:0] Rs[55:48] Rs[63:56] 2295 void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2296 assert_different_registers(Rs, tmp1, tmp2); 2297 assert_different_registers(Rd, tmp1); 2298 srli(tmp1, Rs, 48); 2299 andi(tmp2, tmp1, 0xFF); 2300 slli(tmp2, tmp2, 8); 2301 srli(tmp1, tmp1, 8); 2302 orr(tmp1, tmp1, tmp2); 2303 slli(Rd, Rs, 16); 2304 orr(Rd, Rd, tmp1); 2305 } 2306 2307 // reverse bytes in each halfword 2308 // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] 2309 void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2310 if (UseZbb) { 2311 assert_different_registers(Rs, tmp1); 2312 assert_different_registers(Rd, tmp1); 2313 rev8(Rd, Rs); 2314 zero_extend(tmp1, Rd, 32); 2315 roriw(tmp1, tmp1, 16); 2316 slli(tmp1, tmp1, 32); 2317 srli(Rd, Rd, 32); 2318 roriw(Rd, Rd, 16); 2319 zero_extend(Rd, Rd, 32); 2320 orr(Rd, Rd, tmp1); 2321 return; 2322 } 2323 assert_different_registers(Rs, tmp1, tmp2); 2324 assert_different_registers(Rd, tmp1, tmp2); 2325 revb_h_helper(Rd, Rs, tmp1, tmp2); 2326 for (int i = 0; i < 3; ++i) { 2327 revb_h_helper(Rd, Rd, tmp1, tmp2); 2328 } 2329 } 2330 2331 // reverse bytes in 
each word 2332 // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] 2333 void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2334 if (UseZbb) { 2335 rev8(Rd, Rs); 2336 rori(Rd, Rd, 32); 2337 return; 2338 } 2339 assert_different_registers(Rs, tmp1, tmp2); 2340 assert_different_registers(Rd, tmp1, tmp2); 2341 revb(Rd, Rs, tmp1, tmp2); 2342 ror_imm(Rd, Rd, 32); 2343 } 2344 2345 // reverse bytes in doubleword 2346 // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] 2347 void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2348 if (UseZbb) { 2349 rev8(Rd, Rs); 2350 return; 2351 } 2352 assert_different_registers(Rs, tmp1, tmp2); 2353 assert_different_registers(Rd, tmp1, tmp2); 2354 andi(tmp1, Rs, 0xFF); 2355 slli(tmp1, tmp1, 8); 2356 for (int step = 8; step < 56; step += 8) { 2357 srli(tmp2, Rs, step); 2358 andi(tmp2, tmp2, 0xFF); 2359 orr(tmp1, tmp1, tmp2); 2360 slli(tmp1, tmp1, 8); 2361 } 2362 srli(Rd, Rs, 56); 2363 andi(Rd, Rd, 0xFF); 2364 orr(Rd, tmp1, Rd); 2365 } 2366 2367 // rotate right with shift bits 2368 void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) 2369 { 2370 if (UseZbb) { 2371 rori(dst, src, shift); 2372 return; 2373 } 2374 2375 assert_different_registers(dst, tmp); 2376 assert_different_registers(src, tmp); 2377 assert(shift < 64, "shift amount must be < 64"); 2378 slli(tmp, src, 64 - shift); 2379 srli(dst, src, shift); 2380 orr(dst, dst, tmp); 2381 } 2382 2383 // rotate left with shift bits, 32-bit version 2384 void MacroAssembler::rolw_imm(Register dst, Register src, uint32_t shift, Register tmp) { 2385 if (UseZbb) { 2386 // no roliw available 2387 roriw(dst, src, 32 - shift); 2388 return; 2389 } 2390 2391 assert_different_registers(dst, tmp); 2392 assert_different_registers(src, tmp); 2393 assert(shift < 32, "shift amount must be < 32"); 2394 srliw(tmp, src, 32 - shift); 
2395 slliw(dst, src, shift); 2396 orr(dst, dst, tmp); 2397 } 2398 2399 void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { 2400 if (is_simm12(imm)) { 2401 and_imm12(Rd, Rn, imm); 2402 } else { 2403 assert_different_registers(Rn, tmp); 2404 mv(tmp, imm); 2405 andr(Rd, Rn, tmp); 2406 } 2407 } 2408 2409 void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { 2410 ld(tmp1, adr); 2411 if (src.is_register()) { 2412 orr(tmp1, tmp1, src.as_register()); 2413 } else { 2414 if (is_simm12(src.as_constant())) { 2415 ori(tmp1, tmp1, src.as_constant()); 2416 } else { 2417 assert_different_registers(tmp1, tmp2); 2418 mv(tmp2, src.as_constant()); 2419 orr(tmp1, tmp1, tmp2); 2420 } 2421 } 2422 sd(tmp1, adr); 2423 } 2424 2425 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L) { 2426 assert_different_registers(oop, trial_klass, tmp1, tmp2); 2427 if (UseCompressedClassPointers) { 2428 lwu(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 2429 if (CompressedKlassPointers::base() == nullptr) { 2430 slli(tmp1, tmp1, CompressedKlassPointers::shift()); 2431 beq(trial_klass, tmp1, L); 2432 return; 2433 } 2434 decode_klass_not_null(tmp1, tmp2); 2435 } else { 2436 ld(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 2437 } 2438 beq(trial_klass, tmp1, L); 2439 } 2440 2441 // Move an oop into a register. 
2442 void MacroAssembler::movoop(Register dst, jobject obj) { 2443 int oop_index; 2444 if (obj == nullptr) { 2445 oop_index = oop_recorder()->allocate_oop_index(obj); 2446 } else { 2447 #ifdef ASSERT 2448 { 2449 ThreadInVMfromUnknown tiv; 2450 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 2451 } 2452 #endif 2453 oop_index = oop_recorder()->find_index(obj); 2454 } 2455 RelocationHolder rspec = oop_Relocation::spec(oop_index); 2456 2457 if (BarrierSet::barrier_set()->barrier_set_assembler()->supports_instruction_patching()) { 2458 la(dst, Address((address)obj, rspec)); 2459 } else { 2460 address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address 2461 ld_constant(dst, Address(dummy, rspec)); 2462 } 2463 } 2464 2465 // Move a metadata address into a register. 2466 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { 2467 assert((uintptr_t)obj < (1ull << 48), "48-bit overflow in metadata"); 2468 int oop_index; 2469 if (obj == nullptr) { 2470 oop_index = oop_recorder()->allocate_metadata_index(obj); 2471 } else { 2472 oop_index = oop_recorder()->find_index(obj); 2473 } 2474 RelocationHolder rspec = metadata_Relocation::spec(oop_index); 2475 la(dst, Address((address)obj, rspec)); 2476 } 2477 2478 // Writes to stack successive pages until offset reached to check for 2479 // stack overflow + shadow pages. This clobbers tmp. 2480 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 2481 assert_different_registers(tmp, size, t0); 2482 // Bang stack for total size given plus shadow page size. 2483 // Bang one page at a time because large size can bang beyond yellow and 2484 // red zones. 2485 mv(t0, (int)os::vm_page_size()); 2486 Label loop; 2487 bind(loop); 2488 sub(tmp, sp, t0); 2489 subw(size, size, t0); 2490 sd(size, Address(tmp)); 2491 bgtz(size, loop); 2492 2493 // Bang down shadow pages too. 2494 // At this point, (tmp-0) is the last address touched, so don't 2495 // touch it again. 
(It was touched as (tmp-pagesize) but then tmp 2496 // was post-decremented.) Skip this address by starting at i=1, and 2497 // touch a few more pages below. N.B. It is important to touch all 2498 // the way down to and including i=StackShadowPages. 2499 for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / (int)os::vm_page_size()) - 1; i++) { 2500 // this could be any sized move but this is can be a debugging crumb 2501 // so the bigger the better. 2502 sub(tmp, tmp, (int)os::vm_page_size()); 2503 sd(size, Address(tmp, 0)); 2504 } 2505 } 2506 2507 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { 2508 int32_t offset = 0; 2509 _masm = masm; 2510 ExternalAddress target((address)flag_addr); 2511 _masm->relocate(target.rspec(), [&] { 2512 int32_t offset; 2513 _masm->la(t0, target.target(), offset); 2514 _masm->lbu(t0, Address(t0, offset)); 2515 }); 2516 if (value) { 2517 _masm->bnez(t0, _label); 2518 } else { 2519 _masm->beqz(t0, _label); 2520 } 2521 } 2522 2523 SkipIfEqual::~SkipIfEqual() { 2524 _masm->bind(_label); 2525 _masm = nullptr; 2526 } 2527 2528 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, Register tmp2) { 2529 const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 2530 ld(dst, Address(xmethod, Method::const_offset())); 2531 ld(dst, Address(dst, ConstMethod::constants_offset())); 2532 ld(dst, Address(dst, ConstantPool::pool_holder_offset())); 2533 ld(dst, Address(dst, mirror_offset)); 2534 resolve_oop_handle(dst, tmp1, tmp2); 2535 } 2536 2537 void MacroAssembler::resolve_oop_handle(Register result, Register tmp1, Register tmp2) { 2538 // OopHandle::resolve is an indirection. 
2539 assert_different_registers(result, tmp1, tmp2); 2540 access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp1, tmp2); 2541 } 2542 2543 // ((WeakHandle)result).resolve() 2544 void MacroAssembler::resolve_weak_handle(Register result, Register tmp1, Register tmp2) { 2545 assert_different_registers(result, tmp1, tmp2); 2546 Label resolved; 2547 2548 // A null weak handle resolves to null. 2549 beqz(result, resolved); 2550 2551 // Only 64 bit platforms support GCs that require a tmp register 2552 // Only IN_HEAP loads require a thread_tmp register 2553 // WeakHandle::resolve is an indirection like jweak. 2554 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, 2555 result, Address(result), tmp1, tmp2); 2556 bind(resolved); 2557 } 2558 2559 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, 2560 Register dst, Address src, 2561 Register tmp1, Register tmp2) { 2562 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2563 decorators = AccessInternal::decorator_fixup(decorators, type); 2564 bool as_raw = (decorators & AS_RAW) != 0; 2565 if (as_raw) { 2566 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2); 2567 } else { 2568 bs->load_at(this, decorators, type, dst, src, tmp1, tmp2); 2569 } 2570 } 2571 2572 void MacroAssembler::null_check(Register reg, int offset) { 2573 if (needs_explicit_null_check(offset)) { 2574 // provoke OS null exception if reg is null by 2575 // accessing M[reg] w/o changing any registers 2576 // NOTE: this is plenty to provoke a segv 2577 ld(zr, Address(reg, 0)); 2578 } else { 2579 // nothing to do, (later) access of M[reg + offset] 2580 // will provoke OS null exception if reg is null 2581 } 2582 } 2583 2584 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, 2585 Address dst, Register val, 2586 Register tmp1, Register tmp2, Register tmp3) { 2587 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 
2588 decorators = AccessInternal::decorator_fixup(decorators, type); 2589 bool as_raw = (decorators & AS_RAW) != 0; 2590 if (as_raw) { 2591 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3); 2592 } else { 2593 bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3); 2594 } 2595 } 2596 2597 // Algorithm must match CompressedOops::encode. 2598 void MacroAssembler::encode_heap_oop(Register d, Register s) { 2599 verify_oop_msg(s, "broken oop in encode_heap_oop"); 2600 if (CompressedOops::base() == nullptr) { 2601 if (CompressedOops::shift() != 0) { 2602 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2603 srli(d, s, LogMinObjAlignmentInBytes); 2604 } else { 2605 mv(d, s); 2606 } 2607 } else { 2608 Label notNull; 2609 sub(d, s, xheapbase); 2610 bgez(d, notNull); 2611 mv(d, zr); 2612 bind(notNull); 2613 if (CompressedOops::shift() != 0) { 2614 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2615 srli(d, d, CompressedOops::shift()); 2616 } 2617 } 2618 } 2619 2620 void MacroAssembler::encode_heap_oop_not_null(Register r) { 2621 #ifdef ASSERT 2622 if (CheckCompressedOops) { 2623 Label ok; 2624 bnez(r, ok); 2625 stop("null oop passed to encode_heap_oop_not_null"); 2626 bind(ok); 2627 } 2628 #endif 2629 verify_oop_msg(r, "broken oop in encode_heap_oop_not_null"); 2630 if (CompressedOops::base() != nullptr) { 2631 sub(r, r, xheapbase); 2632 } 2633 if (CompressedOops::shift() != 0) { 2634 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2635 srli(r, r, LogMinObjAlignmentInBytes); 2636 } 2637 } 2638 2639 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 2640 #ifdef ASSERT 2641 if (CheckCompressedOops) { 2642 Label ok; 2643 bnez(src, ok); 2644 stop("null oop passed to encode_heap_oop_not_null2"); 2645 bind(ok); 2646 } 2647 #endif 2648 verify_oop_msg(src, "broken oop in encode_heap_oop_not_null2"); 2649 2650 
Register data = src; 2651 if (CompressedOops::base() != nullptr) { 2652 sub(dst, src, xheapbase); 2653 data = dst; 2654 } 2655 if (CompressedOops::shift() != 0) { 2656 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2657 srli(dst, data, LogMinObjAlignmentInBytes); 2658 data = dst; 2659 } 2660 if (data == src) { 2661 mv(dst, src); 2662 } 2663 } 2664 2665 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { 2666 assert_different_registers(dst, tmp); 2667 assert_different_registers(src, tmp); 2668 if (UseCompressedClassPointers) { 2669 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); 2670 decode_klass_not_null(dst, tmp); 2671 } else { 2672 ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); 2673 } 2674 } 2675 2676 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { 2677 // FIXME: Should this be a store release? concurrent gcs assumes 2678 // klass length is valid if klass field is not null. 2679 if (UseCompressedClassPointers) { 2680 encode_klass_not_null(src, tmp); 2681 sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2682 } else { 2683 sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2684 } 2685 } 2686 2687 void MacroAssembler::store_klass_gap(Register dst, Register src) { 2688 if (UseCompressedClassPointers) { 2689 // Store to klass gap in destination 2690 sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); 2691 } 2692 } 2693 2694 void MacroAssembler::decode_klass_not_null(Register r, Register tmp) { 2695 assert_different_registers(r, tmp); 2696 decode_klass_not_null(r, r, tmp); 2697 } 2698 2699 void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { 2700 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2701 2702 if (CompressedKlassPointers::base() == nullptr) { 2703 if (CompressedKlassPointers::shift() != 0) { 2704 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), 
"decode alg wrong"); 2705 slli(dst, src, LogKlassAlignmentInBytes); 2706 } else { 2707 mv(dst, src); 2708 } 2709 return; 2710 } 2711 2712 Register xbase = dst; 2713 if (dst == src) { 2714 xbase = tmp; 2715 } 2716 2717 assert_different_registers(src, xbase); 2718 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2719 2720 if (CompressedKlassPointers::shift() != 0) { 2721 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2722 assert_different_registers(t0, xbase); 2723 shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); 2724 } else { 2725 add(dst, xbase, src); 2726 } 2727 } 2728 2729 void MacroAssembler::encode_klass_not_null(Register r, Register tmp) { 2730 assert_different_registers(r, tmp); 2731 encode_klass_not_null(r, r, tmp); 2732 } 2733 2734 void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { 2735 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2736 2737 if (CompressedKlassPointers::base() == nullptr) { 2738 if (CompressedKlassPointers::shift() != 0) { 2739 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2740 srli(dst, src, LogKlassAlignmentInBytes); 2741 } else { 2742 mv(dst, src); 2743 } 2744 return; 2745 } 2746 2747 if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && 2748 CompressedKlassPointers::shift() == 0) { 2749 zero_extend(dst, src, 32); 2750 return; 2751 } 2752 2753 Register xbase = dst; 2754 if (dst == src) { 2755 xbase = tmp; 2756 } 2757 2758 assert_different_registers(src, xbase); 2759 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2760 sub(dst, src, xbase); 2761 if (CompressedKlassPointers::shift() != 0) { 2762 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2763 srli(dst, dst, LogKlassAlignmentInBytes); 2764 } 2765 } 2766 2767 void MacroAssembler::decode_heap_oop_not_null(Register r) { 2768 decode_heap_oop_not_null(r, r); 
2769 } 2770 2771 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 2772 assert(UseCompressedOops, "should only be used for compressed headers"); 2773 assert(Universe::heap() != nullptr, "java heap should be initialized"); 2774 // Cannot assert, unverified entry point counts instructions (see .ad file) 2775 // vtableStubs also counts instructions in pd_code_size_limit. 2776 // Also do not verify_oop as this is called by verify_oop. 2777 if (CompressedOops::shift() != 0) { 2778 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2779 slli(dst, src, LogMinObjAlignmentInBytes); 2780 if (CompressedOops::base() != nullptr) { 2781 add(dst, xheapbase, dst); 2782 } 2783 } else { 2784 assert(CompressedOops::base() == nullptr, "sanity"); 2785 mv(dst, src); 2786 } 2787 } 2788 2789 void MacroAssembler::decode_heap_oop(Register d, Register s) { 2790 if (CompressedOops::base() == nullptr) { 2791 if (CompressedOops::shift() != 0 || d != s) { 2792 slli(d, s, CompressedOops::shift()); 2793 } 2794 } else { 2795 Label done; 2796 mv(d, s); 2797 beqz(s, done); 2798 shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); 2799 bind(done); 2800 } 2801 verify_oop_msg(d, "broken oop in decode_heap_oop"); 2802 } 2803 2804 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1, 2805 Register tmp2, Register tmp3, DecoratorSet decorators) { 2806 access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3); 2807 } 2808 2809 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, 2810 Register tmp2, DecoratorSet decorators) { 2811 access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); 2812 } 2813 2814 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, 2815 Register tmp2, DecoratorSet decorators) { 2816 access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, tmp2); 2817 } 2818 2819 // Used for storing nulls. 
2820 void MacroAssembler::store_heap_oop_null(Address dst) { 2821 access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg); 2822 } 2823 2824 int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, 2825 bool want_remainder, bool is_signed) 2826 { 2827 // Full implementation of Java idiv and irem. The function 2828 // returns the (pc) offset of the div instruction - may be needed 2829 // for implicit exceptions. 2830 // 2831 // input : rs1: dividend 2832 // rs2: divisor 2833 // 2834 // result: either 2835 // quotient (= rs1 idiv rs2) 2836 // remainder (= rs1 irem rs2) 2837 2838 2839 int idivl_offset = offset(); 2840 if (!want_remainder) { 2841 if (is_signed) { 2842 divw(result, rs1, rs2); 2843 } else { 2844 divuw(result, rs1, rs2); 2845 } 2846 } else { 2847 // result = rs1 % rs2; 2848 if (is_signed) { 2849 remw(result, rs1, rs2); 2850 } else { 2851 remuw(result, rs1, rs2); 2852 } 2853 } 2854 return idivl_offset; 2855 } 2856 2857 int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, 2858 bool want_remainder, bool is_signed) 2859 { 2860 // Full implementation of Java ldiv and lrem. The function 2861 // returns the (pc) offset of the div instruction - may be needed 2862 // for implicit exceptions. 2863 // 2864 // input : rs1: dividend 2865 // rs2: divisor 2866 // 2867 // result: either 2868 // quotient (= rs1 idiv rs2) 2869 // remainder (= rs1 irem rs2) 2870 2871 int idivq_offset = offset(); 2872 if (!want_remainder) { 2873 if (is_signed) { 2874 div(result, rs1, rs2); 2875 } else { 2876 divu(result, rs1, rs2); 2877 } 2878 } else { 2879 // result = rs1 % rs2; 2880 if (is_signed) { 2881 rem(result, rs1, rs2); 2882 } else { 2883 remu(result, rs1, rs2); 2884 } 2885 } 2886 return idivq_offset; 2887 } 2888 2889 // Look up the method for a megamorpic invkkeinterface call. 2890 // The target method is determined by <intf_klass, itable_index>. 2891 // The receiver klass is in recv_klass. 
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
//
// Register usage visible in this routine:
//   - scan_tmp walks the itableOffsetEntry array and is clobbered.
//   - method_result holds the interface klass read from each entry during
//     the scan, and the final method when return_method is true.
//   - t0 is used as scratch when return_method is true.
//   - recv_klass is modified (advanced by the scaled itable index) when
//     return_method is true.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_tmp,
                                             Label& L_no_such_interface,
                                             bool return_method) {
  assert_different_registers(recv_klass, intf_klass, scan_tmp);
  assert_different_registers(method_result, intf_klass, scan_tmp);
  assert(recv_klass != method_result || !return_method,
         "recv_klass can be destroyed when mehtid isn't needed");
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must be same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable).
  int vtable_base = in_bytes(Klass::vtable_start_offset());
  int itentry_off = in_bytes(itableMethodEntry::method_offset());
  int scan_step   = itableOffsetEntry::size() * wordSize;
  int vte_size    = vtableEntry::size_in_bytes();
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  // scan_tmp = vtable length (number of entries)
  lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset()));

  // Could store the aligned, prescaled offset in the klass.
  // scan_tmp = recv_klass + vtable_length * 8 + vtable_base, i.e. the first
  // itableOffsetEntry (the shift of 3 relies on vte_size == wordSize above).
  shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
  add(scan_tmp, scan_tmp, vtable_base);

  if (return_method) {
    // Adjust recv_klass by scaled itable_index, so we can free itable_index.
    assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
    if (itable_index.is_register()) {
      slli(t0, itable_index.as_register(), 3);
    } else {
      mv(t0, itable_index.as_constant() << 3);
    }
    add(recv_klass, recv_klass, t0);
    if (itentry_off) {
      add(recv_klass, recv_klass, itentry_off);
    }
  }

  Label search, found_method;

  // Peeled first iteration: test entry 0 before entering the loop.
  ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset()));
  beq(intf_klass, method_result, found_method);
  bind(search);
  // Check that the previous entry is non-null. A null entry means that
  // the receiver class doesn't implement the interface, and wasn't the
  // same as when the caller was compiled.
  beqz(method_result, L_no_such_interface, /* is_far */ true);
  addi(scan_tmp, scan_tmp, scan_step);
  ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset()));
  bne(intf_klass, method_result, search);

  bind(found_method);

  // Got a hit.
  if (return_method) {
    // scan_tmp = offset stored in the matching entry; the method is loaded
    // from recv_klass (already advanced by the scaled index) + that offset.
    lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset()));
    add(method_result, recv_klass, scan_tmp);
    ld(method_result, Address(method_result));
  }
}

// Look up the method for a megamorphic invokeinterface call in a single pass over itable:
// - check recv_klass (actual object class) is a subtype of resolved_klass from CompiledICData
// - find a holder_klass (class that implements the method) vtable offset and get the method from vtable by index
// The target method is determined by <holder_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method_stub(Register recv_klass,
                                                  Register holder_klass,
                                                  Register resolved_klass,
                                                  Register method_result,
                                                  Register temp_itbl_klass,
                                                  Register scan_temp,
                                                  int itable_index,
                                                  Label& L_no_such_interface) {
  // 'method_result' is only used as output register at the very end of this method.
  // Until then we can reuse it as 'holder_offset'.
  Register holder_offset = method_result;
  assert_different_registers(resolved_klass, recv_klass, holder_klass, temp_itbl_klass, scan_temp, holder_offset);

  int vtable_start_offset_bytes = in_bytes(Klass::vtable_start_offset());
  int scan_step = itableOffsetEntry::size() * wordSize;
  int ioffset_bytes = in_bytes(itableOffsetEntry::interface_offset());
  int ooffset_bytes = in_bytes(itableOffsetEntry::offset_offset());
  int itmentry_off_bytes = in_bytes(itableMethodEntry::method_offset());
  const int vte_scale = exact_log2(vtableEntry::size_in_bytes());

  Label L_loop_search_resolved_entry, L_resolved_found, L_holder_found;

  lwu(scan_temp, Address(recv_klass, Klass::vtable_length_offset()));
  // recv_klass is temporarily biased here; the bias is removed again just
  // before the final method load.
  add(recv_klass, recv_klass, vtable_start_offset_bytes + ioffset_bytes);
  // itableOffsetEntry[] itable = recv_klass + Klass::vtable_start_offset()
  //                            + sizeof(vtableEntry) * (recv_klass->_vtable_len);
  // scan_temp = &(itable[0]._interface)
  // temp_itbl_klass = itable[0]._interface;
  shadd(scan_temp, scan_temp, recv_klass, scan_temp, vte_scale);
  ld(temp_itbl_klass, Address(scan_temp));
  // holder_offset == 0 means "holder entry not seen yet".
  mv(holder_offset, zr);

  // Initial checks:
  //   - if (holder_klass != resolved_klass), go to "scan for resolved"
  //   - if (itable[0] == holder_klass), shortcut to "holder found"
  //   - if (itable[0] == 0), no such interface
  bne(resolved_klass, holder_klass, L_loop_search_resolved_entry);
  beq(holder_klass, temp_itbl_klass, L_holder_found);
  beqz(temp_itbl_klass, L_no_such_interface);

  // Loop: Look for holder_klass record in itable
  //   do {
  //     temp_itbl_klass = *(scan_temp += scan_step);
  //     if (temp_itbl_klass == holder_klass) {
  //       goto L_holder_found; // Found!
  //     }
  //   } while (temp_itbl_klass != 0);
  //   goto L_no_such_interface // Not found.
  Label L_search_holder;
  bind(L_search_holder);
  add(scan_temp, scan_temp, scan_step);
  ld(temp_itbl_klass, Address(scan_temp));
  beq(holder_klass, temp_itbl_klass, L_holder_found);
  bnez(temp_itbl_klass, L_search_holder);

  j(L_no_such_interface);

  // Loop: Look for resolved_class record in itable
  //   while (true) {
  //     temp_itbl_klass = *(scan_temp += scan_step);
  //     if (temp_itbl_klass == 0) {
  //       goto L_no_such_interface;
  //     }
  //     if (temp_itbl_klass == resolved_klass) {
  //        goto L_resolved_found;  // Found!
  //     }
  //     if (temp_itbl_klass == holder_klass) {
  //        holder_offset = scan_temp;
  //     }
  //   }
  //
  Label L_loop_search_resolved;
  bind(L_loop_search_resolved);
  add(scan_temp, scan_temp, scan_step);
  ld(temp_itbl_klass, Address(scan_temp));
  // Entry point for the first iteration: itable[0] is already loaded.
  bind(L_loop_search_resolved_entry);
  beqz(temp_itbl_klass, L_no_such_interface);
  beq(resolved_klass, temp_itbl_klass, L_resolved_found);
  bne(holder_klass, temp_itbl_klass, L_loop_search_resolved);
  mv(holder_offset, scan_temp);
  j(L_loop_search_resolved);

  // See if we already have a holder klass. If not, go and scan for it.
  bind(L_resolved_found);
  beqz(holder_offset, L_search_holder);
  mv(scan_temp, holder_offset);

  // Finally, scan_temp contains holder_klass vtable offset
  bind(L_holder_found);
  lwu(method_result, Address(scan_temp, ooffset_bytes - ioffset_bytes));
  add(recv_klass, recv_klass, itable_index * wordSize + itmentry_off_bytes
                              - vtable_start_offset_bytes - ioffset_bytes); // subtract offsets to restore the original value of recv_klass
  add(method_result, recv_klass, method_result);
  ld(method_result, Address(method_result));
}

// virtual method calling
//
// Loads into method_result the method stored in recv_klass's vtable at
// vtable_index, which may be either a register or a compile-time constant.
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           RegisterOrConstant vtable_index,
                                           Register method_result) {
  const ByteSize base = Klass::vtable_start_offset();
  assert(vtableEntry::size() * wordSize == 8,
         "adjust the scaling in the code below");
  int vtable_offset_in_bytes = in_bytes(base + vtableEntry::method_offset());

  if (vtable_index.is_register()) {
    // Scale the index by the word size, add recv_klass, then load at the
    // fixed vtable offset.
    shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord);
    ld(method_result, Address(method_result, vtable_offset_in_bytes));
  } else {
    // Constant index: fold it into the displacement. form_address may use
    // method_result as scratch if the displacement does not fit in 12 bits.
    vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
    ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes));
  }
}

// Emit a memory barrier for the given ordering constraint.
//
// If the instruction immediately preceding the current pc is the last
// instruction recorded in the code buffer and is itself a membar, the new
// constraint is ORed into that instruction instead of emitting a second
// fence. Otherwise the current pc is recorded as the last instruction and a
// fence with the matching predecessor/successor sets is emitted.
void MacroAssembler::membar(uint32_t order_constraint) {
  address prev = pc() - MacroAssembler::instruction_size;
  address last = code()->last_insn();

  if (last != nullptr && is_membar(last) && prev == last) {
    // We are merging two memory barrier instructions.  On RISCV we
    // can do this simply by ORing them together.
    set_membar_kind(prev, get_membar_kind(prev) | order_constraint);
    BLOCK_COMMENT("merged membar");
  } else {
    code()->set_last_insn(pc());

    uint32_t predecessor = 0;
    uint32_t successor = 0;

    membar_mask_to_pred_succ(order_constraint, predecessor, successor);
    fence(predecessor, successor);
  }
}

// Form an address from base + offset in Rd. Rd may or may not
// actually be used: you must use the Address that is returned.
// When byte_offset fits in a signed 12-bit immediate no code is emitted
// and Rd is left untouched; otherwise Rd is loaded with base + byte_offset
// and must differ from base.
Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_offset) {
  if (is_simm12(byte_offset)) { // 12: imm in range 2^12
    return Address(base, byte_offset);
  }

  assert_different_registers(Rd, base, noreg);

  // Do it the hard way
  mv(Rd, byte_offset);
  add(Rd, base, Rd);
  return Address(Rd);
}

// Full subtype check: runs the fast path followed by the slow path.
// Control transfers to L_success when sub_klass is a subtype of
// super_klass; otherwise execution falls through (the local failure label
// is bound at the end of the emitted code).
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register tmp_reg,
                                         Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, nullptr);
  check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, nullptr);
  bind(L_failure);
}

// Emit a safepoint poll that branches to slow_path when a poll is pending.
// Clobbers t0.
//
//   at_return  : compare the polling word against sp (in_nmethod) or fp
//                with an unsigned greater-than, instead of testing the
//                poll bit.
//   acquire    : emit a LoadLoad|LoadStore barrier after loading the
//                polling word.
void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) {
  ld(t0, Address(xthread, JavaThread::polling_word_offset()));
  if (acquire) {
    membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
  }
  if (at_return) {
    bgtu(in_nmethod ? sp : fp, t0, slow_path, /* is_far */ true);
  } else {
    test_bit(t0, t0, exact_log2(SafepointMechanism::poll_bit()));
    bnez(t0, slow_path, true /* is_far */);
  }
}

// Pointer-sized compare-and-exchange on the word at 'addr'.
//
// On success control transfers to 'succeed'. On failure a full (AnyAny)
// barrier is emitted, the value observed in memory is copied into oldv, and
// control transfers to *fail if it is non-null, otherwise falls through.
// tmp is clobbered; the asserts below require tmp and t0 to differ from
// addr, newv and oldv.
void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
                                Label &succeed, Label *fail) {
  assert_different_registers(addr, tmp, t0);
  assert_different_registers(newv, tmp, t0);
  assert_different_registers(oldv, tmp, t0);

  // oldv holds comparison value
  // newv holds value to write in exchange
  // addr identifies memory word to compare against/update
  if (UseZacas) {
    // tmp carries the compare value in; after the CAS it is compared with
    // oldv to detect success.
    mv(tmp, oldv);
    atomic_cas(tmp, newv, addr, Assembler::int64, Assembler::aq, Assembler::rl);
    beq(tmp, oldv, succeed);
  } else {
    Label retry_load, nope;
    bind(retry_load);
    // Load reserved from the memory location
    load_reserved(tmp, addr, int64, Assembler::aqrl);
    // Fail and exit if it is not what we expect
    bne(tmp, oldv, nope);
    // If the store conditional succeeds, tmp will be zero
    store_conditional(tmp, newv, addr, int64, Assembler::rl);
    beqz(tmp, succeed);
    // Retry only when the store conditional failed
    j(retry_load);

    bind(nope);
  }

  // neither amocas nor lr/sc have an implied barrier in the failing case
  membar(AnyAny);

  mv(oldv, tmp);
  if (fail != nullptr) {
    j(*fail);
  }
}

// Compare-and-exchange on an object's header word. Relies on the mark word
// being at offset 0 so that 'obj' can be used directly as the address.
void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp,
                                        Label &succeed, Label *fail) {
  assert(oopDesc::mark_offset_in_bytes() == 0, "assumption");
  cmpxchgptr(oldv, newv, obj, tmp, succeed, fail);
}

// Emit a load-reserved of the given size from 'addr' into 'dst'.
// uint32 uses lr.w followed by a zero-extension of the low 32 bits; other
// sizes than int64/int32/uint32 are not supported.
void MacroAssembler::load_reserved(Register dst,
                                   Register addr,
                                   enum operand_size size,
                                   Assembler::Aqrl acquire) {
  switch (size) {
    case int64:
      lr_d(dst, addr, acquire);
      break;
    case int32:
      lr_w(dst, addr, acquire);
      break;
    case uint32:
      lr_w(dst, addr, acquire);
      zero_extend(dst, dst, 32);
      break;
    default:
      ShouldNotReachHere();
  }
}

// Emit a store-conditional of 'new_val' to 'addr'; 'dst' receives the
// status of the store. int32 and uint32 both use sc.w; other sizes than
// int64/int32/uint32 are not supported.
void MacroAssembler::store_conditional(Register dst,
                                       Register new_val,
                                       Register addr,
                                       enum operand_size size,
                                       Assembler::Aqrl release) {
  switch (size) {
    case int64:
      sc_d(dst, new_val, addr, release);
      break;
    case int32:
    case uint32:
      sc_w(dst, new_val, addr, release);
      break;
    default:
      ShouldNotReachHere();
  }
}


// Common set-up for the byte/halfword compare-and-swap sequences.
//
// On exit:
//   t1   (aligned_addr) = addr & ~3
//   tmp1 (shift)        = (addr & 3) * 8, the bit position of the lane
//   tmp2 (mask)         = 0xff or 0xffff shifted into the lane
//   tmp3 (not_mask)     = ~mask
//   expected, new_val   = shifted into the lane and masked (both clobbered)
void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected,
                                                 Register new_val,
                                                 enum operand_size size,
                                                 Register tmp1, Register tmp2, Register tmp3) {
  assert(size == int8 || size == int16, "unsupported operand size");

  Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3;

  andi(shift, addr, 3);
  slli(shift, shift, 3);

  andi(aligned_addr, addr, ~3);

  if (size == int8) {
    mv(mask, 0xff);
  } else {
    // size == int16 case
    mv(mask, -1);
    zero_extend(mask, mask, 16);
  }
  sll(mask, mask, shift);

  notr(not_mask, mask);

  sll(expected, expected, shift);
  andr(expected, expected, mask);

  sll(new_val, new_val, shift);
  andr(new_val, new_val, mask);
}

// cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps.
// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w or amocas.w,
// which are forced to work with 4-byte aligned address.
3256 void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, 3257 Register new_val, 3258 enum operand_size size, 3259 Assembler::Aqrl acquire, Assembler::Aqrl release, 3260 Register result, bool result_as_bool, 3261 Register tmp1, Register tmp2, Register tmp3) { 3262 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; 3263 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); 3264 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); 3265 3266 Label retry, fail, done; 3267 3268 bind(retry); 3269 3270 if (UseZacas) { 3271 lw(old, aligned_addr); 3272 3273 // if old & mask != expected 3274 andr(tmp, old, mask); 3275 bne(tmp, expected, fail); 3276 3277 andr(tmp, old, not_mask); 3278 orr(tmp, tmp, new_val); 3279 3280 atomic_cas(old, tmp, aligned_addr, operand_size::int32, acquire, release); 3281 bne(tmp, old, retry); 3282 } else { 3283 lr_w(old, aligned_addr, acquire); 3284 andr(tmp, old, mask); 3285 bne(tmp, expected, fail); 3286 3287 andr(tmp, old, not_mask); 3288 orr(tmp, tmp, new_val); 3289 sc_w(tmp, tmp, aligned_addr, release); 3290 bnez(tmp, retry); 3291 } 3292 3293 if (result_as_bool) { 3294 mv(result, 1); 3295 j(done); 3296 3297 bind(fail); 3298 mv(result, zr); 3299 3300 bind(done); 3301 } else { 3302 andr(tmp, old, mask); 3303 3304 bind(fail); 3305 srl(result, tmp, shift); 3306 3307 if (size == int8) { 3308 sign_extend(result, result, 8); 3309 } else { 3310 // size == int16 case 3311 sign_extend(result, result, 16); 3312 } 3313 } 3314 } 3315 3316 // weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement 3317 // the weak CAS stuff. The major difference is that it just failed when store conditional 3318 // failed. 
3319 void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, 3320 Register new_val, 3321 enum operand_size size, 3322 Assembler::Aqrl acquire, Assembler::Aqrl release, 3323 Register result, 3324 Register tmp1, Register tmp2, Register tmp3) { 3325 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; 3326 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); 3327 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); 3328 3329 Label fail, done; 3330 3331 if (UseZacas) { 3332 lw(old, aligned_addr); 3333 3334 // if old & mask != expected 3335 andr(tmp, old, mask); 3336 bne(tmp, expected, fail); 3337 3338 andr(tmp, old, not_mask); 3339 orr(tmp, tmp, new_val); 3340 3341 atomic_cas(tmp, new_val, addr, operand_size::int32, acquire, release); 3342 bne(tmp, old, fail); 3343 } else { 3344 lr_w(old, aligned_addr, acquire); 3345 andr(tmp, old, mask); 3346 bne(tmp, expected, fail); 3347 3348 andr(tmp, old, not_mask); 3349 orr(tmp, tmp, new_val); 3350 sc_w(tmp, tmp, aligned_addr, release); 3351 bnez(tmp, fail); 3352 } 3353 3354 // Success 3355 mv(result, 1); 3356 j(done); 3357 3358 // Fail 3359 bind(fail); 3360 mv(result, zr); 3361 3362 bind(done); 3363 } 3364 3365 void MacroAssembler::cmpxchg(Register addr, Register expected, 3366 Register new_val, 3367 enum operand_size size, 3368 Assembler::Aqrl acquire, Assembler::Aqrl release, 3369 Register result, bool result_as_bool) { 3370 assert(size != int8 && size != int16, "unsupported operand size"); 3371 assert_different_registers(addr, t0); 3372 assert_different_registers(expected, t0); 3373 assert_different_registers(new_val, t0); 3374 3375 if (UseZacas) { 3376 if (result_as_bool) { 3377 mv(t0, expected); 3378 atomic_cas(t0, new_val, addr, size, acquire, release); 3379 xorr(t0, t0, expected); 3380 seqz(result, t0); 3381 } else { 3382 mv(result, expected); 3383 atomic_cas(result, new_val, addr, size, acquire, 
release); 3384 } 3385 return; 3386 } 3387 3388 Label retry_load, done, ne_done; 3389 bind(retry_load); 3390 load_reserved(t0, addr, size, acquire); 3391 bne(t0, expected, ne_done); 3392 store_conditional(t0, new_val, addr, size, release); 3393 bnez(t0, retry_load); 3394 3395 // equal, succeed 3396 if (result_as_bool) { 3397 mv(result, 1); 3398 } else { 3399 mv(result, expected); 3400 } 3401 j(done); 3402 3403 // not equal, failed 3404 bind(ne_done); 3405 if (result_as_bool) { 3406 mv(result, zr); 3407 } else { 3408 mv(result, t0); 3409 } 3410 3411 bind(done); 3412 } 3413 3414 void MacroAssembler::cmpxchg_weak(Register addr, Register expected, 3415 Register new_val, 3416 enum operand_size size, 3417 Assembler::Aqrl acquire, Assembler::Aqrl release, 3418 Register result) { 3419 if (UseZacas) { 3420 cmpxchg(addr, expected, new_val, size, acquire, release, result, true); 3421 return; 3422 } 3423 3424 assert_different_registers(addr, t0); 3425 assert_different_registers(expected, t0); 3426 assert_different_registers(new_val, t0); 3427 3428 Label fail, done; 3429 load_reserved(t0, addr, size, acquire); 3430 bne(t0, expected, fail); 3431 store_conditional(t0, new_val, addr, size, release); 3432 bnez(t0, fail); 3433 3434 // Success 3435 mv(result, 1); 3436 j(done); 3437 3438 // Fail 3439 bind(fail); 3440 mv(result, zr); 3441 3442 bind(done); 3443 } 3444 3445 #define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ 3446 void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ 3447 prev = prev->is_valid() ? 
prev : zr; \ 3448 if (incr.is_register()) { \ 3449 AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3450 } else { \ 3451 mv(t0, incr.as_constant()); \ 3452 AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3453 } \ 3454 return; \ 3455 } 3456 3457 ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) 3458 ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) 3459 ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) 3460 ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) 3461 3462 #undef ATOMIC_OP 3463 3464 #define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ 3465 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ 3466 prev = prev->is_valid() ? prev : zr; \ 3467 AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3468 return; \ 3469 } 3470 3471 ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) 3472 ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) 3473 ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) 3474 ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) 3475 3476 #undef ATOMIC_XCHG 3477 3478 #define ATOMIC_XCHGU(OP1, OP2) \ 3479 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ 3480 atomic_##OP2(prev, newv, addr); \ 3481 zero_extend(prev, prev, 32); \ 3482 return; \ 3483 } 3484 3485 ATOMIC_XCHGU(xchgwu, xchgw) 3486 ATOMIC_XCHGU(xchgalwu, xchgalw) 3487 3488 #undef ATOMIC_XCHGU 3489 3490 #define ATOMIC_CAS(OP, AOP, ACQUIRE, RELEASE) \ 3491 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ 3492 assert(UseZacas, "invariant"); \ 3493 prev = prev->is_valid() ? 
prev : zr; \ 3494 AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3495 return; \ 3496 } 3497 3498 ATOMIC_CAS(cas, amocas_d, Assembler::relaxed, Assembler::relaxed) 3499 ATOMIC_CAS(casw, amocas_w, Assembler::relaxed, Assembler::relaxed) 3500 ATOMIC_CAS(casl, amocas_d, Assembler::relaxed, Assembler::rl) 3501 ATOMIC_CAS(caslw, amocas_w, Assembler::relaxed, Assembler::rl) 3502 ATOMIC_CAS(casal, amocas_d, Assembler::aq, Assembler::rl) 3503 ATOMIC_CAS(casalw, amocas_w, Assembler::aq, Assembler::rl) 3504 3505 #undef ATOMIC_CAS 3506 3507 #define ATOMIC_CASU(OP1, OP2) \ 3508 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ 3509 atomic_##OP2(prev, newv, addr); \ 3510 zero_extend(prev, prev, 32); \ 3511 return; \ 3512 } 3513 3514 ATOMIC_CASU(caswu, casw) 3515 ATOMIC_CASU(caslwu, caslw) 3516 ATOMIC_CASU(casalwu, casalw) 3517 3518 #undef ATOMIC_CASU 3519 3520 void MacroAssembler::atomic_cas( 3521 Register prev, Register newv, Register addr, enum operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release) { 3522 switch (size) { 3523 case int64: 3524 switch ((Assembler::Aqrl)(acquire | release)) { 3525 case Assembler::relaxed: 3526 atomic_cas(prev, newv, addr); 3527 break; 3528 case Assembler::rl: 3529 atomic_casl(prev, newv, addr); 3530 break; 3531 case Assembler::aqrl: 3532 atomic_casal(prev, newv, addr); 3533 break; 3534 default: 3535 ShouldNotReachHere(); 3536 } 3537 break; 3538 case int32: 3539 switch ((Assembler::Aqrl)(acquire | release)) { 3540 case Assembler::relaxed: 3541 atomic_casw(prev, newv, addr); 3542 break; 3543 case Assembler::rl: 3544 atomic_caslw(prev, newv, addr); 3545 break; 3546 case Assembler::aqrl: 3547 atomic_casalw(prev, newv, addr); 3548 break; 3549 default: 3550 ShouldNotReachHere(); 3551 } 3552 break; 3553 case uint32: 3554 switch ((Assembler::Aqrl)(acquire | release)) { 3555 case Assembler::relaxed: 3556 atomic_caswu(prev, newv, addr); 3557 break; 3558 case Assembler::rl: 3559 
atomic_caslwu(prev, newv, addr); 3560 break; 3561 case Assembler::aqrl: 3562 atomic_casalwu(prev, newv, addr); 3563 break; 3564 default: 3565 ShouldNotReachHere(); 3566 } 3567 break; 3568 default: 3569 ShouldNotReachHere(); 3570 } 3571 } 3572 3573 void MacroAssembler::far_jump(const Address &entry, Register tmp) { 3574 assert(CodeCache::find_blob(entry.target()) != nullptr, 3575 "destination of far jump not found in code cache"); 3576 assert(entry.rspec().type() == relocInfo::external_word_type 3577 || entry.rspec().type() == relocInfo::runtime_call_type 3578 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type"); 3579 // Fixed length: see MacroAssembler::far_branch_size() 3580 // We can use auipc + jr here because we know that the total size of 3581 // the code cache cannot exceed 2Gb. 3582 relocate(entry.rspec(), [&] { 3583 int64_t distance = entry.target() - pc(); 3584 int32_t offset = ((int32_t)distance << 20) >> 20; 3585 assert(is_valid_32bit_offset(distance), "Far jump using wrong instructions."); 3586 auipc(tmp, (int32_t)distance + 0x800); 3587 jr(tmp, offset); 3588 }); 3589 } 3590 3591 void MacroAssembler::far_call(const Address &entry, Register tmp) { 3592 assert(CodeCache::find_blob(entry.target()) != nullptr, 3593 "destination of far call not found in code cache"); 3594 assert(entry.rspec().type() == relocInfo::external_word_type 3595 || entry.rspec().type() == relocInfo::runtime_call_type 3596 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type"); 3597 // Fixed length: see MacroAssembler::far_branch_size() 3598 // We can use auipc + jalr here because we know that the total size of 3599 // the code cache cannot exceed 2Gb. 
3600 relocate(entry.rspec(), [&] { 3601 int64_t distance = entry.target() - pc(); 3602 int32_t offset = ((int32_t)distance << 20) >> 20; 3603 assert(is_valid_32bit_offset(distance), "Far call using wrong instructions."); 3604 auipc(tmp, (int32_t)distance + 0x800); 3605 jalr(tmp, offset); 3606 }); 3607 } 3608 3609 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 3610 Register super_klass, 3611 Register tmp_reg, 3612 Label* L_success, 3613 Label* L_failure, 3614 Label* L_slow_path, 3615 Register super_check_offset) { 3616 assert_different_registers(sub_klass, super_klass, tmp_reg); 3617 bool must_load_sco = (super_check_offset == noreg); 3618 if (must_load_sco) { 3619 assert(tmp_reg != noreg, "supply either a temp or a register offset"); 3620 } else { 3621 assert_different_registers(sub_klass, super_klass, super_check_offset); 3622 } 3623 3624 Label L_fallthrough; 3625 int label_nulls = 0; 3626 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 3627 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 3628 if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; } 3629 assert(label_nulls <= 1, "at most one null in batch"); 3630 3631 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3632 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 3633 Address super_check_offset_addr(super_klass, sco_offset); 3634 3635 // Hacked jmp, which may only be used just before L_fallthrough. 3636 #define final_jmp(label) \ 3637 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 3638 else j(label) /*omit semi*/ 3639 3640 // If the pointers are equal, we are done (e.g., String[] elements). 3641 // This self-check enables sharing of secondary supertype arrays among 3642 // non-primary types such as array-of-interface. Otherwise, each such 3643 // type would need its own customized SSA. 
3644 // We move this check to the front of the fast path because many 3645 // type checks are in fact trivially successful in this manner, 3646 // so we get a nicely predicted branch right at the start of the check. 3647 beq(sub_klass, super_klass, *L_success); 3648 3649 // Check the supertype display: 3650 if (must_load_sco) { 3651 lwu(tmp_reg, super_check_offset_addr); 3652 super_check_offset = tmp_reg; 3653 } 3654 add(t0, sub_klass, super_check_offset); 3655 Address super_check_addr(t0); 3656 ld(t0, super_check_addr); // load displayed supertype 3657 3658 // This check has worked decisively for primary supers. 3659 // Secondary supers are sought in the super_cache ('super_cache_addr'). 3660 // (Secondary supers are interfaces and very deeply nested subtypes.) 3661 // This works in the same check above because of a tricky aliasing 3662 // between the super_Cache and the primary super display elements. 3663 // (The 'super_check_addr' can address either, as the case requires.) 3664 // Note that the cache is updated below if it does not help us find 3665 // what we need immediately. 3666 // So if it was a primary super, we can just fail immediately. 3667 // Otherwise, it's the slow path for us (no success at this point). 
3668 3669 beq(super_klass, t0, *L_success); 3670 mv(t1, sc_offset); 3671 if (L_failure == &L_fallthrough) { 3672 beq(super_check_offset, t1, *L_slow_path); 3673 } else { 3674 bne(super_check_offset, t1, *L_failure, /* is_far */ true); 3675 final_jmp(*L_slow_path); 3676 } 3677 3678 bind(L_fallthrough); 3679 3680 #undef final_jmp 3681 } 3682 3683 // Scans count pointer sized words at [addr] for occurrence of value, 3684 // generic 3685 void MacroAssembler::repne_scan(Register addr, Register value, Register count, 3686 Register tmp) { 3687 Label Lloop, Lexit; 3688 beqz(count, Lexit); 3689 bind(Lloop); 3690 ld(tmp, addr); 3691 beq(value, tmp, Lexit); 3692 add(addr, addr, wordSize); 3693 sub(count, count, 1); 3694 bnez(count, Lloop); 3695 bind(Lexit); 3696 } 3697 3698 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 3699 Register super_klass, 3700 Register tmp1_reg, 3701 Register tmp2_reg, 3702 Label* L_success, 3703 Label* L_failure) { 3704 assert_different_registers(sub_klass, super_klass, tmp1_reg); 3705 if (tmp2_reg != noreg) { 3706 assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); 3707 } 3708 #define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) 3709 3710 Label L_fallthrough; 3711 int label_nulls = 0; 3712 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 3713 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 3714 3715 assert(label_nulls <= 1, "at most one null in the batch"); 3716 3717 // A couple of useful fields in sub_klass: 3718 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 3719 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3720 Address secondary_supers_addr(sub_klass, ss_offset); 3721 Address super_cache_addr( sub_klass, sc_offset); 3722 3723 BLOCK_COMMENT("check_klass_subtype_slow_path"); 3724 3725 // Do a linear scan of the secondary super-klass chain. 3726 // This code is rarely used, so simplicity is a virtue here. 
3727 // The repne_scan instruction uses fixed registers, which we must spill. 3728 // Don't worry too much about pre-existing connections with the input regs. 3729 3730 assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) 3731 assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) 3732 3733 RegSet pushed_registers; 3734 if (!IS_A_TEMP(x12)) { 3735 pushed_registers += x12; 3736 } 3737 if (!IS_A_TEMP(x15)) { 3738 pushed_registers += x15; 3739 } 3740 3741 if (super_klass != x10) { 3742 if (!IS_A_TEMP(x10)) { 3743 pushed_registers += x10; 3744 } 3745 } 3746 3747 push_reg(pushed_registers, sp); 3748 3749 // Get super_klass value into x10 (even if it was in x15 or x12) 3750 mv(x10, super_klass); 3751 3752 #ifndef PRODUCT 3753 incrementw(ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr)); 3754 #endif // PRODUCT 3755 3756 // We will consult the secondary-super array. 3757 ld(x15, secondary_supers_addr); 3758 // Load the array length. 3759 lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes())); 3760 // Skip to start of data. 3761 add(x15, x15, Array<Klass*>::base_offset_in_bytes()); 3762 3763 // Set t0 to an obvious invalid value, falling through by default 3764 mv(t0, -1); 3765 // Scan X12 words at [X15] for an occurrence of X10. 3766 repne_scan(x15, x10, x12, t0); 3767 3768 // pop will restore x10, so we should use a temp register to keep its value 3769 mv(t1, x10); 3770 3771 // Unspill the temp registers: 3772 pop_reg(pushed_registers, sp); 3773 3774 bne(t1, t0, *L_failure); 3775 3776 // Success. Cache the super we found an proceed in triumph. 3777 sd(super_klass, super_cache_addr); 3778 3779 if (L_success != &L_fallthrough) { 3780 j(*L_success); 3781 } 3782 3783 #undef IS_A_TEMP 3784 3785 bind(L_fallthrough); 3786 } 3787 3788 // population_count variant for running without the CPOP 3789 // instruction, which was introduced with Zbb extension. 
// Writes the number of set bits in 'src' to 'dst'. When
// UsePopCountInstruction is set a single cpop is emitted and the temps are
// unused; otherwise a loop clears the lowest set bit once per iteration,
// using tmp1 and tmp2 as scratch (both must differ from src and dst).
void MacroAssembler::population_count(Register dst, Register src,
                                      Register tmp1, Register tmp2) {
  if (UsePopCountInstruction) {
    cpop(dst, src);
  } else {
    assert_different_registers(src, tmp1, tmp2);
    assert_different_registers(dst, tmp1, tmp2);
    Label loop, done;

    mv(tmp1, src);
    // dst = 0;
    // while(tmp1 != 0) {
    //   dst++;
    //   tmp1 &= (tmp1 - 1);
    // }
    mv(dst, zr);
    beqz(tmp1, done);
    {
      bind(loop);
      addi(dst, dst, 1);
      addi(tmp2, tmp1, -1);
      andr(tmp1, tmp1, tmp2);
      bnez(tmp1, loop);
    }
    bind(done);
  }
}

// Ensure that the inline code and the stub are using the same registers
// as we need to call the stub from inline code when there is a collision
// in the hashed lookup in the secondary supers array.
#define LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS(r_super_klass, r_array_base, r_array_length,  \
                                                r_array_index, r_sub_klass, result, r_bitmap) \
do {                                                                                          \
  assert(r_super_klass  == x10                             &&                                 \
         r_array_base   == x11                             &&                                 \
         r_array_length == x12                             &&                                 \
         (r_array_index == x13  || r_array_index == noreg) &&                                 \
         (r_sub_klass   == x14  || r_sub_klass   == noreg) &&                                 \
         (result        == x15  || result        == noreg) &&                                 \
         (r_bitmap      == x16  || r_bitmap      == noreg), "registers must match riscv.ad"); \
} while(0)

// Inline part of the hashed secondary-supers lookup for a super klass whose
// table slot (super_klass_slot) is known when the code is generated.
//
// 'result' is zero on a match and non-zero on a mismatch (it is initialized
// to 1, and on the probe path holds the XOR of the probed entry with
// r_super_klass). If the first probe misses and the bitmap indicates a
// further candidate, the slow-path stub is called.
//
// Return true: we succeeded in generating this code
bool MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass,
                                                   Register r_super_klass,
                                                   Register result,
                                                   Register tmp1,
                                                   Register tmp2,
                                                   Register tmp3,
                                                   Register tmp4,
                                                   u1 super_klass_slot,
                                                   bool stub_is_near) {
  assert_different_registers(r_sub_klass, r_super_klass, result, tmp1, tmp2, tmp3, tmp4, t0);

  Label L_fallthrough;

  BLOCK_COMMENT("lookup_secondary_supers_table {");

  const Register
    r_array_base   = tmp1, // x11
    r_array_length = tmp2, // x12
    r_array_index  = tmp3, // x13
    r_bitmap       = tmp4; // x16

  LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS(r_super_klass, r_array_base, r_array_length,
                                          r_array_index, r_sub_klass, result, r_bitmap);

  u1 bit = super_klass_slot;

  // Initialize result value to 1 which means mismatch.
  mv(result, 1);

  ld(r_bitmap, Address(r_sub_klass, Klass::bitmap_offset()));

  // First check the bitmap to see if super_klass might be present. If
  // the bit is zero, we are certain that super_klass is not one of
  // the secondary supers.
  test_bit(t0, r_bitmap, bit);
  beqz(t0, L_fallthrough);

  // Get the first array index that can contain super_klass into r_array_index.
  if (bit != 0) {
    // Shift the bits above 'bit' out of the register, then count what is
    // left. tmp1/tmp2 are only scratch here; they are loaded with their
    // real contents later.
    slli(r_array_index, r_bitmap, (Klass::SECONDARY_SUPERS_TABLE_MASK - bit));
    population_count(r_array_index, r_array_index, tmp1, tmp2);
  } else {
    mv(r_array_index, (u1)1);
  }

  // We will consult the secondary-super array.
  ld(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset())));

  // The value i in r_array_index is >= 1, so even though r_array_base
  // points to the length, we don't need to adjust it to point to the data.
  assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "Adjust this code");
  assert(Array<Klass*>::length_offset_in_bytes() == 0, "Adjust this code");

  shadd(result, r_array_index, r_array_base, result, LogBytesPerWord);
  ld(result, Address(result));
  xorr(result, result, r_super_klass);
  beqz(result, L_fallthrough); // Found a match

  // Is there another entry to check? Consult the bitmap.
  test_bit(t0, r_bitmap, (bit + 1) & Klass::SECONDARY_SUPERS_TABLE_MASK);
  beqz(t0, L_fallthrough);

  // Linear probe.
  if (bit != 0) {
    ror_imm(r_bitmap, r_bitmap, bit);
  }

  // The slot we just inspected is at secondary_supers[r_array_index - 1].
  // The next slot to be inspected, by the stub we're about to call,
  // is secondary_supers[r_array_index]. Bits 0 and 1 in the bitmap
  // have been checked.
  rt_call(StubRoutines::lookup_secondary_supers_table_slow_path_stub());

  BLOCK_COMMENT("} lookup_secondary_supers_table");

  bind(L_fallthrough);

  if (VerifySecondarySupers) {
    verify_secondary_supers_table(r_sub_klass, r_super_klass, // x14, x10
                                  result, tmp1, tmp2, tmp3);  // x15, x11, x12, x13
  }
  return true;
}

// Called by code generated by check_klass_subtype_slow_path
// above. This is called when there is a collision in the hashed
// lookup in the secondary supers array.
void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_klass,
                                                             Register r_array_base,
                                                             Register r_array_index,
                                                             Register r_bitmap,
                                                             Register result,
                                                             Register tmp1) {
  assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, tmp1, result, t0);

  const Register
    r_array_length = tmp1,
    r_sub_klass    = noreg; // unused

  LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS(r_super_klass, r_array_base, r_array_length,
                                          r_array_index, r_sub_klass, result, r_bitmap);

  Label L_matched, L_fallthrough, L_bitmap_full;

  // Initialize result value to 1 which means mismatch.
  mv(result, 1);

  // Load the array length.
  lwu(r_array_length, Address(r_array_base, Array<Klass*>::length_offset_in_bytes()));
  // And adjust the array base to point to the data.
  // NB! Effectively increments current slot index by 1.
3945 assert(Array<Klass*>::base_offset_in_bytes() == wordSize, ""); 3946 addi(r_array_base, r_array_base, Array<Klass*>::base_offset_in_bytes()); 3947 3948 // Check if bitmap is SECONDARY_SUPERS_BITMAP_FULL 3949 assert(Klass::SECONDARY_SUPERS_BITMAP_FULL == ~uintx(0), "Adjust this code"); 3950 subw(t0, r_array_length, Klass::SECONDARY_SUPERS_TABLE_SIZE - 2); 3951 bgtz(t0, L_bitmap_full); 3952 3953 // NB! Our caller has checked bits 0 and 1 in the bitmap. The 3954 // current slot (at secondary_supers[r_array_index]) has not yet 3955 // been inspected, and r_array_index may be out of bounds if we 3956 // wrapped around the end of the array. 3957 3958 { // This is conventional linear probing, but instead of terminating 3959 // when a null entry is found in the table, we maintain a bitmap 3960 // in which a 0 indicates missing entries. 3961 // The check above guarantees there are 0s in the bitmap, so the loop 3962 // eventually terminates. 3963 Label L_loop; 3964 bind(L_loop); 3965 3966 // Check for wraparound. 3967 Label skip; 3968 blt(r_array_index, r_array_length, skip); 3969 mv(r_array_index, zr); 3970 bind(skip); 3971 3972 shadd(t0, r_array_index, r_array_base, t0, LogBytesPerWord); 3973 ld(t0, Address(t0)); 3974 beq(t0, r_super_klass, L_matched); 3975 3976 test_bit(t0, r_bitmap, 2); // look-ahead check (Bit 2); result is non-zero 3977 beqz(t0, L_fallthrough); 3978 3979 ror_imm(r_bitmap, r_bitmap, 1); 3980 addi(r_array_index, r_array_index, 1); 3981 j(L_loop); 3982 } 3983 3984 { // Degenerate case: more than 64 secondary supers. 3985 // FIXME: We could do something smarter here, maybe a vectorized 3986 // comparison or a binary search, but is that worth any added 3987 // complexity? 
3988 bind(L_bitmap_full); 3989 repne_scan(r_array_base, r_super_klass, r_array_length, t0); 3990 bne(r_super_klass, t0, L_fallthrough); 3991 } 3992 3993 bind(L_matched); 3994 mv(result, zr); 3995 3996 bind(L_fallthrough); 3997 } 3998 3999 // Make sure that the hashed lookup and a linear scan agree. 4000 void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass, 4001 Register r_super_klass, 4002 Register result, 4003 Register tmp1, 4004 Register tmp2, 4005 Register tmp3) { 4006 assert_different_registers(r_sub_klass, r_super_klass, tmp1, tmp2, tmp3, result, t0); 4007 4008 const Register 4009 r_array_base = tmp1, // X11 4010 r_array_length = tmp2, // X12 4011 r_array_index = noreg, // unused 4012 r_bitmap = noreg; // unused 4013 4014 LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS(r_super_klass, r_array_base, r_array_length, 4015 r_array_index, r_sub_klass, result, r_bitmap); 4016 4017 BLOCK_COMMENT("verify_secondary_supers_table {"); 4018 4019 // We will consult the secondary-super array. 4020 ld(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset()))); 4021 4022 // Load the array length. 4023 lwu(r_array_length, Address(r_array_base, Array<Klass*>::length_offset_in_bytes())); 4024 // And adjust the array base to point to the data. 
4025 addi(r_array_base, r_array_base, Array<Klass*>::base_offset_in_bytes()); 4026 4027 repne_scan(r_array_base, r_super_klass, r_array_length, t0); 4028 Label failed; 4029 mv(tmp3, 1); 4030 bne(r_super_klass, t0, failed); 4031 mv(tmp3, zr); 4032 bind(failed); 4033 4034 snez(result, result); // normalize result to 0/1 for comparison 4035 4036 Label passed; 4037 beq(tmp3, result, passed); 4038 { 4039 mv(x10, r_super_klass); 4040 mv(x11, r_sub_klass); 4041 mv(x12, tmp3); 4042 mv(x13, result); 4043 mv(x14, (address)("mismatch")); 4044 rt_call(CAST_FROM_FN_PTR(address, Klass::on_secondary_supers_verification_failure)); 4045 should_not_reach_here(); 4046 } 4047 bind(passed); 4048 4049 BLOCK_COMMENT("} verify_secondary_supers_table"); 4050 } 4051 4052 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 4053 void MacroAssembler::tlab_allocate(Register obj, 4054 Register var_size_in_bytes, 4055 int con_size_in_bytes, 4056 Register tmp1, 4057 Register tmp2, 4058 Label& slow_case, 4059 bool is_far) { 4060 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 4061 bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); 4062 } 4063 4064 // get_thread() can be called anywhere inside generated code so we 4065 // need to save whatever non-callee save context might get clobbered 4066 // by the call to Thread::current() or, indeed, the call setup code. 
4067 void MacroAssembler::get_thread(Register thread) { 4068 // save all call-clobbered regs except thread 4069 RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + 4070 RegSet::range(x28, x31) + ra - thread; 4071 push_reg(saved_regs, sp); 4072 4073 mv(ra, CAST_FROM_FN_PTR(address, Thread::current)); 4074 jalr(ra); 4075 if (thread != c_rarg0) { 4076 mv(thread, c_rarg0); 4077 } 4078 4079 // restore pushed registers 4080 pop_reg(saved_regs, sp); 4081 } 4082 4083 void MacroAssembler::load_byte_map_base(Register reg) { 4084 CardTable::CardValue* byte_map_base = 4085 ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); 4086 mv(reg, (uint64_t)byte_map_base); 4087 } 4088 4089 void MacroAssembler::build_frame(int framesize) { 4090 assert(framesize >= 2, "framesize must include space for FP/RA"); 4091 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 4092 sub(sp, sp, framesize); 4093 sd(fp, Address(sp, framesize - 2 * wordSize)); 4094 sd(ra, Address(sp, framesize - wordSize)); 4095 if (PreserveFramePointer) { add(fp, sp, framesize); } 4096 } 4097 4098 void MacroAssembler::remove_frame(int framesize) { 4099 assert(framesize >= 2, "framesize must include space for FP/RA"); 4100 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 4101 ld(fp, Address(sp, framesize - 2 * wordSize)); 4102 ld(ra, Address(sp, framesize - wordSize)); 4103 add(sp, sp, framesize); 4104 } 4105 4106 void MacroAssembler::reserved_stack_check() { 4107 // testing if reserved zone needs to be enabled 4108 Label no_reserved_zone_enabling; 4109 4110 ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); 4111 bltu(sp, t0, no_reserved_zone_enabling); 4112 4113 enter(); // RA and FP are live. 4114 mv(c_rarg0, xthread); 4115 rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)); 4116 leave(); 4117 4118 // We have already removed our own frame. 
4119 // throw_delayed_StackOverflowError will think that it's been 4120 // called by our caller. 4121 la(t0, RuntimeAddress(SharedRuntime::throw_delayed_StackOverflowError_entry())); 4122 jr(t0); 4123 should_not_reach_here(); 4124 4125 bind(no_reserved_zone_enabling); 4126 } 4127 4128 // Move the address of the polling page into dest. 4129 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { 4130 ld(dest, Address(xthread, JavaThread::polling_page_offset())); 4131 } 4132 4133 // Read the polling page. The address of the polling page must 4134 // already be in r. 4135 void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { 4136 relocate(rtype, [&] { 4137 lwu(zr, Address(r, offset)); 4138 }); 4139 } 4140 4141 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 4142 #ifdef ASSERT 4143 { 4144 ThreadInVMfromUnknown tiv; 4145 assert (UseCompressedOops, "should only be used for compressed oops"); 4146 assert (Universe::heap() != nullptr, "java heap should be initialized"); 4147 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 4148 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 4149 } 4150 #endif 4151 int oop_index = oop_recorder()->find_index(obj); 4152 relocate(oop_Relocation::spec(oop_index), [&] { 4153 li32(dst, 0xDEADBEEF); 4154 }); 4155 zero_extend(dst, dst, 32); 4156 } 4157 4158 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 4159 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 4160 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 4161 int index = oop_recorder()->find_index(k); 4162 assert(!Universe::heap()->is_in(k), "should not be an oop"); 4163 4164 narrowKlass nk = CompressedKlassPointers::encode(k); 4165 relocate(metadata_Relocation::spec(index), [&] { 4166 li32(dst, nk); 4167 }); 4168 zero_extend(dst, dst, 32); 4169 } 4170 4171 // Maybe emit a 
call via a trampoline. If the code cache is small 4172 // trampolines won't be emitted. 4173 address MacroAssembler::trampoline_call(Address entry) { 4174 assert(entry.rspec().type() == relocInfo::runtime_call_type || 4175 entry.rspec().type() == relocInfo::opt_virtual_call_type || 4176 entry.rspec().type() == relocInfo::static_call_type || 4177 entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); 4178 4179 address target = entry.target(); 4180 4181 // We need a trampoline if branches are far. 4182 if (!in_scratch_emit_size()) { 4183 if (entry.rspec().type() == relocInfo::runtime_call_type) { 4184 assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs"); 4185 code()->share_trampoline_for(entry.target(), offset()); 4186 } else { 4187 address stub = emit_trampoline_stub(offset(), target); 4188 if (stub == nullptr) { 4189 postcond(pc() == badAddress); 4190 return nullptr; // CodeCache is full 4191 } 4192 } 4193 } 4194 target = pc(); 4195 4196 address call_pc = pc(); 4197 #ifdef ASSERT 4198 if (entry.rspec().type() != relocInfo::runtime_call_type) { 4199 assert_alignment(call_pc); 4200 } 4201 #endif 4202 relocate(entry.rspec(), [&] { 4203 jump_link(target, t0); 4204 }); 4205 4206 postcond(pc() != badAddress); 4207 return call_pc; 4208 } 4209 4210 address MacroAssembler::load_and_call(Address entry) { 4211 assert(entry.rspec().type() == relocInfo::runtime_call_type || 4212 entry.rspec().type() == relocInfo::opt_virtual_call_type || 4213 entry.rspec().type() == relocInfo::static_call_type || 4214 entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); 4215 4216 address target = entry.target(); 4217 4218 if (!in_scratch_emit_size()) { 4219 address stub = emit_address_stub(offset(), target); 4220 if (stub == nullptr) { 4221 postcond(pc() == badAddress); 4222 return nullptr; // CodeCache is full 4223 } 4224 } 4225 4226 address call_pc = pc(); 4227 #ifdef ASSERT 4228 if (entry.rspec().type() != 
relocInfo::runtime_call_type) { 4229 assert_alignment(call_pc); 4230 } 4231 #endif 4232 relocate(entry.rspec(), [&] { 4233 load_link_jump(target); 4234 }); 4235 4236 postcond(pc() != badAddress); 4237 return call_pc; 4238 } 4239 4240 address MacroAssembler::ic_call(address entry, jint method_index) { 4241 RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); 4242 IncompressibleRegion ir(this); // relocations 4243 movptr(t1, (address)Universe::non_oop_word(), t0); 4244 assert_cond(entry != nullptr); 4245 return reloc_call(Address(entry, rh)); 4246 } 4247 4248 int MacroAssembler::ic_check_size() { 4249 // No compressed 4250 return (MacroAssembler::instruction_size * (2 /* 2 loads */ + 1 /* branch */)) + 4251 far_branch_size(); 4252 } 4253 4254 int MacroAssembler::ic_check(int end_alignment) { 4255 IncompressibleRegion ir(this); 4256 Register receiver = j_rarg0; 4257 Register data = t1; 4258 4259 Register tmp1 = t0; // t0 always scratch 4260 // t2 is saved on call, thus should have been saved before this check. 4261 // Hence we can clobber it. 4262 Register tmp2 = t2; 4263 4264 // The UEP of a code blob ensures that the VEP is padded. However, the padding of the UEP is placed 4265 // before the inline cache check, so we don't have to execute any nop instructions when dispatching 4266 // through the UEP, yet we can ensure that the VEP is aligned appropriately. That's why we align 4267 // before the inline cache check here, and not after 4268 align(end_alignment, ic_check_size()); 4269 int uep_offset = offset(); 4270 4271 if (UseCompressedClassPointers) { 4272 lwu(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes())); 4273 lwu(tmp2, Address(data, CompiledICData::speculated_klass_offset())); 4274 } else { 4275 ld(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes())); 4276 ld(tmp2, Address(data, CompiledICData::speculated_klass_offset())); 4277 } 4278 4279 Label ic_hit; 4280 beq(tmp1, tmp2, ic_hit); 4281 // Note, far_jump is not fixed size. 
4282 // Is this ever generates a movptr alignment/size will be off. 4283 far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 4284 bind(ic_hit); 4285 4286 assert((offset() % end_alignment) == 0, "Misaligned verified entry point."); 4287 return uep_offset; 4288 } 4289 4290 address MacroAssembler::emit_address_stub(int insts_call_instruction_offset, address dest) { 4291 address stub = start_a_stub(max_reloc_call_stub_size()); 4292 if (stub == nullptr) { 4293 return nullptr; // CodeBuffer::expand failed 4294 } 4295 4296 // We are always 4-byte aligned here. 4297 assert_alignment(pc()); 4298 4299 // Make sure the address of destination 8-byte aligned. 4300 align(wordSize, 0); 4301 4302 RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() + 4303 insts_call_instruction_offset); 4304 const int stub_start_offset = offset(); 4305 relocate(rh, [&] { 4306 assert(offset() - stub_start_offset == 0, 4307 "%ld - %ld == %ld : should be", (long)offset(), (long)stub_start_offset, (long)0); 4308 assert(offset() % wordSize == 0, "bad alignment"); 4309 emit_int64((int64_t)dest); 4310 }); 4311 4312 const address stub_start_addr = addr_at(stub_start_offset); 4313 end_a_stub(); 4314 4315 return stub_start_addr; 4316 } 4317 4318 // Emit a trampoline stub for a call to a target which is too far away. 4319 // 4320 // code sequences: 4321 // 4322 // call-site: 4323 // branch-and-link to <destination> or <trampoline stub> 4324 // 4325 // Related trampoline stub for this call site in the stub section: 4326 // load the call target from the constant pool 4327 // branch (RA still points to the call site above) 4328 4329 address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, 4330 address dest) { 4331 // Max stub size: alignment nop, TrampolineStub. 
4332 address stub = start_a_stub(max_reloc_call_stub_size()); 4333 if (stub == nullptr) { 4334 return nullptr; // CodeBuffer::expand failed 4335 } 4336 4337 assert(UseTrampolines, "Must be using trampos."); 4338 4339 // We are always 4-byte aligned here. 4340 assert_alignment(pc()); 4341 4342 // Create a trampoline stub relocation which relates this trampoline stub 4343 // with the call instruction at insts_call_instruction_offset in the 4344 // instructions code-section. 4345 4346 // Make sure the address of destination 8-byte aligned after 3 instructions. 4347 align(wordSize, MacroAssembler::NativeShortCall::trampoline_data_offset); 4348 4349 RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() + 4350 insts_call_instruction_offset); 4351 const int stub_start_offset = offset(); 4352 relocate(rh, [&] { 4353 // Now, create the trampoline stub's code: 4354 // - load the call 4355 // - call 4356 Label target; 4357 ld(t0, target); // auipc + ld 4358 jr(t0); // jalr 4359 bind(target); 4360 assert(offset() - stub_start_offset == MacroAssembler::NativeShortCall::trampoline_data_offset, 4361 "should be"); 4362 assert(offset() % wordSize == 0, "bad alignment"); 4363 emit_int64((int64_t)dest); 4364 }); 4365 4366 const address stub_start_addr = addr_at(stub_start_offset); 4367 4368 end_a_stub(); 4369 4370 return stub_start_addr; 4371 } 4372 4373 int MacroAssembler::max_reloc_call_stub_size() { 4374 // Max stub size: alignment nop, TrampolineStub. 
4375 if (UseTrampolines) { 4376 return instruction_size + MacroAssembler::NativeShortCall::trampoline_size; 4377 } 4378 return instruction_size + wordSize; 4379 } 4380 4381 int MacroAssembler::static_call_stub_size() { 4382 // (lui, addi, slli, addi, slli, addi) + (lui + lui + slli + add) + jalr 4383 return 11 * MacroAssembler::instruction_size; 4384 } 4385 4386 Address MacroAssembler::add_memory_helper(const Address dst, Register tmp) { 4387 switch (dst.getMode()) { 4388 case Address::base_plus_offset: 4389 // This is the expected mode, although we allow all the other 4390 // forms below. 4391 return form_address(tmp, dst.base(), dst.offset()); 4392 default: 4393 la(tmp, dst); 4394 return Address(tmp); 4395 } 4396 } 4397 4398 void MacroAssembler::increment(const Address dst, int64_t value, Register tmp1, Register tmp2) { 4399 assert(((dst.getMode() == Address::base_plus_offset && 4400 is_simm12(dst.offset())) || is_simm12(value)), 4401 "invalid value and address mode combination"); 4402 Address adr = add_memory_helper(dst, tmp2); 4403 assert(!adr.uses(tmp1), "invalid dst for address increment"); 4404 ld(tmp1, adr); 4405 add(tmp1, tmp1, value, tmp2); 4406 sd(tmp1, adr); 4407 } 4408 4409 void MacroAssembler::incrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 4410 assert(((dst.getMode() == Address::base_plus_offset && 4411 is_simm12(dst.offset())) || is_simm12(value)), 4412 "invalid value and address mode combination"); 4413 Address adr = add_memory_helper(dst, tmp2); 4414 assert(!adr.uses(tmp1), "invalid dst for address increment"); 4415 lwu(tmp1, adr); 4416 addw(tmp1, tmp1, value, tmp2); 4417 sw(tmp1, adr); 4418 } 4419 4420 void MacroAssembler::decrement(const Address dst, int64_t value, Register tmp1, Register tmp2) { 4421 assert(((dst.getMode() == Address::base_plus_offset && 4422 is_simm12(dst.offset())) || is_simm12(value)), 4423 "invalid value and address mode combination"); 4424 Address adr = add_memory_helper(dst, tmp2); 4425 
assert(!adr.uses(tmp1), "invalid dst for address decrement"); 4426 ld(tmp1, adr); 4427 sub(tmp1, tmp1, value, tmp2); 4428 sd(tmp1, adr); 4429 } 4430 4431 void MacroAssembler::decrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 4432 assert(((dst.getMode() == Address::base_plus_offset && 4433 is_simm12(dst.offset())) || is_simm12(value)), 4434 "invalid value and address mode combination"); 4435 Address adr = add_memory_helper(dst, tmp2); 4436 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 4437 lwu(tmp1, adr); 4438 subw(tmp1, tmp1, value, tmp2); 4439 sw(tmp1, adr); 4440 } 4441 4442 void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { 4443 assert_different_registers(src1, t0); 4444 relocate(src2.rspec(), [&] { 4445 int32_t offset; 4446 la(t0, src2.target(), offset); 4447 ld(t0, Address(t0, offset)); 4448 }); 4449 beq(src1, t0, equal); 4450 } 4451 4452 void MacroAssembler::load_method_holder_cld(Register result, Register method) { 4453 load_method_holder(result, method); 4454 ld(result, Address(result, InstanceKlass::class_loader_data_offset())); 4455 } 4456 4457 void MacroAssembler::load_method_holder(Register holder, Register method) { 4458 ld(holder, Address(method, Method::const_offset())); // ConstMethod* 4459 ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* 4460 ld(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass* 4461 } 4462 4463 // string indexof 4464 // compute index by trailing zeros 4465 void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, 4466 Register match_mask, Register result, 4467 Register ch2, Register tmp, 4468 bool haystack_isL) { 4469 int haystack_chr_shift = haystack_isL ? 
0 : 1; 4470 srl(match_mask, match_mask, trailing_zeros); 4471 srli(match_mask, match_mask, 1); 4472 srli(tmp, trailing_zeros, LogBitsPerByte); 4473 if (!haystack_isL) andi(tmp, tmp, 0xE); 4474 add(haystack, haystack, tmp); 4475 ld(ch2, Address(haystack)); 4476 if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); 4477 add(result, result, tmp); 4478 } 4479 4480 // string indexof 4481 // Find pattern element in src, compute match mask, 4482 // only the first occurrence of 0x80/0x8000 at low bits is the valid match index 4483 // match mask patterns and corresponding indices would be like: 4484 // - 0x8080808080808080 (Latin1) 4485 // - 7 6 5 4 3 2 1 0 (match index) 4486 // - 0x8000800080008000 (UTF16) 4487 // - 3 2 1 0 (match index) 4488 void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, 4489 Register mask1, Register mask2) { 4490 xorr(src, pattern, src); 4491 sub(match_mask, src, mask1); 4492 orr(src, src, mask2); 4493 notr(src, src); 4494 andr(match_mask, match_mask, src); 4495 } 4496 4497 #ifdef COMPILER2 4498 // Code for BigInteger::mulAdd intrinsic 4499 // out = x10 4500 // in = x11 4501 // offset = x12 (already out.length-offset) 4502 // len = x13 4503 // k = x14 4504 // tmp = x28 4505 // 4506 // pseudo code from java implementation: 4507 // long kLong = k & LONG_MASK; 4508 // carry = 0; 4509 // offset = out.length-offset - 1; 4510 // for (int j = len - 1; j >= 0; j--) { 4511 // product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; 4512 // out[offset--] = (int)product; 4513 // carry = product >>> 32; 4514 // } 4515 // return (int)carry; 4516 void MacroAssembler::mul_add(Register out, Register in, Register offset, 4517 Register len, Register k, Register tmp) { 4518 Label L_tail_loop, L_unroll, L_end; 4519 mv(tmp, out); 4520 mv(out, zr); 4521 blez(len, L_end); 4522 zero_extend(k, k, 32); 4523 slliw(t0, offset, LogBytesPerInt); 4524 add(offset, tmp, t0); 4525 slliw(t0, len, LogBytesPerInt); 4526 
add(in, in, t0); 4527 4528 const int unroll = 8; 4529 mv(tmp, unroll); 4530 blt(len, tmp, L_tail_loop); 4531 bind(L_unroll); 4532 for (int i = 0; i < unroll; i++) { 4533 sub(in, in, BytesPerInt); 4534 lwu(t0, Address(in, 0)); 4535 mul(t1, t0, k); 4536 add(t0, t1, out); 4537 sub(offset, offset, BytesPerInt); 4538 lwu(t1, Address(offset, 0)); 4539 add(t0, t0, t1); 4540 sw(t0, Address(offset, 0)); 4541 srli(out, t0, 32); 4542 } 4543 subw(len, len, tmp); 4544 bge(len, tmp, L_unroll); 4545 4546 bind(L_tail_loop); 4547 blez(len, L_end); 4548 sub(in, in, BytesPerInt); 4549 lwu(t0, Address(in, 0)); 4550 mul(t1, t0, k); 4551 add(t0, t1, out); 4552 sub(offset, offset, BytesPerInt); 4553 lwu(t1, Address(offset, 0)); 4554 add(t0, t0, t1); 4555 sw(t0, Address(offset, 0)); 4556 srli(out, t0, 32); 4557 subw(len, len, 1); 4558 j(L_tail_loop); 4559 4560 bind(L_end); 4561 } 4562 4563 // Multiply and multiply-accumulate unsigned 64-bit registers. 4564 void MacroAssembler::wide_mul(Register prod_lo, Register prod_hi, Register n, Register m) { 4565 assert_different_registers(prod_lo, prod_hi); 4566 4567 mul(prod_lo, n, m); 4568 mulhu(prod_hi, n, m); 4569 } 4570 4571 void MacroAssembler::wide_madd(Register sum_lo, Register sum_hi, Register n, 4572 Register m, Register tmp1, Register tmp2) { 4573 assert_different_registers(sum_lo, sum_hi); 4574 assert_different_registers(sum_hi, tmp2); 4575 4576 wide_mul(tmp1, tmp2, n, m); 4577 cad(sum_lo, sum_lo, tmp1, tmp1); // Add tmp1 to sum_lo with carry output to tmp1 4578 adc(sum_hi, sum_hi, tmp2, tmp1); // Add tmp2 with carry to sum_hi 4579 } 4580 4581 // add two unsigned input and output carry 4582 void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) 4583 { 4584 assert_different_registers(dst, carry); 4585 assert_different_registers(dst, src2); 4586 add(dst, src1, src2); 4587 sltu(carry, dst, src2); 4588 } 4589 4590 // add two input with carry 4591 void MacroAssembler::adc(Register dst, Register src1, Register 
src2, Register carry) { 4592 assert_different_registers(dst, carry); 4593 add(dst, src1, src2); 4594 add(dst, dst, carry); 4595 } 4596 4597 // add two unsigned input with carry and output carry 4598 void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) { 4599 assert_different_registers(dst, src2); 4600 adc(dst, src1, src2, carry); 4601 sltu(carry, dst, src2); 4602 } 4603 4604 void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, 4605 Register src1, Register src2, Register carry) { 4606 cad(dest_lo, dest_lo, src1, carry); 4607 add(dest_hi, dest_hi, carry); 4608 cad(dest_lo, dest_lo, src2, carry); 4609 add(final_dest_hi, dest_hi, carry); 4610 } 4611 4612 /** 4613 * Multiply 32 bit by 32 bit first loop. 4614 */ 4615 void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, 4616 Register y, Register y_idx, Register z, 4617 Register carry, Register product, 4618 Register idx, Register kdx) { 4619 // jlong carry, x[], y[], z[]; 4620 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 4621 // long product = y[idx] * x[xstart] + carry; 4622 // z[kdx] = (int)product; 4623 // carry = product >>> 32; 4624 // } 4625 // z[xstart] = (int)carry; 4626 4627 Label L_first_loop, L_first_loop_exit; 4628 blez(idx, L_first_loop_exit); 4629 4630 shadd(t0, xstart, x, t0, LogBytesPerInt); 4631 lwu(x_xstart, Address(t0, 0)); 4632 4633 bind(L_first_loop); 4634 subw(idx, idx, 1); 4635 shadd(t0, idx, y, t0, LogBytesPerInt); 4636 lwu(y_idx, Address(t0, 0)); 4637 mul(product, x_xstart, y_idx); 4638 add(product, product, carry); 4639 srli(carry, product, 32); 4640 subw(kdx, kdx, 1); 4641 shadd(t0, kdx, z, t0, LogBytesPerInt); 4642 sw(product, Address(t0, 0)); 4643 bgtz(idx, L_first_loop); 4644 4645 bind(L_first_loop_exit); 4646 } 4647 4648 /** 4649 * Multiply 64 bit by 64 bit first loop. 
 *
 * Same computation as the 32 bit variant, but x and y are consumed two
 * ints (one 64-bit word) at a time; a leftover single int at the start of
 * either array is handled by the L_one_x / L_one_y paths.
 */
void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                                           Register y, Register y_idx, Register z,
                                           Register carry, Register product,
                                           Register idx, Register kdx) {
  //
  //  jlong carry, x[], y[], z[];
  //  for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
  //    huge_128 product = y[idx] * x[xstart] + carry;
  //    z[kdx] = (jlong)product;
  //    carry  = (jlong)(product >>> 64);
  //  }
  //  z[xstart] = carry;
  //

  Label L_first_loop, L_first_loop_exit;
  Label L_one_x, L_one_y, L_multiply;

  // Step xstart back by one int; if it goes negative only a single
  // 32-bit word of x is left and L_one_x loads it instead.
  subw(xstart, xstart, 1);
  bltz(xstart, L_one_x);

  // Load two adjacent ints of x as one 64-bit value.
  shadd(t0, xstart, x, t0, LogBytesPerInt);
  ld(x_xstart, Address(t0, 0));
  ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian

  bind(L_first_loop);
  // idx is decremented by two per iteration, in two steps: negative after
  // the first step means y is exhausted, negative after the second means
  // exactly one int of y remains (L_one_y).
  subw(idx, idx, 1);
  bltz(idx, L_first_loop_exit);
  subw(idx, idx, 1);
  bltz(idx, L_one_y);

  shadd(t0, idx, y, t0, LogBytesPerInt);
  ld(y_idx, Address(t0, 0));
  ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian
  bind(L_multiply);

  // t0:product = x_xstart * y_idx (unsigned 128-bit product), then add the
  // incoming carry to the low half and propagate into the new carry.
  mulhu(t0, x_xstart, y_idx);
  mul(product, x_xstart, y_idx);
  cad(product, product, carry, t1);
  adc(carry, t0, zr, t1);

  // Store the low 64 bits two ints further down in z.
  subw(kdx, kdx, 2);
  ror_imm(product, product, 32); // back to big-endian
  shadd(t0, kdx, z, t0, LogBytesPerInt);
  sd(product, Address(t0, 0));

  j(L_first_loop);

  // Only y[0] is left: use it zero-extended and rejoin the multiply.
  bind(L_one_y);
  lwu(y_idx, Address(y, 0));
  j(L_multiply);

  // Only x[0] is left: use it zero-extended and enter the loop.
  bind(L_one_x);
  lwu(x_xstart, Address(x, 0));
  j(L_first_loop);

  bind(L_first_loop_exit);
}

/**
 * Multiply 128 bit by 128 bit. Unrolled inner loop.
 *
 * Emits the "third loop" shown in the pseudo code below: each main-loop
 * iteration consumes four ints (two 64-bit words) of y and z, and the
 * code after the loop handles a remainder of up to three ints.
 * idx counts ints; jdx counts main-loop iterations.
 */
void MacroAssembler::multiply_128_x_128_loop(Register y, Register z,
                                             Register carry, Register carry2,
                                             Register idx, Register jdx,
                                             Register yz_idx1, Register yz_idx2,
                                             Register tmp, Register tmp3, Register tmp4,
                                             Register tmp6, Register product_hi) {
  //   jlong carry, x[], y[], z[];
  //   int kdx = xstart+1;
  //   for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
  //     huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry;
  //     jlong carry2  = (jlong)(tmp3 >>> 64);
  //     huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2;
  //     carry  = (jlong)(tmp4 >>> 64);
  //     z[kdx+idx+1] = (jlong)tmp3;
  //     z[kdx+idx] = (jlong)tmp4;
  //   }
  //   idx += 2;
  //   if (idx > 0) {
  //     yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry;
  //     z[kdx+idx] = (jlong)yz_idx1;
  //     carry  = (jlong)(yz_idx1 >>> 64);
  //   }
  //

  Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;

  // jdx = idx / 4: number of full four-int iterations.
  srliw(jdx, idx, 2);

  bind(L_third_loop);

  subw(jdx, jdx, 1);
  bltz(jdx, L_third_loop_exit);
  subw(idx, idx, 4);

  // Load two 64-bit words of y starting at int index idx.
  shadd(t0, idx, y, t0, LogBytesPerInt);
  ld(yz_idx2, Address(t0, 0));
  ld(yz_idx1, Address(t0, wordSize));

  // tmp6 = address of the matching two words in z (kept for the stores below).
  shadd(tmp6, idx, z, t0, LogBytesPerInt);

  ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian
  ror_imm(yz_idx2, yz_idx2, 32);

  // t1 = z word at offset 0, t0 = z word at offset wordSize.
  ld(t1, Address(tmp6, 0));
  ld(t0, Address(tmp6, wordSize));

  mul(tmp3, product_hi, yz_idx1); //  yz_idx1 * product_hi -> tmp4:tmp3
  mulhu(tmp4, product_hi, yz_idx1);

  ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian
  ror_imm(t1, t1, 32, tmp);

  mul(tmp, product_hi, yz_idx2); //  yz_idx2 * product_hi -> carry2:tmp
  mulhu(carry2, product_hi, yz_idx2);

  // Accumulate: tmp3 becomes the low result word, tmp4 the next one, and
  // 'carry' the carry-out for the following iteration. The carry registers
  // are reused as scratch, so the order of these calls must not change.
  cad(tmp3, tmp3, carry, carry);
  adc(tmp4, tmp4, zr, carry);
  cad(tmp3, tmp3, t0, t0);
  cadc(tmp4, tmp4, tmp, t0);
  adc(carry, carry2, zr, t0);
  cad(tmp4, tmp4, t1, carry2);
  adc(carry, carry, zr, carry2);

  ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian
  ror_imm(tmp4, tmp4, 32);
  sd(tmp4, Address(tmp6, 0));
  sd(tmp3, Address(tmp6, wordSize));

  j(L_third_loop);

  bind(L_third_loop_exit);

  // Remainder: idx & 3 ints are left (0..3).
  andi(idx, idx, 0x3);
  beqz(idx, L_post_third_loop_done);

  // Two or three ints left: process one 64-bit word first.
  Label L_check_1;
  subw(idx, idx, 2);
  bltz(idx, L_check_1);

  shadd(t0, idx, y, t0, LogBytesPerInt);
  ld(yz_idx1, Address(t0, 0));
  ror_imm(yz_idx1, yz_idx1, 32);

  mul(tmp3, product_hi, yz_idx1); //  yz_idx1 * product_hi -> tmp4:tmp3
  mulhu(tmp4, product_hi, yz_idx1);

  shadd(t0, idx, z, t0, LogBytesPerInt);
  ld(yz_idx2, Address(t0, 0));
  ror_imm(yz_idx2, yz_idx2, 32, tmp);

  // tmp3 += carry + yz_idx2; the new carry is tmp4 plus the carries out.
  add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp);

  ror_imm(tmp3, tmp3, 32, tmp);
  sd(tmp3, Address(t0, 0));

  bind(L_check_1);

  // One int left (idx was odd): process a single 32-bit element.
  andi(idx, idx, 0x1);
  subw(idx, idx, 1);
  bltz(idx, L_post_third_loop_done);
  shadd(t0, idx, y, t0, LogBytesPerInt);
  lwu(tmp4, Address(t0, 0));
  mul(tmp3, tmp4, product_hi); //  tmp4 * product_hi -> carry2:tmp3
  mulhu(carry2, tmp4, product_hi);

  shadd(t0, idx, z, t0, LogBytesPerInt);
  lwu(tmp4, Address(t0, 0));

  add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0);

  // t0 was used as scratch above, so the z address is recomputed.
  shadd(t0, idx, z, t0, LogBytesPerInt);
  sw(tmp3, Address(t0, 0));

  // carry = (carry2 << 32) | (tmp3 >>> 32)
  slli(t0, carry2, 32);
  srli(carry, tmp3, 32);
  orr(carry, carry, t0);

  bind(L_post_third_loop_done);
}

/**
 * Code for BigInteger::multiplyToLen() intrinsic.
 *
 * x10: x
 * x11: xlen
 * x12: y
 * x13: ylen
 * x14: z
 * x15: tmp0
 * x16: tmp1
 * x17: tmp2
 * x7:  tmp3
 * x28: tmp4
 * x29: tmp5
 * x30: tmp6
 * x31: tmp7
 *
 * Register aliasing set up by the code below:
 *   idx / jdx = tmp1, kdx = tmp2, xstart = tmp3, y_idx = tmp4,
 *   carry = tmp5, product = xlen, x_xstart = tmp0.
 * xlen is therefore overwritten, t0/t1 are used as scratch, and sp is
 * lowered temporarily (and restored) around the inner loops.
 */
void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
                                     Register z, Register tmp0,
                                     Register tmp1, Register tmp2, Register tmp3, Register tmp4,
                                     Register tmp5, Register tmp6, Register product_hi) {
  assert_different_registers(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);

  const Register idx = tmp1;
  const Register kdx = tmp2;
  const Register xstart = tmp3;

  const Register y_idx = tmp4;
  const Register carry = tmp5;
  const Register product = xlen;
  const Register x_xstart = tmp0;

  mv(idx, ylen);         // idx = ylen;
  addw(kdx, xlen, ylen); // kdx = xlen+ylen;
  mv(carry, zr);         // carry = 0;

  Label L_multiply_64_x_64_loop, L_done;

  // Nothing to do when xlen is zero (xstart becomes negative).
  subw(xstart, xlen, 1);
  bltz(xstart, L_done);

  const Register jdx = tmp1;

  if (AvoidUnalignedAccesses) {
    // Check if x and y are both 8-byte aligned.
    // What the emitted test actually inspects is bit 0 of (xlen | ylen): the
    // 64-bit path is taken only when both lengths are even; otherwise the
    // 32-bit-at-a-time code in this branch is used.
    orr(t0, xlen, ylen);
    test_bit(t0, t0, 0);
    beqz(t0, L_multiply_64_x_64_loop);

    multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
    shadd(t0, xstart, z, t0, LogBytesPerInt);
    sw(carry, Address(t0, 0));

    // Outer loop over the remaining ints of x, one int per iteration.
    Label L_second_loop_unaligned;
    bind(L_second_loop_unaligned);
    mv(carry, zr);
    mv(jdx, ylen);
    subw(xstart, xstart, 1);
    bltz(xstart, L_done);
    // Save z: it is temporarily re-based to &z[xstart + 1] for the inner loop.
    // NOTE(review): the second slot is only zero-filled; presumably the
    // two-word frame is there to keep sp suitably aligned.
    sub(sp, sp, 2 * wordSize);
    sd(z, Address(sp, 0));
    sd(zr, Address(sp, wordSize));
    shadd(t0, xstart, z, t0, LogBytesPerInt);
    addi(z, t0, 4);
    shadd(t0, xstart, x, t0, LogBytesPerInt);
    lwu(product, Address(t0, 0));
    Label L_third_loop, L_third_loop_exit;

    blez(jdx, L_third_loop_exit);

    // Inner loop: z[jdx] = low32(y[jdx] * product + carry + z[jdx]); carry = the rest.
    bind(L_third_loop);
    subw(jdx, jdx, 1);
    shadd(t0, jdx, y, t0, LogBytesPerInt);
    lwu(t0, Address(t0, 0));
    mul(t1, t0, product);
    add(t0, t1, carry);
    shadd(tmp6, jdx, z, t1, LogBytesPerInt);
    lwu(t1, Address(tmp6, 0));
    add(t0, t0, t1);
    sw(t0, Address(tmp6, 0));
    srli(carry, t0, 32);
    bgtz(jdx, L_third_loop);

    bind(L_third_loop_exit);
    ld(z, Address(sp, 0));
    addi(sp, sp, 2 * wordSize);
    shadd(t0, xstart, z, t0, LogBytesPerInt);
    sw(carry, Address(t0, 0));

    j(L_second_loop_unaligned);
  }

  bind(L_multiply_64_x_64_loop);
  multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);

  // Store the carry left over from the first loop: as one int when a single
  // slot remains (kdx == 1), otherwise as two ints (low half first, at the
  // higher index).
  Label L_second_loop_aligned;
  beqz(kdx, L_second_loop_aligned);

  Label L_carry;
  subw(kdx, kdx, 1);
  beqz(kdx, L_carry);

  shadd(t0, kdx, z, t0, LogBytesPerInt);
  sw(carry, Address(t0, 0));
  srli(carry, carry, 32);
  subw(kdx, kdx, 1);

  bind(L_carry);
  shadd(t0, kdx, z, t0, LogBytesPerInt);
  sw(carry, Address(t0, 0));

  // Second and third (nested) loops.
  //
  // for (int i = xstart-1; i >= 0; i--) { // Second loop
  //   carry = 0;
  //   for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
  //     long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
  //                    (z[k] & LONG_MASK) + carry;
  //     z[k] = (int)product;
  //     carry = product >>> 32;
  //   }
  //   z[i] = (int)carry;
  // }
  //
  // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi

  bind(L_second_loop_aligned);
  mv(carry, zr); // carry = 0;
  mv(jdx, ylen); // j = ystart+1

  subw(xstart, xstart, 1); // i = xstart-1;
  bltz(xstart, L_done);

  // Four-word frame: z, ylen, x and xstart are saved because the inner loop
  // is handed x, ylen, xlen (product) and tmp3 (xstart) as scratch registers
  // and z is re-based below.
  sub(sp, sp, 4 * wordSize);
  sd(z, Address(sp, 0));

  Label L_last_x;
  shadd(t0, xstart, z, t0, LogBytesPerInt);
  addi(z, t0, 4);
  subw(xstart, xstart, 1); // i = xstart-1;
  bltz(xstart, L_last_x);

  shadd(t0, xstart, x, t0, LogBytesPerInt);
  ld(product_hi, Address(t0, 0));
  ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian

  Label L_third_loop_prologue;
  bind(L_third_loop_prologue);

  sd(ylen, Address(sp, wordSize));
  sd(x, Address(sp, 2 * wordSize));
  sd(xstart, Address(sp, 3 * wordSize));
  multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product,
                          tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi);
  ld(z, Address(sp, 0));
  ld(ylen, Address(sp, wordSize));
  ld(x, Address(sp, 2 * wordSize));
  ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen
  addi(sp, sp, 4 * wordSize);

  // Store the carry of this row: low 32 bits at z[i + 1], high 32 bits at
  // z[i], when i (the saved xstart, now in xlen) is not negative.
  addiw(tmp3, xlen, 1);
  shadd(t0, tmp3, z, t0, LogBytesPerInt);
  sw(carry, Address(t0, 0));

  subw(tmp3, tmp3, 1);
  bltz(tmp3, L_done);

  srli(carry, carry, 32);
  shadd(t0, tmp3, z, t0, LogBytesPerInt);
  sw(carry, Address(t0, 0));
  j(L_second_loop_aligned);

  // Next infrequent code is moved outside loops.
  // Only one int of x is left: load it zero-extended instead of a 64-bit limb.
  bind(L_last_x);
  lwu(product_hi, Address(x, 0));
  j(L_third_loop_prologue);

  bind(L_done);
}
#endif // closes a preprocessor conditional opened before this part of the file

// Count bits of trailing zero chars from lsb to msb until first non-zero element.
// For LL case, one byte for one element, so shift 8 bits once, and for other case,
// shift 16 bits once.
//
// Rd receives the bit count, rounded down to a multiple of the element width
// (8 or 16). tmp1 is scratch; tmp2 is used only on the non-Zbb path.
// On the non-Zbb path the emitted loop terminates only when Rs has at least
// one non-zero element, so callers must not pass Rs == 0 there.
void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) {
  if (UseZbb) {
    assert_different_registers(Rd, Rs, tmp1);
    int step = isLL ? 8 : 16;
    // Count trailing zero bits, then clear the low bits of the count so that
    // it is a multiple of step.
    ctz(Rd, Rs);
    andi(tmp1, Rd, step - 1);
    sub(Rd, Rd, tmp1);
    return;
  }

  assert_different_registers(Rd, Rs, tmp1, tmp2);
  Label Loop;
  int step = isLL ? 8 : 16;
  // Start at -step so that the first loop iteration yields 0.
  mv(Rd, -step);
  mv(tmp2, Rs);

  bind(Loop);
  addi(Rd, Rd, step);
  andi(tmp1, tmp2, ((1 << step) - 1)); // lowest remaining element
  srli(tmp2, tmp2, step);
  beqz(tmp1, Loop);
}

// This instruction reads adjacent 4 bytes from the lower half of source register,
// inflate into a register, for example:
// Rs: A7A6A5A4A3A2A1A0
// Rd: 00A300A200A100A0
//
// tmp1 holds the moving byte mask and tmp2 the currently extracted byte.
// NOTE(review): wordSize is used as the shift amount below; this presumably
// relies on wordSize == 8, i.e. a shift by one byte.
void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) {
  assert_different_registers(Rd, Rs, tmp1, tmp2);

  mv(tmp1, 0xFF000000); // first byte mask at lower word
  andr(Rd, Rs, tmp1);
  // Each round shifts the bytes collected so far up and ORs in the next lower
  // byte of Rs, which stays at its original position.
  for (int i = 0; i < 2; i++) {
    slli(Rd, Rd, wordSize);
    srli(tmp1, tmp1, wordSize);
    andr(tmp2, Rs, tmp1);
    orr(Rd, Rd, tmp2);
  }
  slli(Rd, Rd, wordSize);
  andi(tmp2, Rs, 0xFF); // last byte mask at lower word
  orr(Rd, Rd, tmp2);
}

// This instruction reads adjacent 4 bytes from the upper half of source register,
// inflate into a register, for example:
// Rs: A7A6A5A4A3A2A1A0
// Rd: 00A700A600A500A4
//
// Note that Rs itself is modified: it is shifted right by 32 bits in place.
void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) {
  assert_different_registers(Rd, Rs, tmp1, tmp2);

  srli(Rs, Rs, 32);   // only upper 32 bits are needed
  inflate_lo32(Rd, Rs, tmp1, tmp2);
}

// The size of the blocks erased by the zero_blocks stub.  We must
// handle anything smaller than this ourselves in zero_words().
const int MacroAssembler::zero_words_block_size = 8;

// zero_words() is used by C2 ClearArray patterns.  It is as small as
// possible, handling small word counts locally and delegating
// anything larger to the zero_blocks stub.  It is expanded many times
// in compiled code, so it is important to keep it short.

// ptr:   Address of a buffer to be zeroed.
// cnt:   Count in HeapWords.
//
// ptr, cnt, and t0 are clobbered.
//
// Returns the current pc() on success, or nullptr when reloc_call() could not
// emit the call to the zero_blocks stub.
// The labels done and done16 are declared but not used in this function.
address MacroAssembler::zero_words(Register ptr, Register cnt) {
  assert(is_power_of_2(zero_words_block_size), "adjust this");
  assert(ptr == x28 && cnt == x29, "mismatch in register usage");
  assert_different_registers(cnt, t0);

  BLOCK_COMMENT("zero_words {");

  mv(t0, zero_words_block_size);
  Label around, done, done16;
  // Fewer words than one block: skip the stub call entirely.
  bltu(cnt, t0, around);
  {
    RuntimeAddress zero_blocks(StubRoutines::riscv::zero_blocks());
    assert(zero_blocks.target() != nullptr, "zero_blocks stub has not been generated");
    if (StubRoutines::riscv::complete()) {
      address tpc = reloc_call(zero_blocks);
      if (tpc == nullptr) {
        DEBUG_ONLY(reset_labels(around));
        postcond(pc() == badAddress);
        return nullptr;
      }
    } else {
      rt_call(zero_blocks.target());
    }
  }
  bind(around);
  // Zero the remainder: for each power of two i below the block size (down to
  // 2), emit i stores guarded by the corresponding bit of cnt.
  for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
    Label l;
    test_bit(t0, cnt, exact_log2(i));
    beqz(t0, l);
    for (int j = 0; j < i; j++) {
      sd(zr, Address(ptr, j * wordSize));
    }
    addi(ptr, ptr, i * wordSize);
    bind(l);
  }
  // Final single word, guarded by bit 0 of cnt.
  {
    Label l;
    test_bit(t0, cnt, 0);
    beqz(t0, l);
    sd(zr, Address(ptr, 0));
    bind(l);
  }

  BLOCK_COMMENT("} zero_words");
  postcond(pc() != badAddress);
  return pc();
}

// Byte size up to which zero_words(Register, uint64_t) emits straight-line stores.
#define SmallArraySize (18 * BytesPerLong)

// base:  Address of a buffer to be zeroed, 8 bytes aligned.
// cnt:   Immediate count in HeapWords.
//
// base is only read. Counts up to SmallArraySize / BytesPerLong are zeroed
// with straight-line stores; larger counts use a loop unrolled by 8 that
// clobbers t0 and t1.
void MacroAssembler::zero_words(Register base, uint64_t cnt) {
  assert_different_registers(base, t0, t1);

  BLOCK_COMMENT("zero_words {");

  if (cnt <= SmallArraySize / BytesPerLong) {
    for (int i = 0; i < (int)cnt; i++) {
      sd(zr, Address(base, i * wordSize));
    }
  } else {
    const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll
    // First zero the cnt % unroll leading words so that the loop below only
    // has to handle a multiple of unroll.
    int remainder = cnt % unroll;
    for (int i = 0; i < remainder; i++) {
      sd(zr, Address(base, i * wordSize));
    }

    Label loop;
    Register cnt_reg = t0;
    Register loop_base = t1;
    cnt = cnt - remainder;
    mv(cnt_reg, cnt);
    add(loop_base, base, remainder * wordSize);
    bind(loop);
    sub(cnt_reg, cnt_reg, unroll);
    for (int i = 0; i < unroll; i++) {
      sd(zr, Address(loop_base, i * wordSize));
    }
    add(loop_base, loop_base, unroll * wordSize);
    bnez(cnt_reg, loop);
  }

  BLOCK_COMMENT("} zero_words");
}

// base:         Address of a buffer to be filled, 8 bytes aligned.
// cnt:          Count in 8-byte unit.
// value:        Value to be filled with.
// base will point to the end of the buffer after filling.
//
// cnt, t0 and t1 are clobbered.
void MacroAssembler::fill_words(Register base, Register cnt, Register value) {
//  Algorithm:
//
//    t0 = cnt & 7
//    cnt -= t0
//    p += t0
//    switch (t0):
//      switch start:
//      do while cnt
//        cnt -= 8
//          p[-8] = value
//        case 7:
//          p[-7] = value
//        case 6:
//          p[-6] = value
//          // ...
//        case 1:
//          p[-1] = value
//        case 0:
//          p += 8
//      do-while end
//    switch end

  assert_different_registers(base, cnt, value, t0, t1);

  Label fini, skip, entry, loop;
  const int unroll = 8; // Number of sd instructions we'll unroll

  beqz(cnt, fini);

  andi(t0, cnt, unroll - 1);
  sub(cnt, cnt, t0);
  // align 8, so first sd n % 8 = mod, next loop sd 8 * n.
  shadd(base, t0, base, t1, 3);
  // Computed jump into the unrolled store sequence: start (cnt % 8)
  // instructions before `entry` so that exactly that many stores execute first.
  la(t1, entry);
  slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst)
  sub(t1, t1, t0);
  jr(t1);

  bind(loop);
  add(base, base, unroll * 8);
  for (int i = -unroll; i < 0; i++) {
    sd(value, Address(base, i * 8));
  }
  bind(entry);
  sub(cnt, cnt, unroll);
  bgez(cnt, loop);

  bind(fini);
}

// Zero blocks of memory by using CBO.ZERO.
//
// Aligns the base address first sufficiently for CBO.ZERO, then uses
// CBO.ZERO repeatedly for every full block.  cnt is the size to be
// zeroed in HeapWords.  Returns the count of words left to be zeroed
// in cnt.
//
// NOTE: This is intended to be used in the zero_blocks() stub.  If
// you want to use it elsewhere, note that cnt must be >= CacheLineSize.
//
// base is advanced past everything that was zeroed; tmp1 and tmp2 are scratch.
void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2) {
  Label initial_table_end, loop;

  // Align base with cache line size.
  neg(tmp1, base);
  andi(tmp1, tmp1, CacheLineSize - 1);

  // tmp1: the number of bytes to be filled to align the base with cache line size.
  add(base, base, tmp1);
  srai(tmp2, tmp1, 3);  // bytes -> words already accounted for
  sub(cnt, cnt, tmp2);
  // Jump into the store table below so that exactly tmp1 / 8 stores execute.
  // The distance is (tmp1 / 8) stores * 4 bytes each == tmp1 / 2
  // (this assumes every sd in the table is encoded in 4 bytes).
  srli(tmp2, tmp1, 1);
  la(tmp1, initial_table_end);
  sub(tmp2, tmp1, tmp2);
  jr(tmp2);
  for (int i = -CacheLineSize + wordSize; i < 0; i += wordSize) {
    sd(zr, Address(base, i));
  }
  bind(initial_table_end);

  // Zero one full cache line per iteration while at least a line's worth of
  // words remains.
  mv(tmp1, CacheLineSize / wordSize);
  bind(loop);
  cbo_zero(base);
  sub(cnt, cnt, tmp1);
  add(base, base, CacheLineSize);
  bge(cnt, tmp1, loop);
}

// java.lang.Math.round(float a)
// Returns the closest int to the argument, with ties rounding to positive infinity.
//
// dst receives the result (0 for NaN input); ftmp and t0 are clobbered.
void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
  // this instructions calling sequence provides performance improvement on all tested devices;
  // don't change it without re-verification
  Label done;
  mv(t0, jint_cast(0.5f));
  fmv_w_x(ftmp, t0);

  // dst = 0 if NaN
  feq_s(t0, src, src); // replacing fclass with feq as performance optimization
  mv(dst, zr);
  beqz(t0, done);

  // dst = (src + 0.5f) rounded down towards negative infinity
  //   Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place.
  //   RDN is required for fadd_s, RNE gives incorrect results:
  //     --------------------------------------------------------------------
  //     fadd.s rne (src + 0.5f): src = 8388609.000000  ftmp = 8388610.000000
  //     fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610
  //     --------------------------------------------------------------------
  //     fadd.s rdn (src + 0.5f): src = 8388609.000000  ftmp = 8388609.000000
  //     fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609
  //     --------------------------------------------------------------------
  fadd_s(ftmp, src, ftmp, RoundingMode::rdn);
  fcvt_w_s(dst, ftmp, RoundingMode::rdn);

  bind(done);
}

// java.lang.Math.round(double a)
// Returns the closest long to the argument, with ties rounding to positive infinity.
//
// dst receives the result (0 for NaN input); ftmp and t0 are clobbered.
void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) {
  // this instructions calling sequence provides performance improvement on all tested devices;
  // don't change it without re-verification
  Label done;
  mv(t0, julong_cast(0.5));
  fmv_d_x(ftmp, t0);

  // dst = 0 if NaN
  feq_d(t0, src, src); // replacing fclass with feq as performance optimization
  mv(dst, zr);
  beqz(t0, done);

  // dst = (src + 0.5) rounded down towards negative infinity
  fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here otherwise some inputs produce incorrect results
  fcvt_l_d(dst, ftmp, RoundingMode::rdn);

  bind(done);
}

// Generates MacroAssembler::<FLOATCVT>_safe(dst, src, tmp): a float-to-integer
// conversion that yields 0 for NaN input. src is classified with
// fclass_<FLOATSIG> into tmp; when a NaN class bit is set the conversion is
// skipped and dst keeps the 0 written beforehand. tmp is clobbered.
#define FCVT_SAFE(FLOATCVT, FLOATSIG)                                                     \
void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) {     \
  Label done;                                                                             \
  assert_different_registers(dst, tmp);                                                   \
  fclass_##FLOATSIG(tmp, src);                                                            \
  mv(dst, zr);                                                                            \
  /* check if src is NaN */                                                               \
  andi(tmp, tmp, fclass_mask::nan);                                                       \
  bnez(tmp, done);                                                                        \
  FLOATCVT(dst, src);                                                                     \
  bind(done);                                                                             \
}

FCVT_SAFE(fcvt_w_s, s);
FCVT_SAFE(fcvt_l_s, s);
FCVT_SAFE(fcvt_w_d, d);
FCVT_SAFE(fcvt_l_d, d);

#undef FCVT_SAFE

// Generates MacroAssembler::<FLOATTYPE>_compare(result, Rs1, Rs2, unordered_result):
// a three-way floating-point compare leaving -1, 0 or 1 in result.
// unordered_result < 0 makes an unordered (NaN) comparison produce -1,
// any other value makes it produce 1.
#define FCMP(FLOATTYPE, FLOATSIG)                                                       \
void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1,            \
                                         FloatRegister Rs2, int unordered_result) {     \
  Label Ldone;                                                                          \
  if (unordered_result < 0) {                                                           \
    /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */    \
    /* installs 1 if gt else 0 */                                                       \
    flt_##FLOATSIG(result, Rs2, Rs1);                                                   \
    /* Rs1 > Rs2, install 1 */                                                          \
    bgtz(result, Ldone);                                                                \
    feq_##FLOATSIG(result, Rs1, Rs2);                                                   \
    addi(result, result, -1);                                                           \
    /* Rs1 = Rs2, install 0 */                                                          \
    /* NaN or Rs1 < Rs2, install -1 */                                                  \
    bind(Ldone);                                                                        \
  } else {                                                                              \
    /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */    \
    /* installs 1 if Rs1 < Rs2 else 0; the sign is flipped by the final neg */          \
    flt_##FLOATSIG(result, Rs1, Rs2);                                                   \
    /* Rs1 < Rs2, install -1 */                                                         \
    bgtz(result, Ldone);                                                                \
    feq_##FLOATSIG(result, Rs1, Rs2);                                                   \
    addi(result, result, -1);                                                           \
    /* Rs1 = Rs2, install 0 */                                                          \
    /* NaN or Rs1 > Rs2, install 1 */                                                   \
    bind(Ldone);                                                                        \
    neg(result, result);                                                                \
  }                                                                                     \
}

FCMP(float, s);
FCMP(double, d);

#undef FCMP

// Zero words; len is in bytes
// Clobbers len, tmp, t0 and t1; addr is only read.
// len must be a nonzero multiple of wordSize
void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) {
  assert_different_registers(addr, len, tmp, t0, t1);

#ifdef ASSERT
  {
    Label L;
    andi(t0, len, BytesPerWord - 1);
    beqz(t0, L);
    stop("len is not a multiple of BytesPerWord");
    bind(L);
  }
#endif // ASSERT

#ifndef PRODUCT
  block_comment("zero memory");
#endif // PRODUCT

  Label loop;
  Label entry;

  // Algorithm:
  //
  //  t0 = cnt & 7
  //  cnt -= t0
  //  p += t0
  //  switch (t0) {
  //    do {
  //      cnt -= 8
  //        p[-8] = 0
  //      case 7:
  //        p[-7] = 0
  //      case 6:
  //        p[-6] = 0
  //        ...
  //      case 1:
  //        p[-1] = 0
  //      case 0:
  //        p += 8
  //     } while (cnt)
  //  }

  const int unroll = 8; // Number of sd(zr) instructions we'll unroll

  srli(len, len, LogBytesPerWord);  // bytes -> words
  andi(t0, len, unroll - 1);        // t0 = cnt % unroll
  sub(len, len, t0);                // cnt -= t0
  // tmp always points to the end of the region we're about to zero
  shadd(tmp, t0, addr, t1, LogBytesPerWord);
  // Computed jump: enter the unrolled stores t0 instructions before `entry`
  // (t0 * 4 bytes) so that exactly t0 stores run first.
  la(t1, entry);
  slli(t0, t0, 2);
  sub(t1, t1, t0);
  jr(t1);
  bind(loop);
  sub(len, len, unroll);
  for (int i = -unroll; i < 0; i++) {
    sd(zr, Address(tmp, i * wordSize));
  }
  bind(entry);
  add(tmp, tmp, unroll * wordSize);
  bnez(len, loop);
}

// shift left by shamt and add
// Rd = (Rs1 << shamt) + Rs2
//
// With Zba, shift amounts 1..3 map to a single sh1add/sh2add/sh3add and tmp is
// left untouched. Otherwise tmp receives the shifted value (so it must differ
// from Rs2); for shamt == 0 a plain add is emitted.
void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) {
  if (UseZba) {
    if (shamt == 1) {
      sh1add(Rd, Rs1, Rs2);
      return;
    } else if (shamt == 2) {
      sh2add(Rd, Rs1, Rs2);
      return;
    } else if (shamt == 3) {
      sh3add(Rd, Rs1, Rs2);
      return;
    }
  }

  if (shamt != 0) {
    assert_different_registers(Rs2, tmp);
    slli(tmp, Rs1, shamt);
    add(Rd, Rs2, tmp);
  } else {
    add(Rd, Rs1, Rs2);
  }
}

// dst = the low `bits` bits of src, zero-extended to XLEN.
// Uses a single Zba/Zbb instruction for 32/16/8 bits when the extension is
// enabled, otherwise a shift-left / logical-shift-right pair.
void MacroAssembler::zero_extend(Register dst, Register src, int bits) {
  switch (bits) {
    case 32:
      if (UseZba) {
        zext_w(dst, src);
        return;
      }
      break;
    case 16:
      if (UseZbb) {
        zext_h(dst, src);
        return;
      }
      break;
    case 8:
      if (UseZbb) {
        zext_b(dst, src);
        return;
      }
      break;
    default:
      break;
  }
  slli(dst, src, XLEN - bits);
  srli(dst, dst, XLEN - bits);
}

// dst = the low `bits` bits of src, sign-extended to XLEN.
// 32 bits always uses sext_w; 16/8 bits use Zbb when enabled; everything else
// falls back to a shift-left / arithmetic-shift-right pair.
void MacroAssembler::sign_extend(Register dst, Register src, int bits) {
  switch (bits) {
    case 32:
      sext_w(dst, src);
      return;
    case 16:
      if (UseZbb) {
        sext_h(dst, src);
        return;
      }
      break;
    case 8:
      if (UseZbb) {
        sext_b(dst, src);
        return;
      }
      break;
    default:
      break;
  }
  slli(dst, src, XLEN - bits);
  srai(dst, dst, XLEN - bits);
}

// Three-way compare: dst = 1 if src1 > src2, -1 if src1 < src2, 0 if equal.
// is_signed selects slt (signed) or sltu (unsigned) for the comparisons.
// tmp is written only when dst is the same register as src1 or src2, to keep
// a copy of the operand that writing dst would destroy.
void MacroAssembler::cmp_x2i(Register dst, Register src1, Register src2,
                             Register tmp, bool is_signed) {
  if (src1 == src2) {
    // Same register on both sides: always equal.
    mv(dst, zr);
    return;
  }
  Label done;
  Register left = src1;
  Register right = src2;
  if (dst == src1) {
    assert_different_registers(dst, src2, tmp);
    mv(tmp, src1);
    left = tmp;
  } else if (dst == src2) {
    assert_different_registers(dst, src1, tmp);
    mv(tmp, src2);
    right = tmp;
  }

  // installs 1 if gt else 0
  if (is_signed) {
    slt(dst, right, left);
  } else {
    sltu(dst, right, left);
  }
  bnez(dst, done);
  if (is_signed) {
    slt(dst, left, right);
  } else {
    sltu(dst, left, right);
  }
  // dst = -1 if lt; else if eq , dst = 0
  neg(dst, dst);
  bind(done);
}

// Three-way compare via cmp_x2i without an explicit is_signed argument.
// NOTE(review): relies on the default for is_signed declared outside this
// chunk (presumably true, i.e. a signed compare) - confirm in the header.
void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp)
{
  cmp_x2i(dst, src1, src2, tmp);
}

// Unsigned three-way compare (cmp_x2i with is_signed == false).
void MacroAssembler::cmp_ul2i(Register dst, Register src1, Register src2, Register tmp) {
  cmp_x2i(dst, src1, src2, tmp, false);
}

// Unsigned three-way compare (cmp_x2i with is_signed == false); the emitted
// code is the same as for cmp_ul2i.
void MacroAssembler::cmp_uw2i(Register dst, Register src1, Register src2, Register tmp) {
  cmp_x2i(dst, src1, src2, tmp, false);
}

// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
// Byte offset of the incoming stack slot r. The move helpers below use it
// relative to fp.
static int reg2offset_in(VMReg r) {
  // Account for saved fp and ra
  // This should really be in_preserve_stack_slots
  return r->reg2stack() * VMRegImpl::stack_slot_size;
}

// Byte offset of the outgoing stack slot r, biased by
// SharedRuntime::out_preserve_stack_slots(). The move helpers below use it
// relative to sp.
static int reg2offset_out(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}

// The C ABI specifies:
// "integer scalars narrower than XLEN bits are widened according to the sign
// of their type up to 32 bits, then sign-extended to XLEN bits."
// Applies for both passed in register and stack.
//
// Java uses 32-bit stack slots; jint, jshort, jchar, jbyte uses one slot.
// Native uses 64-bit stack slots for all integer scalar types.
//
// lw loads the Java stack slot, sign-extends and
// sd store this widened integer into a 64 bit native stack slot.
//
// tmp is used only for the stack-to-stack case.
void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      lw(tmp, Address(fp, reg2offset_in(src.first())));
      sd(tmp, Address(sp, reg2offset_out(dst.first())));
    } else {
      // stack to reg
      lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
  } else {
    // reg to reg: nothing is emitted when source and destination coincide
    if (dst.first() != src.first()) {
      sign_extend(dst.first()->as_Register(), src.first()->as_Register(), 32);
    }
  }
}

// An oop arg. Must pass a handle not the oop itself
//
// Emits code that turns the oop argument src into a handle (the address of a
// stack slot holding the oop, or null when the oop is null) and places it in
// dst. The slot is recorded in map; for the receiver its byte offset is also
// written to *receiver_offset.
//
//   oop_handle_offset  - first slot of the area reserved for register oops
//   framesize_in_slots - added to the slot number of an oop that already
//                        lives on the stack
// t0 is clobbered when src is on the stack, t1 when dst is on the stack.
void MacroAssembler::object_move(OopMap* map,
                                 int oop_handle_offset,
                                 int framesize_in_slots,
                                 VMRegPair src,
                                 VMRegPair dst,
                                 bool is_receiver,
                                 int* receiver_offset) {
  assert_cond(map != nullptr && receiver_offset != nullptr);

  // must pass a handle. First figure out the location we use as a handle
  Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register();

  // See if oop is null if it is we need no handle

  if (src.first()->is_stack()) {
    // Oop is already on the stack as an argument
    int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
    if (is_receiver) {
      *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
    }

    // t0 = the oop, rHandle = address of the slot that holds it
    ld(t0, Address(fp, reg2offset_in(src.first())));
    la(rHandle, Address(fp, reg2offset_in(src.first())));
    // conditionally move a null
    Label notZero1;
    bnez(t0, notZero1);
    mv(rHandle, zr);
    bind(notZero1);
  } else {

    // Oop is in a register we must store it to the space we reserve
    // on the stack for oop_handles and pass a handle if oop is non-null

    const Register rOop = src.first()->as_Register();
    // Map the Java argument register to its index in the handle area.
    int oop_slot = -1;
    if (rOop == j_rarg0) {
      oop_slot = 0;
    } else if (rOop == j_rarg1) {
      oop_slot = 1;
    } else if (rOop == j_rarg2) {
      oop_slot = 2;
    } else if (rOop == j_rarg3) {
      oop_slot = 3;
    } else if (rOop == j_rarg4) {
      oop_slot = 4;
    } else if (rOop == j_rarg5) {
      oop_slot = 5;
    } else if (rOop == j_rarg6) {
      oop_slot = 6;
    } else {
      assert(rOop == j_rarg7, "wrong register");
      oop_slot = 7;
    }

    oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
    int offset = oop_slot * VMRegImpl::stack_slot_size;

    map->set_oop(VMRegImpl::stack2reg(oop_slot));
    // Store oop in handle area, may be null
    sd(rOop, Address(sp, offset));
    if (is_receiver) {
      *receiver_offset = offset;
    }

    //rOop maybe the same as rHandle
    if (rOop == rHandle) {
      // The register already holds null in the null case, so only overwrite
      // it with the slot address when the oop is non-null.
      Label isZero;
      beqz(rOop, isZero);
      la(rHandle, Address(sp, offset));
      bind(isZero);
    } else {
      Label notZero2;
      la(rHandle, Address(sp, offset));
      bnez(rOop, notZero2);
      mv(rHandle, zr);
      bind(notZero2);
    }
  }

  // If arg is on the stack then place it otherwise it is already in correct reg.
  if (dst.first()->is_stack()) {
    sd(rHandle, Address(sp, reg2offset_out(dst.first())));
  }
}

// A float arg may have to do float reg int reg conversion
//
// Supported combinations (asserted below): stack->stack, stack->reg and
// reg->reg; there is no reg->stack case. tmp is used only for stack->stack.
void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) {
  assert((src.first()->is_stack() && dst.first()->is_stack()) ||
         (src.first()->is_reg() && dst.first()->is_reg()) ||
         (src.first()->is_stack() && dst.first()->is_reg()), "Unexpected error");
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      lwu(tmp, Address(fp, reg2offset_in(src.first())));
      sw(tmp, Address(sp, reg2offset_out(dst.first())));
    } else if (dst.first()->is_Register()) {
      // Stack slot into an integer register: the 32 bits are loaded zero-extended.
      lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
    } else {
      ShouldNotReachHere();
    }
  } else if (src.first() != dst.first()) {
    if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
      fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
    } else {
      ShouldNotReachHere();
    }
  }
}

// A long move
//
// Handles all four stack/register combinations with 64-bit loads and stores;
// tmp is used only for stack->stack.
void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      ld(tmp, Address(fp, reg2offset_in(src.first())));
      sd(tmp, Address(sp, reg2offset_out(dst.first())));
    } else {
      // stack to reg
      ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
  } else {
    if (dst.first() != src.first()) {
      mv(dst.first()->as_Register(), src.first()->as_Register());
    }
  }
}

// A double move
//
// Same supported combinations as float_move (stack->stack, stack->reg,
// reg->reg), using 64-bit accesses. tmp is used only for stack->stack.
void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) {
  assert((src.first()->is_stack() && dst.first()->is_stack()) ||
         (src.first()->is_reg() && dst.first()->is_reg()) ||
         (src.first()->is_stack() && dst.first()->is_reg()), "Unexpected error");
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      ld(tmp, Address(fp, reg2offset_in(src.first())));
      sd(tmp, Address(sp, reg2offset_out(dst.first())));
    } else if (dst.first()-> is_Register()) {
      ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
    } else {
      ShouldNotReachHere();
    }
  } else if (src.first() != dst.first()) {
    if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
      fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
    } else {
      ShouldNotReachHere();
    }
  }
}

// Tests bit bit_pos (0..63) of Rs and leaves the result in Rd.
//
// Rd is zero when the bit is clear and non-zero when it is set. It is not
// always exactly 1: when Zbs is unavailable and the mask fits a 12-bit signed
// immediate, Rd holds the masked value (1 << bit_pos). Callers should only
// compare Rd against zero.
void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) {
  assert(bit_pos < 64, "invalid bit range");
  if (UseZbs) {
    bexti(Rd, Rs, bit_pos);
    return;
  }
  int64_t imm = (int64_t)(1UL << bit_pos);
  if (is_simm12(imm)) {
    and_imm12(Rd, Rs, imm);
  } else {
    // Mask does not fit an immediate: shift the bit down to position 0 first.
    srli(Rd, Rs, bit_pos);
    and_imm12(Rd, Rd, 1);
  }
}

// Implements lightweight-locking.
//
//  - basic_lock: address of the lock record; when UseObjectMonitorTable is set
//                its object-monitor cache field is cleared up front
//  - obj: the object to be locked
//  - tmp1, tmp2, tmp3: temporary registers, will be destroyed
//  - slow: branched to if locking fails
//
// On the fast path the object ends up pushed on the current thread's
// lock-stack; control falls through at the end of the emitted code.
void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
  assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
  assert_different_registers(basic_lock, obj, tmp1, tmp2, tmp3, t0);

  Label push;
  const Register top = tmp1;
  const Register mark = tmp2;
  const Register t = tmp3;

  // Preload the markWord. It is important that this is the first
  // instruction emitted as it is part of C1's null check semantics.
  ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));

  if (UseObjectMonitorTable) {
    // Clear cache in case fast locking succeeds.
    sd(zr, Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize((BasicLock::object_monitor_cache_offset_in_bytes()))));
  }

  // Check if the lock-stack is full.
  lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
  mv(t, (unsigned)LockStack::end_offset());
  bge(top, t, slow, /* is_far */ true);

  // Check for recursion.
  // (obj is already the topmost lock-stack entry: just push it again.)
  add(t, xthread, top);
  ld(t, Address(t, -oopSize));
  beq(obj, t, push);

  // Check header for monitor (0b10).
  test_bit(t, mark, exact_log2(markWord::monitor_value));
  bnez(t, slow, /* is_far */ true);

  // Try to lock. Transition lock-bits 0b01 => 0b00
  // mark = expected value with the unlocked bit set, t = same value with the
  // bit cleared; after the cmpxchg t holds the value the cmpxchg reports back.
  assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a la");
  ori(mark, mark, markWord::unlocked_value);
  xori(t, mark, markWord::unlocked_value);
  cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64,
          /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t);
  bne(mark, t, slow, /* is_far */ true);

  bind(push);
  // After successful lock, push object on lock-stack.
  add(t, xthread, top);
  sd(obj, Address(t));
  addw(top, top, oopSize);
  sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
}

// Implements lightweight-unlocking.
//
// - obj: the object to be unlocked
// - tmp1, tmp2, tmp3: temporary registers
// - slow: branched to if unlocking fails
//
// The fast path requires obj to be the topmost lock-stack entry. If the header
// turns out to carry a monitor, or the cmpxchg fails, the lock-stack top is
// restored before jumping to slow.
void MacroAssembler::lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
  assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
  assert_different_registers(obj, tmp1, tmp2, tmp3, t0);

#ifdef ASSERT
  {
    // Check for lock-stack underflow.
    Label stack_ok;
    lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
    mv(tmp2, (unsigned)LockStack::start_offset());
    bge(tmp1, tmp2, stack_ok);
    STOP("Lock-stack underflow");
    bind(stack_ok);
  }
#endif

  Label unlocked, push_and_slow;
  const Register top = tmp1;
  const Register mark = tmp2;
  const Register t = tmp3;

  // Check if obj is top of lock-stack.
  lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
  subw(top, top, oopSize);
  add(t, xthread, top);
  ld(t, Address(t));
  bne(obj, t, slow, /* is_far */ true);

  // Pop lock-stack.
  // (Debug builds additionally zero the popped slot.)
  DEBUG_ONLY(add(t, xthread, top);)
  DEBUG_ONLY(sd(zr, Address(t));)
  sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));

  // Check if recursive.
  // (The entry below the popped one is the same object: nothing more to do.)
  add(t, xthread, top);
  ld(t, Address(t, -oopSize));
  beq(obj, t, unlocked);

  // Not recursive. Check header for monitor (0b10).
  ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
  test_bit(t, mark, exact_log2(markWord::monitor_value));
  bnez(t, push_and_slow);

#ifdef ASSERT
  // Check header not unlocked (0b01).
  Label not_unlocked;
  test_bit(t, mark, exact_log2(markWord::unlocked_value));
  beqz(t, not_unlocked);
  stop("lightweight_unlock already unlocked");
  bind(not_unlocked);
#endif

  // Try to unlock. Transition lock bits 0b00 => 0b01
  assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
  ori(t, mark, markWord::unlocked_value);
  cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64,
          /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, /*result*/ t);
  beq(mark, t, unlocked);

  bind(push_and_slow);
  // Restore lock-stack and handle the unlock in runtime.
  // (Debug builds put obj back into the slot that was zeroed above.)
  DEBUG_ONLY(add(t, xthread, top);)
  DEBUG_ONLY(sd(obj, Address(t));)
  addw(top, top, oopSize);
  sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
  j(slow);

  bind(unlocked);
}