1 /* 2 * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. 4 * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. 5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6 * 7 * This code is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 only, as 9 * published by the Free Software Foundation. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 * 25 */ 26 27 #include "precompiled.hpp" 28 #include "asm/assembler.hpp" 29 #include "asm/assembler.inline.hpp" 30 #include "code/compiledIC.hpp" 31 #include "compiler/disassembler.hpp" 32 #include "gc/shared/barrierSet.hpp" 33 #include "gc/shared/barrierSetAssembler.hpp" 34 #include "gc/shared/cardTable.hpp" 35 #include "gc/shared/cardTableBarrierSet.hpp" 36 #include "gc/shared/collectedHeap.hpp" 37 #include "interpreter/bytecodeHistogram.hpp" 38 #include "interpreter/interpreter.hpp" 39 #include "interpreter/interpreterRuntime.hpp" 40 #include "memory/resourceArea.hpp" 41 #include "memory/universe.hpp" 42 #include "oops/accessDecorators.hpp" 43 #include "oops/compressedKlass.inline.hpp" 44 #include "oops/compressedOops.inline.hpp" 45 #include "oops/klass.inline.hpp" 46 #include "oops/oop.hpp" 47 #include "runtime/interfaceSupport.inline.hpp" 48 #include "runtime/javaThread.hpp" 49 #include "runtime/jniHandles.inline.hpp" 50 #include "runtime/sharedRuntime.hpp" 51 #include "runtime/stubRoutines.hpp" 52 #include "utilities/globalDefinitions.hpp" 53 #include "utilities/powerOfTwo.hpp" 54 #ifdef COMPILER2 55 #include "opto/compile.hpp" 56 #include "opto/node.hpp" 57 #include "opto/output.hpp" 58 #endif 59 60 #ifdef PRODUCT 61 #define BLOCK_COMMENT(str) /* nothing */ 62 #else 63 #define BLOCK_COMMENT(str) block_comment(str) 64 #endif 65 #define STOP(str) stop(str); 66 #define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") 67 68 69 70 Register MacroAssembler::extract_rs1(address instr) { 71 assert_cond(instr != nullptr); 72 return as_Register(Assembler::extract(Assembler::ld_instr(instr), 19, 15)); 73 } 74 75 Register MacroAssembler::extract_rs2(address instr) { 76 assert_cond(instr != nullptr); 77 return as_Register(Assembler::extract(Assembler::ld_instr(instr), 24, 20)); 78 } 79 80 Register MacroAssembler::extract_rd(address instr) { 81 assert_cond(instr != nullptr); 82 return as_Register(Assembler::extract(Assembler::ld_instr(instr), 11, 7)); 83 } 84 85 uint32_t MacroAssembler::extract_opcode(address instr) { 86 assert_cond(instr != nullptr); 87 return Assembler::extract(Assembler::ld_instr(instr), 6, 0); 88 } 89 90 uint32_t MacroAssembler::extract_funct3(address instr) { 91 assert_cond(instr != nullptr); 92 return Assembler::extract(Assembler::ld_instr(instr), 14, 12); 
}

bool MacroAssembler::is_pc_relative_at(address instr) {
  // auipc + jalr
  // auipc + addi
  // auipc + load
  // auipc + float_load
  return (is_auipc_at(instr)) &&
         (is_addi_at(instr + instruction_size) ||
          is_jalr_at(instr + instruction_size) ||
          is_load_at(instr + instruction_size) ||
          is_float_load_at(instr + instruction_size)) &&
         check_pc_relative_data_dependency(instr);
}

// i.e. ld(Rd, Label)
bool MacroAssembler::is_load_pc_relative_at(address instr) {
  return is_auipc_at(instr) &&                          // auipc
         is_ld_at(instr + instruction_size) &&          // ld
         check_load_pc_relative_data_dependency(instr);
}

bool MacroAssembler::is_movptr1_at(address instr) {
  return is_lui_at(instr) &&                                    // Lui
         is_addi_at(instr + instruction_size) &&                // Addi
         is_slli_shift_at(instr + instruction_size * 2, 11) &&  // Slli Rd, Rs, 11
         is_addi_at(instr + instruction_size * 3) &&            // Addi
         is_slli_shift_at(instr + instruction_size * 4, 6) &&   // Slli Rd, Rs, 6
         (is_addi_at(instr + instruction_size * 5) ||
          is_jalr_at(instr + instruction_size * 5) ||
          is_load_at(instr + instruction_size * 5)) &&          // Addi/Jalr/Load
         check_movptr1_data_dependency(instr);
}

bool MacroAssembler::is_movptr2_at(address instr) {
  return is_lui_at(instr) &&                                    // lui
         is_lui_at(instr + instruction_size) &&                 // lui
         is_slli_shift_at(instr + instruction_size * 2, 18) &&  // slli Rd, Rs, 18
         is_add_at(instr + instruction_size * 3) &&
         (is_addi_at(instr + instruction_size * 4) ||
          is_jalr_at(instr + instruction_size * 4) ||
          is_load_at(instr + instruction_size * 4)) &&          // Addi/Jalr/Load
         check_movptr2_data_dependency(instr);
}

bool MacroAssembler::is_li16u_at(address instr) {
  return is_lui_at(instr) &&                      // lui
         is_srli_at(instr + instruction_size) &&  // srli
         check_li16u_data_dependency(instr);
}

bool MacroAssembler::is_li32_at(address instr) {
  return is_lui_at(instr) &&                       // lui
         is_addiw_at(instr + instruction_size) &&  // addiw
         check_li32_data_dependency(instr);
}

bool MacroAssembler::is_lwu_to_zr(address instr) {
  assert_cond(instr != nullptr);
  return (extract_opcode(instr) == 0b0000011 &&
          extract_funct3(instr) == 0b110 &&
          extract_rd(instr) == zr);                // zr
}

uint32_t MacroAssembler::get_membar_kind(address addr) {
  assert_cond(addr != nullptr);
  assert(is_membar(addr), "no membar found");

  uint32_t insn = Bytes::get_native_u4(addr);

  uint32_t predecessor = Assembler::extract(insn, 27, 24);
  uint32_t successor = Assembler::extract(insn, 23, 20);

  return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor);
}

void MacroAssembler::set_membar_kind(address addr, uint32_t order_kind) {
  assert_cond(addr != nullptr);
  assert(is_membar(addr), "no membar found");

  uint32_t predecessor = 0;
  uint32_t successor = 0;

  MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor);

  uint32_t insn = Bytes::get_native_u4(addr);
  address pInsn = (address) &insn;
  Assembler::patch(pInsn, 27, 24, predecessor);
  Assembler::patch(pInsn, 23, 20, successor);

  address membar = addr;
  Assembler::sd_instr(membar, insn);
}


static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg) {
    masm->mv(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg) {
masm->mv(c_rarg1, arg); 197 } 198 } 199 200 static void pass_arg2(MacroAssembler* masm, Register arg) { 201 if (c_rarg2 != arg) { 202 masm->mv(c_rarg2, arg); 203 } 204 } 205 206 static void pass_arg3(MacroAssembler* masm, Register arg) { 207 if (c_rarg3 != arg) { 208 masm->mv(c_rarg3, arg); 209 } 210 } 211 212 void MacroAssembler::push_cont_fastpath(Register java_thread) { 213 if (!Continuations::enabled()) return; 214 Label done; 215 ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset())); 216 bleu(sp, t0, done); 217 sd(sp, Address(java_thread, JavaThread::cont_fastpath_offset())); 218 bind(done); 219 } 220 221 void MacroAssembler::pop_cont_fastpath(Register java_thread) { 222 if (!Continuations::enabled()) return; 223 Label done; 224 ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset())); 225 bltu(sp, t0, done); 226 sd(zr, Address(java_thread, JavaThread::cont_fastpath_offset())); 227 bind(done); 228 } 229 230 void MacroAssembler::inc_held_monitor_count(Register tmp) { 231 Address dst = Address(xthread, JavaThread::held_monitor_count_offset()); 232 ld(tmp, dst); 233 addi(tmp, tmp, 1); 234 sd(tmp, dst); 235 #ifdef ASSERT 236 Label ok; 237 test_bit(tmp, tmp, 63); 238 beqz(tmp, ok); 239 STOP("assert(held monitor count overflow)"); 240 should_not_reach_here(); 241 bind(ok); 242 #endif 243 } 244 245 void MacroAssembler::dec_held_monitor_count(Register tmp) { 246 Address dst = Address(xthread, JavaThread::held_monitor_count_offset()); 247 ld(tmp, dst); 248 addi(tmp, tmp, -1); 249 sd(tmp, dst); 250 #ifdef ASSERT 251 Label ok; 252 test_bit(tmp, tmp, 63); 253 beqz(tmp, ok); 254 STOP("assert(held monitor count underflow)"); 255 should_not_reach_here(); 256 bind(ok); 257 #endif 258 } 259 260 int MacroAssembler::align(int modulus, int extra_offset) { 261 CompressibleRegion cr(this); 262 intptr_t before = offset(); 263 while ((offset() + extra_offset) % modulus != 0) { nop(); } 264 return (int)(offset() - before); 265 } 266 267 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 268 call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); 269 } 270 271 // Implementation of call_VM versions 272 273 void MacroAssembler::call_VM(Register oop_result, 274 address entry_point, 275 bool check_exceptions) { 276 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 277 } 278 279 void MacroAssembler::call_VM(Register oop_result, 280 address entry_point, 281 Register arg_1, 282 bool check_exceptions) { 283 pass_arg1(this, arg_1); 284 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 285 } 286 287 void MacroAssembler::call_VM(Register oop_result, 288 address entry_point, 289 Register arg_1, 290 Register arg_2, 291 bool check_exceptions) { 292 assert_different_registers(arg_1, c_rarg2); 293 pass_arg2(this, arg_2); 294 pass_arg1(this, arg_1); 295 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 296 } 297 298 void MacroAssembler::call_VM(Register oop_result, 299 address entry_point, 300 Register arg_1, 301 Register arg_2, 302 Register arg_3, 303 bool check_exceptions) { 304 assert_different_registers(arg_1, c_rarg2, c_rarg3); 305 assert_different_registers(arg_2, c_rarg3); 306 pass_arg3(this, arg_3); 307 308 pass_arg2(this, arg_2); 309 310 pass_arg1(this, arg_1); 311 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 312 } 313 314 void MacroAssembler::call_VM(Register oop_result, 315 Register last_java_sp, 316 address entry_point, 317 int 
number_of_arguments, 318 bool check_exceptions) { 319 call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 320 } 321 322 void MacroAssembler::call_VM(Register oop_result, 323 Register last_java_sp, 324 address entry_point, 325 Register arg_1, 326 bool check_exceptions) { 327 pass_arg1(this, arg_1); 328 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 329 } 330 331 void MacroAssembler::call_VM(Register oop_result, 332 Register last_java_sp, 333 address entry_point, 334 Register arg_1, 335 Register arg_2, 336 bool check_exceptions) { 337 338 assert_different_registers(arg_1, c_rarg2); 339 pass_arg2(this, arg_2); 340 pass_arg1(this, arg_1); 341 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 342 } 343 344 void MacroAssembler::call_VM(Register oop_result, 345 Register last_java_sp, 346 address entry_point, 347 Register arg_1, 348 Register arg_2, 349 Register arg_3, 350 bool check_exceptions) { 351 assert_different_registers(arg_1, c_rarg2, c_rarg3); 352 assert_different_registers(arg_2, c_rarg3); 353 pass_arg3(this, arg_3); 354 pass_arg2(this, arg_2); 355 pass_arg1(this, arg_1); 356 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 357 } 358 359 void MacroAssembler::post_call_nop() { 360 if (!Continuations::enabled()) { 361 return; 362 } 363 relocate(post_call_nop_Relocation::spec(), [&] { 364 InlineSkippedInstructionsCounter skipCounter(this); 365 nop(); 366 li32(zr, 0); 367 }); 368 } 369 370 // these are no-ops overridden by InterpreterMacroAssembler 371 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} 372 void MacroAssembler::check_and_handle_popframe(Register java_thread) {} 373 374 // Calls to C land 375 // 376 // When entering C land, the fp, & esp of the last Java frame have to be recorded 377 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp 378 // has to be reset to 0. This is required to allow proper stack traversal. 379 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 380 Register last_java_fp, 381 Register last_java_pc) { 382 383 if (last_java_pc->is_valid()) { 384 sd(last_java_pc, Address(xthread, 385 JavaThread::frame_anchor_offset() + 386 JavaFrameAnchor::last_Java_pc_offset())); 387 } 388 389 // determine last_java_sp register 390 if (!last_java_sp->is_valid()) { 391 last_java_sp = esp; 392 } 393 394 sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); 395 396 // last_java_fp is optional 397 if (last_java_fp->is_valid()) { 398 sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); 399 } 400 } 401 402 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 403 Register last_java_fp, 404 address last_java_pc, 405 Register tmp) { 406 assert(last_java_pc != nullptr, "must provide a valid PC"); 407 408 la(tmp, last_java_pc); 409 sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); 410 411 set_last_Java_frame(last_java_sp, last_java_fp, noreg); 412 } 413 414 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 415 Register last_java_fp, 416 Label &L, 417 Register tmp) { 418 if (L.is_bound()) { 419 set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); 420 } else { 421 L.add_patch_at(code(), locator()); 422 IncompressibleRegion ir(this); // the label address will be patched back. 
423 set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); 424 } 425 } 426 427 void MacroAssembler::reset_last_Java_frame(bool clear_fp) { 428 // we must set sp to zero to clear frame 429 sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); 430 431 // must clear fp, so that compiled frames are not confused; it is 432 // possible that we need it only for debugging 433 if (clear_fp) { 434 sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); 435 } 436 437 // Always clear the pc because it could have been set by make_walkable() 438 sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); 439 } 440 441 static bool is_preemptable(address entry_point) { 442 return entry_point == CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter); 443 } 444 445 void MacroAssembler::call_VM_base(Register oop_result, 446 Register java_thread, 447 Register last_java_sp, 448 address entry_point, 449 int number_of_arguments, 450 bool check_exceptions) { 451 // determine java_thread register 452 if (!java_thread->is_valid()) { 453 java_thread = xthread; 454 } 455 // determine last_java_sp register 456 if (!last_java_sp->is_valid()) { 457 last_java_sp = esp; 458 } 459 460 // debugging support 461 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 462 assert(java_thread == xthread, "unexpected register"); 463 464 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 465 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 466 467 // push java thread (becomes first argument of C function) 468 mv(c_rarg0, java_thread); 469 470 // set last Java frame before call 471 assert(last_java_sp != fp, "can't use fp"); 472 473 Label l; 474 if (is_preemptable(entry_point)) { 475 // skip setting last_pc since we already set it to desired value. 
476 set_last_Java_frame(last_java_sp, fp, noreg); 477 } else { 478 set_last_Java_frame(last_java_sp, fp, l, t0); 479 } 480 481 // do the call, remove parameters 482 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); 483 484 // reset last Java frame 485 // Only interpreter should have to clear fp 486 reset_last_Java_frame(true); 487 488 // C++ interp handles this in the interpreter 489 check_and_handle_popframe(java_thread); 490 check_and_handle_earlyret(java_thread); 491 492 if (check_exceptions) { 493 // check for pending exceptions (java_thread is set upon return) 494 ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); 495 Label ok; 496 beqz(t0, ok); 497 RuntimeAddress target(StubRoutines::forward_exception_entry()); 498 relocate(target.rspec(), [&] { 499 int32_t offset; 500 la(t0, target.target(), offset); 501 jr(t0, offset); 502 }); 503 bind(ok); 504 } 505 506 // get oop result if there is one and reset the value in the thread 507 if (oop_result->is_valid()) { 508 get_vm_result(oop_result, java_thread); 509 } 510 } 511 512 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { 513 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); 514 sd(zr, Address(java_thread, JavaThread::vm_result_offset())); 515 verify_oop_msg(oop_result, "broken oop in call_VM_base"); 516 } 517 518 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { 519 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); 520 sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); 521 } 522 523 void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { 524 assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required"); 525 assert_different_registers(klass, xthread, tmp); 526 527 Label L_fallthrough, L_tmp; 528 if (L_fast_path == nullptr) { 529 L_fast_path = &L_fallthrough; 530 } else if (L_slow_path == nullptr) { 531 L_slow_path = &L_fallthrough; 532 } 533 534 // Fast path check: class is fully initialized 535 lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); 536 sub(tmp, tmp, InstanceKlass::fully_initialized); 537 beqz(tmp, *L_fast_path); 538 539 // Fast path check: current thread is initializer thread 540 ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); 541 542 if (L_slow_path == &L_fallthrough) { 543 beq(xthread, tmp, *L_fast_path); 544 bind(*L_slow_path); 545 } else if (L_fast_path == &L_fallthrough) { 546 bne(xthread, tmp, *L_slow_path); 547 bind(*L_fast_path); 548 } else { 549 Unimplemented(); 550 } 551 } 552 553 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) { 554 if (!VerifyOops) { return; } 555 556 // Pass register number to verify_oop_subroutine 557 const char* b = nullptr; 558 { 559 ResourceMark rm; 560 stringStream ss; 561 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line); 562 b = code_string(ss.as_string()); 563 } 564 BLOCK_COMMENT("verify_oop {"); 565 566 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 567 568 mv(c_rarg0, reg); // c_rarg0 : x10 569 { 570 // The length of the instruction sequence emitted should not depend 571 // on the address of the char buffer so that the size of mach nodes for 572 // scratch emit and normal emit matches. 
573 IncompressibleRegion ir(this); // Fixed length 574 movptr(t0, (address) b); 575 } 576 577 // call indirectly to solve generation ordering problem 578 RuntimeAddress target(StubRoutines::verify_oop_subroutine_entry_address()); 579 relocate(target.rspec(), [&] { 580 int32_t offset; 581 la(t1, target.target(), offset); 582 ld(t1, Address(t1, offset)); 583 }); 584 jalr(t1); 585 586 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 587 588 BLOCK_COMMENT("} verify_oop"); 589 } 590 591 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { 592 if (!VerifyOops) { 593 return; 594 } 595 596 const char* b = nullptr; 597 { 598 ResourceMark rm; 599 stringStream ss; 600 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line); 601 b = code_string(ss.as_string()); 602 } 603 BLOCK_COMMENT("verify_oop_addr {"); 604 605 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 606 607 if (addr.uses(sp)) { 608 la(x10, addr); 609 ld(x10, Address(x10, 4 * wordSize)); 610 } else { 611 ld(x10, addr); 612 } 613 614 { 615 // The length of the instruction sequence emitted should not depend 616 // on the address of the char buffer so that the size of mach nodes for 617 // scratch emit and normal emit matches. 618 IncompressibleRegion ir(this); // Fixed length 619 movptr(t0, (address) b); 620 } 621 622 // call indirectly to solve generation ordering problem 623 RuntimeAddress target(StubRoutines::verify_oop_subroutine_entry_address()); 624 relocate(target.rspec(), [&] { 625 int32_t offset; 626 la(t1, target.target(), offset); 627 ld(t1, Address(t1, offset)); 628 }); 629 jalr(t1); 630 631 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 632 633 BLOCK_COMMENT("} verify_oop_addr"); 634 } 635 636 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 637 int extra_slot_offset) { 638 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  if (arg_slot.is_constant()) {
    return Address(esp, arg_slot.as_constant() * stackElementSize + offset);
  } else {
    assert_different_registers(t0, arg_slot.as_register());
    shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize));
    return Address(t0, offset);
  }
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
{
  // In order to get locks to work, we need to fake an in_VM state
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr(" pc = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr(" x0 = 0x%016lx", regs[0]);
      tty->print_cr(" x1 = 0x%016lx", regs[1]);
      tty->print_cr(" x2 = 0x%016lx", regs[2]);
      tty->print_cr(" x3 = 0x%016lx", regs[3]);
      tty->print_cr(" x4 = 0x%016lx", regs[4]);
      tty->print_cr(" x5 = 0x%016lx", regs[5]);
      tty->print_cr(" x6 = 0x%016lx", regs[6]);
      tty->print_cr(" x7 = 0x%016lx", regs[7]);
      tty->print_cr(" x8 = 0x%016lx", regs[8]);
      tty->print_cr(" x9 = 0x%016lx", regs[9]);
      tty->print_cr("x10 = 0x%016lx", regs[10]);
      tty->print_cr("x11 = 0x%016lx", regs[11]);
      tty->print_cr("x12 = 0x%016lx", regs[12]);
      tty->print_cr("x13 = 0x%016lx", regs[13]);
      tty->print_cr("x14 = 0x%016lx", regs[14]);
      tty->print_cr("x15 = 0x%016lx", regs[15]);
      tty->print_cr("x16 = 0x%016lx", regs[16]);
      tty->print_cr("x17 = 0x%016lx", regs[17]);
      tty->print_cr("x18 = 0x%016lx", regs[18]);
      tty->print_cr("x19 = 0x%016lx", regs[19]);
      tty->print_cr("x20 = 0x%016lx", regs[20]);
      tty->print_cr("x21 = 0x%016lx", regs[21]);
      tty->print_cr("x22 = 0x%016lx", regs[22]);
      tty->print_cr("x23 = 0x%016lx", regs[23]);
      tty->print_cr("x24 = 0x%016lx", regs[24]);
      tty->print_cr("x25 = 0x%016lx", regs[25]);
      tty->print_cr("x26 = 0x%016lx", regs[26]);
      tty->print_cr("x27 = 0x%016lx", regs[27]);
      tty->print_cr("x28 = 0x%016lx", regs[28]);
      tty->print_cr("x29 = 0x%016lx", regs[29]);
      tty->print_cr("x30 = 0x%016lx", regs[30]);
      tty->print_cr("x31 = 0x%016lx", regs[31]);
      BREAKPOINT;
    }
  }
  fatal("DEBUG MESSAGE: %s", msg);
}

void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
  assert_different_registers(value, tmp1, tmp2);
  Label done, tagged, weak_tagged;

  beqz(value, done);           // Use null as-is.
  // Test for tag.
  andi(tmp1, value, JNIHandles::tag_mask);
  bnez(tmp1, tagged);

  // Resolve local handle
  access_load_at(T_OBJECT, IN_NATIVE | AS_RAW, value, Address(value, 0), tmp1, tmp2);
  verify_oop(value);
  j(done);

  bind(tagged);
  // Test for jweak tag.
732 STATIC_ASSERT(JNIHandles::TypeTag::weak_global == 0b1); 733 test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::weak_global)); 734 bnez(tmp1, weak_tagged); 735 736 // Resolve global handle 737 access_load_at(T_OBJECT, IN_NATIVE, value, 738 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2); 739 verify_oop(value); 740 j(done); 741 742 bind(weak_tagged); 743 // Resolve jweak. 744 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, 745 Address(value, -JNIHandles::TypeTag::weak_global), tmp1, tmp2); 746 verify_oop(value); 747 748 bind(done); 749 } 750 751 void MacroAssembler::resolve_global_jobject(Register value, Register tmp1, Register tmp2) { 752 assert_different_registers(value, tmp1, tmp2); 753 Label done; 754 755 beqz(value, done); // Use null as-is. 756 757 #ifdef ASSERT 758 { 759 STATIC_ASSERT(JNIHandles::TypeTag::global == 0b10); 760 Label valid_global_tag; 761 test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::global)); // Test for global tag. 762 bnez(tmp1, valid_global_tag); 763 stop("non global jobject using resolve_global_jobject"); 764 bind(valid_global_tag); 765 } 766 #endif 767 768 // Resolve global handle 769 access_load_at(T_OBJECT, IN_NATIVE, value, 770 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2); 771 verify_oop(value); 772 773 bind(done); 774 } 775 776 void MacroAssembler::stop(const char* msg) { 777 BLOCK_COMMENT(msg); 778 illegal_instruction(Assembler::csr::time); 779 emit_int64((uintptr_t)msg); 780 } 781 782 void MacroAssembler::unimplemented(const char* what) { 783 const char* buf = nullptr; 784 { 785 ResourceMark rm; 786 stringStream ss; 787 ss.print("unimplemented: %s", what); 788 buf = code_string(ss.as_string()); 789 } 790 stop(buf); 791 } 792 793 void MacroAssembler::emit_static_call_stub() { 794 IncompressibleRegion ir(this); // Fixed length: see CompiledDirectCall::to_interp_stub_size(). 795 // CompiledDirectCall::set_to_interpreted knows the 796 // exact layout of this stub. 797 798 mov_metadata(xmethod, (Metadata*)nullptr); 799 800 // Jump to the entry point of the c2i stub. 
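  // Note: the zero passed to movptr below is only a placeholder; per the comment
  // above, CompiledDirectCall::set_to_interpreted() later patches this fixed-size
  // sequence with the real c2i entry point.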
801 int32_t offset = 0; 802 movptr(t0, 0, offset, t1); // lui + lui + slli + add 803 jr(t0, offset); 804 } 805 806 void MacroAssembler::call_VM_leaf_base(address entry_point, 807 int number_of_arguments, 808 Label *retaddr) { 809 int32_t offset = 0; 810 push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp 811 mv(t0, entry_point, offset); 812 jalr(t0, offset); 813 if (retaddr != nullptr) { 814 bind(*retaddr); 815 } 816 pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp 817 } 818 819 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { 820 call_VM_leaf_base(entry_point, number_of_arguments); 821 } 822 823 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { 824 pass_arg0(this, arg_0); 825 call_VM_leaf_base(entry_point, 1); 826 } 827 828 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 829 assert_different_registers(arg_1, c_rarg0); 830 pass_arg0(this, arg_0); 831 pass_arg1(this, arg_1); 832 call_VM_leaf_base(entry_point, 2); 833 } 834 835 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, 836 Register arg_1, Register arg_2) { 837 assert_different_registers(arg_1, c_rarg0); 838 assert_different_registers(arg_2, c_rarg0, c_rarg1); 839 pass_arg0(this, arg_0); 840 pass_arg1(this, arg_1); 841 pass_arg2(this, arg_2); 842 call_VM_leaf_base(entry_point, 3); 843 } 844 845 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { 846 pass_arg0(this, arg_0); 847 MacroAssembler::call_VM_leaf_base(entry_point, 1); 848 } 849 850 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 851 852 assert_different_registers(arg_0, c_rarg1); 853 pass_arg1(this, arg_1); 854 pass_arg0(this, arg_0); 855 MacroAssembler::call_VM_leaf_base(entry_point, 2); 856 } 857 858 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 859 assert_different_registers(arg_0, c_rarg1, c_rarg2); 860 assert_different_registers(arg_1, c_rarg2); 861 pass_arg2(this, arg_2); 862 pass_arg1(this, arg_1); 863 pass_arg0(this, arg_0); 864 MacroAssembler::call_VM_leaf_base(entry_point, 3); 865 } 866 867 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { 868 assert_different_registers(arg_0, c_rarg1, c_rarg2, c_rarg3); 869 assert_different_registers(arg_1, c_rarg2, c_rarg3); 870 assert_different_registers(arg_2, c_rarg3); 871 872 pass_arg3(this, arg_3); 873 pass_arg2(this, arg_2); 874 pass_arg1(this, arg_1); 875 pass_arg0(this, arg_0); 876 MacroAssembler::call_VM_leaf_base(entry_point, 4); 877 } 878 879 void MacroAssembler::la(Register Rd, const address addr) { 880 int32_t offset; 881 la(Rd, addr, offset); 882 addi(Rd, Rd, offset); 883 } 884 885 void MacroAssembler::la(Register Rd, const address addr, int32_t &offset) { 886 if (is_32bit_offset_from_codecache((int64_t)addr)) { 887 int64_t distance = addr - pc(); 888 assert(is_valid_32bit_offset(distance), "Must be"); 889 auipc(Rd, (int32_t)distance + 0x800); 890 offset = ((int32_t)distance << 20) >> 20; 891 } else { 892 assert(!CodeCache::contains(addr), "Must be"); 893 movptr(Rd, addr, offset); 894 } 895 } 896 897 void MacroAssembler::la(Register Rd, const Address &adr) { 898 switch (adr.getMode()) { 899 case Address::literal: { 900 relocInfo::relocType rtype = adr.rspec().reloc()->type(); 901 if (rtype == relocInfo::none) { 902 mv(Rd, 
(intptr_t)(adr.target())); 903 } else { 904 relocate(adr.rspec(), [&] { 905 movptr(Rd, adr.target()); 906 }); 907 } 908 break; 909 } 910 case Address::base_plus_offset: { 911 Address new_adr = legitimize_address(Rd, adr); 912 if (!(new_adr.base() == Rd && new_adr.offset() == 0)) { 913 addi(Rd, new_adr.base(), new_adr.offset()); 914 } 915 break; 916 } 917 default: 918 ShouldNotReachHere(); 919 } 920 } 921 922 void MacroAssembler::la(Register Rd, Label &label) { 923 IncompressibleRegion ir(this); // the label address may be patched back. 924 wrap_label(Rd, label, &MacroAssembler::la); 925 } 926 927 void MacroAssembler::li16u(Register Rd, uint16_t imm) { 928 lui(Rd, (uint32_t)imm << 12); 929 srli(Rd, Rd, 12); 930 } 931 932 void MacroAssembler::li32(Register Rd, int32_t imm) { 933 // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit 934 int64_t upper = imm, lower = imm; 935 lower = (imm << 20) >> 20; 936 upper -= lower; 937 upper = (int32_t)upper; 938 // lui Rd, imm[31:12] + imm[11] 939 lui(Rd, upper); 940 addiw(Rd, Rd, lower); 941 } 942 943 void MacroAssembler::li(Register Rd, int64_t imm) { 944 // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff 945 // li -> c.li 946 if (do_compress() && (is_simm6(imm) && Rd != x0)) { 947 c_li(Rd, imm); 948 return; 949 } 950 951 int shift = 12; 952 int64_t upper = imm, lower = imm; 953 // Split imm to a lower 12-bit sign-extended part and the remainder, 954 // because addi will sign-extend the lower imm. 955 lower = ((int32_t)imm << 20) >> 20; 956 upper -= lower; 957 958 // Test whether imm is a 32-bit integer. 959 if (!(((imm) & ~(int64_t)0x7fffffff) == 0 || 960 (((imm) & ~(int64_t)0x7fffffff) == ~(int64_t)0x7fffffff))) { 961 while (((upper >> shift) & 1) == 0) { shift++; } 962 upper >>= shift; 963 li(Rd, upper); 964 slli(Rd, Rd, shift); 965 if (lower != 0) { 966 addi(Rd, Rd, lower); 967 } 968 } else { 969 // 32-bit integer 970 Register hi_Rd = zr; 971 if (upper != 0) { 972 lui(Rd, (int32_t)upper); 973 hi_Rd = Rd; 974 } 975 if (lower != 0 || hi_Rd == zr) { 976 addiw(Rd, hi_Rd, lower); 977 } 978 } 979 } 980 981 void MacroAssembler::load_link_jump(const address source, Register temp) { 982 assert(temp != noreg && temp != x0, "expecting a register"); 983 assert_cond(source != nullptr); 984 int64_t distance = source - pc(); 985 assert(is_simm32(distance), "Must be"); 986 auipc(temp, (int32_t)distance + 0x800); 987 ld(temp, Address(temp, ((int32_t)distance << 20) >> 20)); 988 jalr(temp); 989 } 990 991 void MacroAssembler::jump_link(const address dest, Register temp) { 992 assert(UseTrampolines, "Must be"); 993 assert_cond(dest != nullptr); 994 int64_t distance = dest - pc(); 995 assert(is_simm21(distance), "Must be"); 996 assert((distance % 2) == 0, "Must be"); 997 jal(x1, distance); 998 } 999 1000 void MacroAssembler::j(const address dest, Register temp) { 1001 assert(CodeCache::contains(dest), "Must be"); 1002 assert_cond(dest != nullptr); 1003 int64_t distance = dest - pc(); 1004 1005 // We can't patch C, i.e. if Label wasn't bound we need to patch this jump. 
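  // ("C" above refers to compressed RVC encodings: the IncompressibleRegion below
  // keeps this jump a full 4-byte instruction so it remains patchable once the
  // Label is bound.)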
1006 IncompressibleRegion ir(this); 1007 if (is_simm21(distance) && ((distance % 2) == 0)) { 1008 Assembler::jal(x0, distance); 1009 } else { 1010 assert(temp != noreg && temp != x0, "expecting a register"); 1011 int32_t offset = 0; 1012 la(temp, dest, offset); 1013 jr(temp, offset); 1014 } 1015 } 1016 1017 void MacroAssembler::j(const Address &adr, Register temp) { 1018 switch (adr.getMode()) { 1019 case Address::literal: { 1020 relocate(adr.rspec(), [&] { 1021 j(adr.target(), temp); 1022 }); 1023 break; 1024 } 1025 case Address::base_plus_offset: { 1026 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; 1027 la(temp, Address(adr.base(), adr.offset() - offset)); 1028 jr(temp, offset); 1029 break; 1030 } 1031 default: 1032 ShouldNotReachHere(); 1033 } 1034 } 1035 1036 void MacroAssembler::j(Label &lab, Register temp) { 1037 assert_different_registers(x0, temp); 1038 if (lab.is_bound()) { 1039 MacroAssembler::j(target(lab), temp); 1040 } else { 1041 lab.add_patch_at(code(), locator()); 1042 MacroAssembler::j(pc(), temp); 1043 } 1044 } 1045 1046 void MacroAssembler::jr(Register Rd, int32_t offset) { 1047 assert(Rd != noreg, "expecting a register"); 1048 Assembler::jalr(x0, Rd, offset); 1049 } 1050 1051 void MacroAssembler::call(const address dest, Register temp) { 1052 assert_cond(dest != nullptr); 1053 assert(temp != noreg, "expecting a register"); 1054 int32_t offset = 0; 1055 la(temp, dest, offset); 1056 jalr(temp, offset); 1057 } 1058 1059 void MacroAssembler::jalr(Register Rs, int32_t offset) { 1060 assert(Rs != noreg, "expecting a register"); 1061 Assembler::jalr(x1, Rs, offset); 1062 } 1063 1064 void MacroAssembler::rt_call(address dest, Register tmp) { 1065 CodeBlob *cb = CodeCache::find_blob(dest); 1066 RuntimeAddress target(dest); 1067 if (cb) { 1068 far_call(target, tmp); 1069 } else { 1070 relocate(target.rspec(), [&] { 1071 int32_t offset; 1072 la(tmp, target.target(), offset); 1073 jalr(tmp, offset); 1074 }); 1075 } 1076 } 1077 1078 void MacroAssembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) { 1079 if (L.is_bound()) { 1080 (this->*insn)(Rt, target(L)); 1081 } else { 1082 L.add_patch_at(code(), locator()); 1083 (this->*insn)(Rt, pc()); 1084 } 1085 } 1086 1087 void MacroAssembler::wrap_label(Register r1, Register r2, Label &L, 1088 compare_and_branch_insn insn, 1089 compare_and_branch_label_insn neg_insn, bool is_far) { 1090 if (is_far) { 1091 Label done; 1092 (this->*neg_insn)(r1, r2, done, /* is_far */ false); 1093 j(L); 1094 bind(done); 1095 } else { 1096 if (L.is_bound()) { 1097 (this->*insn)(r1, r2, target(L)); 1098 } else { 1099 L.add_patch_at(code(), locator()); 1100 (this->*insn)(r1, r2, pc()); 1101 } 1102 } 1103 } 1104 1105 #define INSN(NAME, NEG_INSN) \ 1106 void MacroAssembler::NAME(Register Rs1, Register Rs2, Label &L, bool is_far) { \ 1107 wrap_label(Rs1, Rs2, L, &MacroAssembler::NAME, &MacroAssembler::NEG_INSN, is_far); \ 1108 } 1109 1110 INSN(beq, bne); 1111 INSN(bne, beq); 1112 INSN(blt, bge); 1113 INSN(bge, blt); 1114 INSN(bltu, bgeu); 1115 INSN(bgeu, bltu); 1116 1117 #undef INSN 1118 1119 #define INSN(NAME) \ 1120 void MacroAssembler::NAME##z(Register Rs, const address dest) { \ 1121 NAME(Rs, zr, dest); \ 1122 } \ 1123 void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ 1124 NAME(Rs, zr, l, is_far); \ 1125 } \ 1126 1127 INSN(beq); 1128 INSN(bne); 1129 INSN(blt); 1130 INSN(ble); 1131 INSN(bge); 1132 INSN(bgt); 1133 1134 #undef INSN 1135 1136 #define INSN(NAME, NEG_INSN) \ 1137 void MacroAssembler::NAME(Register Rs, Register Rt, 
const address dest) { \ 1138 NEG_INSN(Rt, Rs, dest); \ 1139 } \ 1140 void MacroAssembler::NAME(Register Rs, Register Rt, Label &l, bool is_far) { \ 1141 NEG_INSN(Rt, Rs, l, is_far); \ 1142 } 1143 1144 INSN(bgt, blt); 1145 INSN(ble, bge); 1146 INSN(bgtu, bltu); 1147 INSN(bleu, bgeu); 1148 1149 #undef INSN 1150 1151 // Float compare branch instructions 1152 1153 #define INSN(NAME, FLOATCMP, BRANCH) \ 1154 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ 1155 FLOATCMP##_s(t0, Rs1, Rs2); \ 1156 BRANCH(t0, l, is_far); \ 1157 } \ 1158 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ 1159 FLOATCMP##_d(t0, Rs1, Rs2); \ 1160 BRANCH(t0, l, is_far); \ 1161 } 1162 1163 INSN(beq, feq, bnez); 1164 INSN(bne, feq, beqz); 1165 1166 #undef INSN 1167 1168 1169 #define INSN(NAME, FLOATCMP1, FLOATCMP2) \ 1170 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1171 bool is_far, bool is_unordered) { \ 1172 if (is_unordered) { \ 1173 /* jump if either source is NaN or condition is expected */ \ 1174 FLOATCMP2##_s(t0, Rs2, Rs1); \ 1175 beqz(t0, l, is_far); \ 1176 } else { \ 1177 /* jump if no NaN in source and condition is expected */ \ 1178 FLOATCMP1##_s(t0, Rs1, Rs2); \ 1179 bnez(t0, l, is_far); \ 1180 } \ 1181 } \ 1182 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1183 bool is_far, bool is_unordered) { \ 1184 if (is_unordered) { \ 1185 /* jump if either source is NaN or condition is expected */ \ 1186 FLOATCMP2##_d(t0, Rs2, Rs1); \ 1187 beqz(t0, l, is_far); \ 1188 } else { \ 1189 /* jump if no NaN in source and condition is expected */ \ 1190 FLOATCMP1##_d(t0, Rs1, Rs2); \ 1191 bnez(t0, l, is_far); \ 1192 } \ 1193 } 1194 1195 INSN(ble, fle, flt); 1196 INSN(blt, flt, fle); 1197 1198 #undef INSN 1199 1200 #define INSN(NAME, CMP) \ 1201 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1202 bool is_far, bool is_unordered) { \ 1203 float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ 1204 } \ 1205 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1206 bool is_far, bool is_unordered) { \ 1207 double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ 1208 } 1209 1210 INSN(bgt, blt); 1211 INSN(bge, ble); 1212 1213 #undef INSN 1214 1215 1216 #define INSN(NAME, CSR) \ 1217 void MacroAssembler::NAME(Register Rd) { \ 1218 csrr(Rd, CSR); \ 1219 } 1220 1221 INSN(rdinstret, CSR_INSTRET); 1222 INSN(rdcycle, CSR_CYCLE); 1223 INSN(rdtime, CSR_TIME); 1224 INSN(frcsr, CSR_FCSR); 1225 INSN(frrm, CSR_FRM); 1226 INSN(frflags, CSR_FFLAGS); 1227 1228 #undef INSN 1229 1230 void MacroAssembler::csrr(Register Rd, unsigned csr) { 1231 csrrs(Rd, csr, x0); 1232 } 1233 1234 #define INSN(NAME, OPFUN) \ 1235 void MacroAssembler::NAME(unsigned csr, Register Rs) { \ 1236 OPFUN(x0, csr, Rs); \ 1237 } 1238 1239 INSN(csrw, csrrw); 1240 INSN(csrs, csrrs); 1241 INSN(csrc, csrrc); 1242 1243 #undef INSN 1244 1245 #define INSN(NAME, OPFUN) \ 1246 void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ 1247 OPFUN(x0, csr, imm); \ 1248 } 1249 1250 INSN(csrwi, csrrwi); 1251 INSN(csrsi, csrrsi); 1252 INSN(csrci, csrrci); 1253 1254 #undef INSN 1255 1256 #define INSN(NAME, CSR) \ 1257 void MacroAssembler::NAME(Register Rd, Register Rs) { \ 1258 csrrw(Rd, CSR, Rs); \ 1259 } 1260 1261 INSN(fscsr, CSR_FCSR); 1262 INSN(fsrm, CSR_FRM); 1263 INSN(fsflags, CSR_FFLAGS); 1264 1265 #undef INSN 1266 
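// Single-operand forms of the CSR-swap helpers above: passing x0 as the
// destination simply discards the previous CSR value.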
1267 #define INSN(NAME) \ 1268 void MacroAssembler::NAME(Register Rs) { \ 1269 NAME(x0, Rs); \ 1270 } 1271 1272 INSN(fscsr); 1273 INSN(fsrm); 1274 INSN(fsflags); 1275 1276 #undef INSN 1277 1278 void MacroAssembler::fsrmi(Register Rd, unsigned imm) { 1279 guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); 1280 csrrwi(Rd, CSR_FRM, imm); 1281 } 1282 1283 void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { 1284 csrrwi(Rd, CSR_FFLAGS, imm); 1285 } 1286 1287 #define INSN(NAME) \ 1288 void MacroAssembler::NAME(unsigned imm) { \ 1289 NAME(x0, imm); \ 1290 } 1291 1292 INSN(fsrmi); 1293 INSN(fsflagsi); 1294 1295 #undef INSN 1296 1297 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp) { 1298 if (RestoreMXCSROnJNICalls) { 1299 Label skip_fsrmi; 1300 frrm(tmp); 1301 // Set FRM to the state we need. We do want Round to Nearest. 1302 // We don't want non-IEEE rounding modes. 1303 guarantee(RoundingMode::rne == 0, "must be"); 1304 beqz(tmp, skip_fsrmi); // Only reset FRM if it's wrong 1305 fsrmi(RoundingMode::rne); 1306 bind(skip_fsrmi); 1307 } 1308 } 1309 1310 void MacroAssembler::push_reg(Register Rs) 1311 { 1312 addi(esp, esp, 0 - wordSize); 1313 sd(Rs, Address(esp, 0)); 1314 } 1315 1316 void MacroAssembler::pop_reg(Register Rd) 1317 { 1318 ld(Rd, Address(esp, 0)); 1319 addi(esp, esp, wordSize); 1320 } 1321 1322 int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { 1323 int count = 0; 1324 // Scan bitset to accumulate register pairs 1325 for (int reg = 31; reg >= 0; reg--) { 1326 if ((1U << 31) & bitset) { 1327 regs[count++] = reg; 1328 } 1329 bitset <<= 1; 1330 } 1331 return count; 1332 } 1333 1334 // Push integer registers in the bitset supplied. Don't push sp. 1335 // Return the number of words pushed 1336 int MacroAssembler::push_reg(unsigned int bitset, Register stack) { 1337 DEBUG_ONLY(int words_pushed = 0;) 1338 unsigned char regs[32]; 1339 int count = bitset_to_regs(bitset, regs); 1340 // reserve one slot to align for odd count 1341 int offset = is_even(count) ? 0 : wordSize; 1342 1343 if (count) { 1344 addi(stack, stack, -count * wordSize - offset); 1345 } 1346 for (int i = count - 1; i >= 0; i--) { 1347 sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); 1348 DEBUG_ONLY(words_pushed++;) 1349 } 1350 1351 assert(words_pushed == count, "oops, pushed != count"); 1352 1353 return count; 1354 } 1355 1356 int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { 1357 DEBUG_ONLY(int words_popped = 0;) 1358 unsigned char regs[32]; 1359 int count = bitset_to_regs(bitset, regs); 1360 // reserve one slot to align for odd count 1361 int offset = is_even(count) ? 0 : wordSize; 1362 1363 for (int i = count - 1; i >= 0; i--) { 1364 ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); 1365 DEBUG_ONLY(words_popped++;) 1366 } 1367 1368 if (count) { 1369 addi(stack, stack, count * wordSize + offset); 1370 } 1371 assert(words_popped == count, "oops, popped != count"); 1372 1373 return count; 1374 } 1375 1376 // Push floating-point registers in the bitset supplied. 
1377 // Return the number of words pushed 1378 int MacroAssembler::push_fp(unsigned int bitset, Register stack) { 1379 DEBUG_ONLY(int words_pushed = 0;) 1380 unsigned char regs[32]; 1381 int count = bitset_to_regs(bitset, regs); 1382 int push_slots = count + (count & 1); 1383 1384 if (count) { 1385 addi(stack, stack, -push_slots * wordSize); 1386 } 1387 1388 for (int i = count - 1; i >= 0; i--) { 1389 fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); 1390 DEBUG_ONLY(words_pushed++;) 1391 } 1392 1393 assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); 1394 1395 return count; 1396 } 1397 1398 int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { 1399 DEBUG_ONLY(int words_popped = 0;) 1400 unsigned char regs[32]; 1401 int count = bitset_to_regs(bitset, regs); 1402 int pop_slots = count + (count & 1); 1403 1404 for (int i = count - 1; i >= 0; i--) { 1405 fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); 1406 DEBUG_ONLY(words_popped++;) 1407 } 1408 1409 if (count) { 1410 addi(stack, stack, pop_slots * wordSize); 1411 } 1412 1413 assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); 1414 1415 return count; 1416 } 1417 1418 static const int64_t right_32_bits = right_n_bits(32); 1419 static const int64_t right_8_bits = right_n_bits(8); 1420 1421 /** 1422 * Emits code to update CRC-32 with a byte value according to constants in table 1423 * 1424 * @param [in,out]crc Register containing the crc. 1425 * @param [in]val Register containing the byte to fold into the CRC. 1426 * @param [in]table Register containing the table of crc constants. 1427 * 1428 * uint32_t crc; 1429 * val = crc_table[(val ^ crc) & 0xFF]; 1430 * crc = val ^ (crc >> 8); 1431 * 1432 */ 1433 void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { 1434 assert_different_registers(crc, val, table); 1435 1436 xorr(val, val, crc); 1437 andi(val, val, right_8_bits); 1438 shadd(val, val, table, val, 2); 1439 lwu(val, Address(val)); 1440 srli(crc, crc, 8); 1441 xorr(crc, val, crc); 1442 } 1443 1444 /** 1445 * Emits code to update CRC-32 with a 32-bit value according to tables 0 to 3 1446 * 1447 * @param [in,out]crc Register containing the crc. 1448 * @param [in]v Register containing the 32-bit to fold into the CRC. 1449 * @param [in]table0 Register containing table 0 of crc constants. 1450 * @param [in]table1 Register containing table 1 of crc constants. 1451 * @param [in]table2 Register containing table 2 of crc constants. 1452 * @param [in]table3 Register containing table 3 of crc constants. 
 *
 * uint32_t crc;
 * v = crc ^ v
 * crc = table3[v&0xff]^table2[(v>>8)&0xff]^table1[(v>>16)&0xff]^table0[v>>24]
 *
 */
void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp1, Register tmp2, Register tmp3,
        Register table0, Register table1, Register table2, Register table3, bool upper) {
  assert_different_registers(crc, v, tmp1, tmp2, tmp3, table0, table1, table2, table3);

  if (upper)
    srli(v, v, 32);
  xorr(v, v, crc);

  andi(tmp1, v, right_8_bits);
  shadd(tmp1, tmp1, table3, tmp2, 2);
  lwu(crc, Address(tmp1));

  slli(tmp1, v, 16);
  slli(tmp3, v, 8);

  srliw(tmp1, tmp1, 24);
  srliw(tmp3, tmp3, 24);

  shadd(tmp1, tmp1, table2, tmp1, 2);
  lwu(tmp2, Address(tmp1));

  shadd(tmp3, tmp3, table1, tmp3, 2);
  xorr(crc, crc, tmp2);

  lwu(tmp2, Address(tmp3));
  // Prefer 'srli' over 'srliw' when it is not necessary to clear the upper bits.
  if (upper)
    srli(tmp1, v, 24);
  else
    srliw(tmp1, v, 24);

  // no need to clear bits other than lowest two
  shadd(tmp1, tmp1, table0, tmp1, 2);
  xorr(crc, crc, tmp2);
  lwu(tmp2, Address(tmp1));
  xorr(crc, crc, tmp2);
}


#ifdef COMPILER2
// This improvement (vectorization) is based on java.base/share/native/libzip/zlib/zcrc32.c.
// It was produced in the following steps:
//   1. in zcrc32.c, modify N to 16 and the related code,
//   2. re-generate the tables needed; we use tables of (N == 16, W == 4),
//   3. finally vectorize the code (the original implementation in zcrc32.c is scalar only).
// The new tables for the vector version are placed after table3.
void MacroAssembler::vector_update_crc32(Register crc, Register buf, Register len,
        Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
        Register table0, Register table3) {
  assert_different_registers(t1, crc, buf, len, tmp1, tmp2, tmp3, tmp4, tmp5, table0, table3);
  const int N = 16, W = 4;
  const int64_t single_table_size = 256;
  const Register blks = tmp2;
  const Register tmpTable = tmp3, tableN16 = tmp4;
  const VectorRegister vcrc = v4, vword = v8, vtmp = v12;
  Label VectorLoop;
  Label LastBlock;

  add(tableN16, table3, 1*single_table_size*sizeof(juint), tmp1);
  mv(tmp5, 0xff);

  if (MaxVectorSize == 16) {
    vsetivli(zr, N, Assembler::e32, Assembler::m4, Assembler::ma, Assembler::ta);
  } else if (MaxVectorSize == 32) {
    vsetivli(zr, N, Assembler::e32, Assembler::m2, Assembler::ma, Assembler::ta);
  } else {
    assert(MaxVectorSize > 32, "sanity");
    vsetivli(zr, N, Assembler::e32, Assembler::m1, Assembler::ma, Assembler::ta);
  }

  vmv_v_x(vcrc, zr);
  vmv_s_x(vcrc, crc);

  // blks = number of whole 64-byte blocks, len = remaining bytes
  srli(blks, len, 6);
  slli(t1, blks, 6);
  sub(len, len, t1);
  sub(blks, blks, 1);
  blez(blks, LastBlock);

  bind(VectorLoop);
  {
    mv(tmpTable, tableN16);

    vle32_v(vword, buf);
    vxor_vv(vword, vword, vcrc);

    addi(buf, buf, N*4);

    vand_vx(vtmp, vword, tmp5);
    vsll_vi(vtmp, vtmp, 2);
    vluxei32_v(vcrc, tmpTable, vtmp);

    mv(tmp1, 1);
    for (int k = 1; k < W; k++) {
      addi(tmpTable, tmpTable, single_table_size*4);

      slli(t1, tmp1, 3);
      vsrl_vx(vtmp, vword, t1);

      vand_vx(vtmp, vtmp, tmp5);
      vsll_vi(vtmp, vtmp, 2);
      vluxei32_v(vtmp, tmpTable,
vtmp); 1562 1563 vxor_vv(vcrc, vcrc, vtmp); 1564 1565 addi(tmp1, tmp1, 1); 1566 } 1567 1568 sub(blks, blks, 1); 1569 bgtz(blks, VectorLoop); 1570 } 1571 1572 bind(LastBlock); 1573 { 1574 vle32_v(vtmp, buf); 1575 vxor_vv(vcrc, vcrc, vtmp); 1576 mv(crc, zr); 1577 for (int i = 0; i < N; i++) { 1578 vmv_x_s(tmp2, vcrc); 1579 // in vmv_x_s, the value is sign-extended to SEW bits, but we need zero-extended here. 1580 zext_w(tmp2, tmp2); 1581 vslidedown_vi(vcrc, vcrc, 1); 1582 xorr(crc, crc, tmp2); 1583 for (int j = 0; j < W; j++) { 1584 andr(t1, crc, tmp5); 1585 shadd(t1, t1, table0, tmp1, 2); 1586 lwu(t1, Address(t1, 0)); 1587 srli(tmp2, crc, 8); 1588 xorr(crc, tmp2, t1); 1589 } 1590 } 1591 addi(buf, buf, N*4); 1592 } 1593 } 1594 #endif // COMPILER2 1595 1596 /** 1597 * @param crc register containing existing CRC (32-bit) 1598 * @param buf register pointing to input byte buffer (byte*) 1599 * @param len register containing number of bytes 1600 * @param table register that will contain address of CRC table 1601 * @param tmp scratch registers 1602 */ 1603 void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, 1604 Register table0, Register table1, Register table2, Register table3, 1605 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6) { 1606 assert_different_registers(crc, buf, len, table0, table1, table2, table3, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); 1607 Label L_vector_entry, 1608 L_unroll_loop, 1609 L_by4_loop_entry, L_by4_loop, 1610 L_by1_loop, L_exit; 1611 1612 const int64_t single_table_size = 256; 1613 const int64_t unroll = 16; 1614 const int64_t unroll_words = unroll*wordSize; 1615 mv(tmp5, right_32_bits); 1616 andn(crc, tmp5, crc); 1617 1618 const ExternalAddress table_addr = StubRoutines::crc_table_addr(); 1619 la(table0, table_addr); 1620 add(table1, table0, 1*single_table_size*sizeof(juint), tmp1); 1621 add(table2, table0, 2*single_table_size*sizeof(juint), tmp1); 1622 add(table3, table2, 1*single_table_size*sizeof(juint), tmp1); 1623 1624 #ifdef COMPILER2 1625 if (UseRVV) { 1626 const int64_t tmp_limit = MaxVectorSize >= 32 ? 
unroll_words*3 : unroll_words*5; 1627 mv(tmp1, tmp_limit); 1628 bge(len, tmp1, L_vector_entry); 1629 } 1630 #endif // COMPILER2 1631 1632 mv(tmp1, unroll_words); 1633 blt(len, tmp1, L_by4_loop_entry); 1634 1635 const Register loop_buf_end = tmp3; 1636 1637 align(CodeEntryAlignment); 1638 // Entry for L_unroll_loop 1639 add(loop_buf_end, buf, len); // loop_buf_end will be used as endpoint for loop below 1640 andi(len, len, unroll_words-1); // len = (len % unroll_words) 1641 sub(loop_buf_end, loop_buf_end, len); 1642 bind(L_unroll_loop); 1643 for (int i = 0; i < unroll; i++) { 1644 ld(tmp1, Address(buf, i*wordSize)); 1645 update_word_crc32(crc, tmp1, tmp2, tmp4, tmp6, table0, table1, table2, table3, false); 1646 update_word_crc32(crc, tmp1, tmp2, tmp4, tmp6, table0, table1, table2, table3, true); 1647 } 1648 1649 addi(buf, buf, unroll_words); 1650 blt(buf, loop_buf_end, L_unroll_loop); 1651 1652 bind(L_by4_loop_entry); 1653 mv(tmp1, 4); 1654 blt(len, tmp1, L_by1_loop); 1655 add(loop_buf_end, buf, len); // loop_buf_end will be used as endpoint for loop below 1656 andi(len, len, 3); 1657 sub(loop_buf_end, loop_buf_end, len); 1658 bind(L_by4_loop); 1659 lwu(tmp1, Address(buf)); 1660 update_word_crc32(crc, tmp1, tmp2, tmp4, tmp6, table0, table1, table2, table3, false); 1661 addi(buf, buf, 4); 1662 blt(buf, loop_buf_end, L_by4_loop); 1663 1664 bind(L_by1_loop); 1665 beqz(len, L_exit); 1666 1667 subw(len, len, 1); 1668 lwu(tmp1, Address(buf)); 1669 andi(tmp2, tmp1, right_8_bits); 1670 update_byte_crc32(crc, tmp2, table0); 1671 beqz(len, L_exit); 1672 1673 subw(len, len, 1); 1674 srli(tmp2, tmp1, 8); 1675 andi(tmp2, tmp2, right_8_bits); 1676 update_byte_crc32(crc, tmp2, table0); 1677 beqz(len, L_exit); 1678 1679 subw(len, len, 1); 1680 srli(tmp2, tmp1, 16); 1681 andi(tmp2, tmp2, right_8_bits); 1682 update_byte_crc32(crc, tmp2, table0); 1683 1684 #ifdef COMPILER2 1685 // put vector code here, otherwise "offset is too large" error occurs. 1686 if (UseRVV) { 1687 // only need to jump exit when UseRVV == true, it's a jump from end of block `L_by1_loop`. 1688 j(L_exit); 1689 1690 bind(L_vector_entry); 1691 vector_update_crc32(crc, buf, len, tmp1, tmp2, tmp3, tmp4, tmp6, table0, table3); 1692 1693 bgtz(len, L_by4_loop_entry); 1694 } 1695 #endif // COMPILER2 1696 1697 bind(L_exit); 1698 andn(crc, tmp5, crc); 1699 } 1700 1701 #ifdef COMPILER2 1702 // Push vector registers in the bitset supplied. 
1703 // Return the number of words pushed 1704 int MacroAssembler::push_v(unsigned int bitset, Register stack) { 1705 int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); 1706 1707 // Scan bitset to accumulate register pairs 1708 unsigned char regs[32]; 1709 int count = bitset_to_regs(bitset, regs); 1710 1711 for (int i = 0; i < count; i++) { 1712 sub(stack, stack, vector_size_in_bytes); 1713 vs1r_v(as_VectorRegister(regs[i]), stack); 1714 } 1715 1716 return count * vector_size_in_bytes / wordSize; 1717 } 1718 1719 int MacroAssembler::pop_v(unsigned int bitset, Register stack) { 1720 int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); 1721 1722 // Scan bitset to accumulate register pairs 1723 unsigned char regs[32]; 1724 int count = bitset_to_regs(bitset, regs); 1725 1726 for (int i = count - 1; i >= 0; i--) { 1727 vl1r_v(as_VectorRegister(regs[i]), stack); 1728 add(stack, stack, vector_size_in_bytes); 1729 } 1730 1731 return count * vector_size_in_bytes / wordSize; 1732 } 1733 #endif // COMPILER2 1734 1735 void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { 1736 // Push integer registers x7, x10-x17, x28-x31. 1737 push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); 1738 1739 // Push float registers f0-f7, f10-f17, f28-f31. 1740 addi(sp, sp, - wordSize * 20); 1741 int offset = 0; 1742 for (int i = 0; i < 32; i++) { 1743 if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { 1744 fsd(as_FloatRegister(i), Address(sp, wordSize * (offset++))); 1745 } 1746 } 1747 } 1748 1749 void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { 1750 int offset = 0; 1751 for (int i = 0; i < 32; i++) { 1752 if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { 1753 fld(as_FloatRegister(i), Address(sp, wordSize * (offset++))); 1754 } 1755 } 1756 addi(sp, sp, wordSize * 20); 1757 1758 pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); 1759 } 1760 1761 void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { 1762 // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) 1763 push_reg(RegSet::range(x5, x31), sp); 1764 1765 // float registers 1766 addi(sp, sp, - 32 * wordSize); 1767 for (int i = 0; i < 32; i++) { 1768 fsd(as_FloatRegister(i), Address(sp, i * wordSize)); 1769 } 1770 1771 // vector registers 1772 if (save_vectors) { 1773 sub(sp, sp, vector_size_in_bytes * VectorRegister::number_of_registers); 1774 vsetvli(t0, x0, Assembler::e64, Assembler::m8); 1775 for (int i = 0; i < VectorRegister::number_of_registers; i += 8) { 1776 add(t0, sp, vector_size_in_bytes * i); 1777 vse64_v(as_VectorRegister(i), t0); 1778 } 1779 } 1780 } 1781 1782 void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { 1783 // vector registers 1784 if (restore_vectors) { 1785 vsetvli(t0, x0, Assembler::e64, Assembler::m8); 1786 for (int i = 0; i < VectorRegister::number_of_registers; i += 8) { 1787 vle64_v(as_VectorRegister(i), sp); 1788 add(sp, sp, vector_size_in_bytes * 8); 1789 } 1790 } 1791 1792 // float registers 1793 for (int i = 0; i < 32; i++) { 1794 fld(as_FloatRegister(i), Address(sp, i * wordSize)); 1795 } 1796 addi(sp, sp, 32 * wordSize); 1797 1798 // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) 1799 pop_reg(RegSet::range(x5, x31), sp); 1800 } 1801 1802 static int 
patch_offset_in_jal(address branch, int64_t offset) { 1803 assert(Assembler::is_simm21(offset) && ((offset % 2) == 0), 1804 "offset is too large to be patched in one jal instruction!\n"); 1805 Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] 1806 Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] 1807 Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20] 1808 Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] 1809 return MacroAssembler::instruction_size; // only one instruction 1810 } 1811 1812 static int patch_offset_in_conditional_branch(address branch, int64_t offset) { 1813 assert(Assembler::is_simm13(offset) && ((offset % 2) == 0), 1814 "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n"); 1815 Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] 1816 Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] 1817 Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] 1818 Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] 1819 return MacroAssembler::instruction_size; // only one instruction 1820 } 1821 1822 static int patch_offset_in_pc_relative(address branch, int64_t offset) { 1823 const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load 1824 Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12] 1825 Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] 1826 return PC_RELATIVE_INSTRUCTION_NUM * MacroAssembler::instruction_size; 1827 } 1828 1829 static int patch_addr_in_movptr1(address branch, address target) { 1830 int32_t lower = ((intptr_t)target << 35) >> 35; 1831 int64_t upper = ((intptr_t)target - lower) >> 29; 1832 Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] 1833 Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] 1834 Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] 1835 Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. 
target[ 5: 0] ==> branch[31:20] 1836 return MacroAssembler::movptr1_instruction_size; 1837 } 1838 1839 static int patch_addr_in_movptr2(address instruction_address, address target) { 1840 uintptr_t addr = (uintptr_t)target; 1841 1842 assert(addr < (1ull << 48), "48-bit overflow in address constant"); 1843 unsigned int upper18 = (addr >> 30ull); 1844 int lower30 = (addr & 0x3fffffffu); 1845 int low12 = (lower30 << 20) >> 20; 1846 int mid18 = ((lower30 - low12) >> 12); 1847 1848 Assembler::patch(instruction_address + (MacroAssembler::instruction_size * 0), 31, 12, (upper18 & 0xfffff)); // Lui 1849 Assembler::patch(instruction_address + (MacroAssembler::instruction_size * 1), 31, 12, (mid18 & 0xfffff)); // Lui 1850 // Slli 1851 // Add 1852 Assembler::patch(instruction_address + (MacroAssembler::instruction_size * 4), 31, 20, low12 & 0xfff); // Addi/Jalr/Load 1853 1854 assert(MacroAssembler::target_addr_for_insn(instruction_address) == target, "Must be"); 1855 1856 return MacroAssembler::movptr2_instruction_size; 1857 } 1858 1859 static int patch_imm_in_li16u(address branch, uint16_t target) { 1860 Assembler::patch(branch, 31, 12, target); // patch lui only 1861 return MacroAssembler::instruction_size; 1862 } 1863 1864 int MacroAssembler::patch_imm_in_li32(address branch, int32_t target) { 1865 const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw 1866 int64_t upper = (intptr_t)target; 1867 int32_t lower = (((int32_t)target) << 20) >> 20; 1868 upper -= lower; 1869 upper = (int32_t)upper; 1870 Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. 1871 Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. 1872 return LI32_INSTRUCTIONS_NUM * MacroAssembler::instruction_size; 1873 } 1874 1875 static long get_offset_of_jal(address insn_addr) { 1876 assert_cond(insn_addr != nullptr); 1877 long offset = 0; 1878 unsigned insn = Assembler::ld_instr(insn_addr); 1879 long val = (long)Assembler::sextract(insn, 31, 12); 1880 offset |= ((val >> 19) & 0x1) << 20; 1881 offset |= (val & 0xff) << 12; 1882 offset |= ((val >> 8) & 0x1) << 11; 1883 offset |= ((val >> 9) & 0x3ff) << 1; 1884 offset = (offset << 43) >> 43; 1885 return offset; 1886 } 1887 1888 static long get_offset_of_conditional_branch(address insn_addr) { 1889 long offset = 0; 1890 assert_cond(insn_addr != nullptr); 1891 unsigned insn = Assembler::ld_instr(insn_addr); 1892 offset = (long)Assembler::sextract(insn, 31, 31); 1893 offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); 1894 offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); 1895 offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); 1896 offset = (offset << 41) >> 41; 1897 return offset; 1898 } 1899 1900 static long get_offset_of_pc_relative(address insn_addr) { 1901 long offset = 0; 1902 assert_cond(insn_addr != nullptr); 1903 offset = ((long)(Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12))) << 12; // Auipc. 1904 offset += ((long)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)); // Addi/Jalr/Load. 1905 offset = (offset << 32) >> 32; 1906 return offset; 1907 } 1908 1909 static address get_target_of_movptr1(address insn_addr) { 1910 assert_cond(insn_addr != nullptr); 1911 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 29; // Lui. 1912 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 17; // Addi. 
1913 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 6; // Addi. 1914 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20)); // Addi/Jalr/Load. 1915 return (address) target_address; 1916 } 1917 1918 static address get_target_of_movptr2(address insn_addr) { 1919 assert_cond(insn_addr != nullptr); 1920 int32_t upper18 = ((Assembler::sextract(Assembler::ld_instr(insn_addr + MacroAssembler::instruction_size * 0), 31, 12)) & 0xfffff); // Lui 1921 int32_t mid18 = ((Assembler::sextract(Assembler::ld_instr(insn_addr + MacroAssembler::instruction_size * 1), 31, 12)) & 0xfffff); // Lui 1922 // 2 // Slli 1923 // 3 // Add 1924 int32_t low12 = ((Assembler::sextract(Assembler::ld_instr(insn_addr + MacroAssembler::instruction_size * 4), 31, 20))); // Addi/Jalr/Load. 1925 address ret = (address)(((intptr_t)upper18<<30ll) + ((intptr_t)mid18<<12ll) + low12); 1926 return ret; 1927 } 1928 1929 address MacroAssembler::get_target_of_li32(address insn_addr) { 1930 assert_cond(insn_addr != nullptr); 1931 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 12; // Lui. 1932 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)); // Addiw. 1933 return (address)target_address; 1934 } 1935 1936 // Patch any kind of instruction; there may be several instructions. 1937 // Return the total length (in bytes) of the instructions. 1938 int MacroAssembler::pd_patch_instruction_size(address instruction_address, address target) { 1939 assert_cond(instruction_address != nullptr); 1940 int64_t offset = target - instruction_address; 1941 if (MacroAssembler::is_jal_at(instruction_address)) { // jal 1942 return patch_offset_in_jal(instruction_address, offset); 1943 } else if (MacroAssembler::is_branch_at(instruction_address)) { // beq/bge/bgeu/blt/bltu/bne 1944 return patch_offset_in_conditional_branch(instruction_address, offset); 1945 } else if (MacroAssembler::is_pc_relative_at(instruction_address)) { // auipc, addi/jalr/load 1946 return patch_offset_in_pc_relative(instruction_address, offset); 1947 } else if (MacroAssembler::is_movptr1_at(instruction_address)) { // movptr1 1948 return patch_addr_in_movptr1(instruction_address, target); 1949 } else if (MacroAssembler::is_movptr2_at(instruction_address)) { // movptr2 1950 return patch_addr_in_movptr2(instruction_address, target); 1951 } else if (MacroAssembler::is_li32_at(instruction_address)) { // li32 1952 int64_t imm = (intptr_t)target; 1953 return patch_imm_in_li32(instruction_address, (int32_t)imm); 1954 } else if (MacroAssembler::is_li16u_at(instruction_address)) { 1955 int64_t imm = (intptr_t)target; 1956 return patch_imm_in_li16u(instruction_address, (uint16_t)imm); 1957 } else { 1958 #ifdef ASSERT 1959 tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", 1960 Assembler::ld_instr(instruction_address), p2i(instruction_address)); 1961 Disassembler::decode(instruction_address - 16, instruction_address + 16); 1962 #endif 1963 ShouldNotReachHere(); 1964 return -1; 1965 } 1966 } 1967 1968 address MacroAssembler::target_addr_for_insn(address insn_addr) { 1969 long offset = 0; 1970 assert_cond(insn_addr != nullptr); 1971 if (MacroAssembler::is_jal_at(insn_addr)) { // jal 1972 offset = get_offset_of_jal(insn_addr); 1973 } else if (MacroAssembler::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne 1974 offset = 
get_offset_of_conditional_branch(insn_addr); 1975 } else if (MacroAssembler::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load 1976 offset = get_offset_of_pc_relative(insn_addr); 1977 } else if (MacroAssembler::is_movptr1_at(insn_addr)) { // movptr1 1978 return get_target_of_movptr1(insn_addr); 1979 } else if (MacroAssembler::is_movptr2_at(insn_addr)) { // movptr2 1980 return get_target_of_movptr2(insn_addr); 1981 } else if (MacroAssembler::is_li32_at(insn_addr)) { // li32 1982 return get_target_of_li32(insn_addr); 1983 } else { 1984 ShouldNotReachHere(); 1985 } 1986 return address(((uintptr_t)insn_addr + offset)); 1987 } 1988 1989 int MacroAssembler::patch_oop(address insn_addr, address o) { 1990 // OOPs are either narrow (32 bits) or wide (48 bits). We encode 1991 // narrow OOPs by setting the upper 16 bits in the first 1992 // instruction. 1993 if (MacroAssembler::is_li32_at(insn_addr)) { 1994 // Move narrow OOP 1995 uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); 1996 return patch_imm_in_li32(insn_addr, (int32_t)n); 1997 } else if (MacroAssembler::is_movptr1_at(insn_addr)) { 1998 // Move wide OOP 1999 return patch_addr_in_movptr1(insn_addr, o); 2000 } else if (MacroAssembler::is_movptr2_at(insn_addr)) { 2001 // Move wide OOP 2002 return patch_addr_in_movptr2(insn_addr, o); 2003 } 2004 ShouldNotReachHere(); 2005 return -1; 2006 } 2007 2008 void MacroAssembler::reinit_heapbase() { 2009 if (UseCompressedOops) { 2010 if (Universe::is_fully_initialized()) { 2011 mv(xheapbase, CompressedOops::base()); 2012 } else { 2013 ExternalAddress target(CompressedOops::base_addr()); 2014 relocate(target.rspec(), [&] { 2015 int32_t offset; 2016 la(xheapbase, target.target(), offset); 2017 ld(xheapbase, Address(xheapbase, offset)); 2018 }); 2019 } 2020 } 2021 } 2022 2023 void MacroAssembler::movptr(Register Rd, address addr, Register temp) { 2024 int offset = 0; 2025 movptr(Rd, addr, offset, temp); 2026 addi(Rd, Rd, offset); 2027 } 2028 2029 void MacroAssembler::movptr(Register Rd, address addr, int32_t &offset, Register temp) { 2030 uint64_t uimm64 = (uint64_t)addr; 2031 #ifndef PRODUCT 2032 { 2033 char buffer[64]; 2034 snprintf(buffer, sizeof(buffer), "0x%" PRIx64, uimm64); 2035 block_comment(buffer); 2036 } 2037 #endif 2038 assert(uimm64 < (1ull << 48), "48-bit overflow in address constant"); 2039 2040 if (temp == noreg) { 2041 movptr1(Rd, uimm64, offset); 2042 } else { 2043 movptr2(Rd, uimm64, offset, temp); 2044 } 2045 } 2046 2047 void MacroAssembler::movptr1(Register Rd, uint64_t imm64, int32_t &offset) { 2048 // Load upper 31 bits 2049 // 2050 // In case of 11th bit of `lower` is 0, it's straightforward to understand. 2051 // In case of 11th bit of `lower` is 1, it's a bit tricky, to help understand, 2052 // imagine divide both `upper` and `lower` into 2 parts respectively, i.e. 2053 // [upper_20, upper_12], [lower_20, lower_12], they are the same just before 2054 // `lower = (lower << 52) >> 52;`. 
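// (A concrete illustration added for clarity, with made-up numbers: if imm = 0x12800 then lower sign-extends from 0x800 to -0x800, so upper becomes 0x13000; lui materializes 0x13000 and the later addi of -0x800 brings Rd back to 0x12800.)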
2055 // After `upper -= lower;`, 2056 // upper_20' = upper_20 - (-1) == upper_20 + 1 2057 // upper_12 = 0x000 2058 // After `lui(Rd, upper);`, `Rd` = upper_20' << 12 2059 // Also divide `Rd` into 2 parts [Rd_20, Rd_12], 2060 // Rd_20 == upper_20' 2061 // Rd_12 == 0x000 2062 // After `addi(Rd, Rd, lower);`, 2063 // Rd_20 = upper_20' + (-1) == upper_20 + 1 - 1 = upper_20 2064 // Rd_12 = lower_12 2065 // So, finally Rd == [upper_20, lower_12] 2066 int64_t imm = imm64 >> 17; 2067 int64_t upper = imm, lower = imm; 2068 lower = (lower << 52) >> 52; 2069 upper -= lower; 2070 upper = (int32_t)upper; 2071 lui(Rd, upper); 2072 addi(Rd, Rd, lower); 2073 2074 // Load the remaining 17 bits. 2075 slli(Rd, Rd, 11); 2076 addi(Rd, Rd, (imm64 >> 6) & 0x7ff); 2077 slli(Rd, Rd, 6); 2078 2079 // This offset will be used by following jalr/ld. 2080 offset = imm64 & 0x3f; 2081 } 2082 2083 void MacroAssembler::movptr2(Register Rd, uint64_t addr, int32_t &offset, Register tmp) { 2084 assert_different_registers(Rd, tmp, noreg); 2085 2086 // addr: [upper18, lower30[mid18, lower12]] 2087 2088 int64_t upper18 = addr >> 18; 2089 lui(tmp, upper18); 2090 2091 int64_t lower30 = addr & 0x3fffffff; 2092 int64_t mid18 = lower30, lower12 = lower30; 2093 lower12 = (lower12 << 52) >> 52; 2094 // For this tricky part (`mid18 -= lower12;` + `offset = lower12;`), 2095 // please refer to movptr1 above. 2096 mid18 -= (int32_t)lower12; 2097 lui(Rd, mid18); 2098 2099 slli(tmp, tmp, 18); 2100 add(Rd, Rd, tmp); 2101 2102 offset = lower12; 2103 } 2104 2105 void MacroAssembler::add(Register Rd, Register Rn, int64_t increment, Register temp) { 2106 if (is_simm12(increment)) { 2107 addi(Rd, Rn, increment); 2108 } else { 2109 assert_different_registers(Rn, temp); 2110 li(temp, increment); 2111 add(Rd, Rn, temp); 2112 } 2113 } 2114 2115 void MacroAssembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) { 2116 if (is_simm12(increment)) { 2117 addiw(Rd, Rn, increment); 2118 } else { 2119 assert_different_registers(Rn, temp); 2120 li(temp, increment); 2121 addw(Rd, Rn, temp); 2122 } 2123 } 2124 2125 void MacroAssembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) { 2126 add(Rd, Rn, -decrement, temp); 2127 } 2128 2129 void MacroAssembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) { 2130 addw(Rd, Rn, -decrement, temp); 2131 } 2132 2133 void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { 2134 andr(Rd, Rs1, Rs2); 2135 sign_extend(Rd, Rd, 32); 2136 } 2137 2138 void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { 2139 orr(Rd, Rs1, Rs2); 2140 sign_extend(Rd, Rd, 32); 2141 } 2142 2143 void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { 2144 xorr(Rd, Rs1, Rs2); 2145 sign_extend(Rd, Rd, 32); 2146 } 2147 2148 // Rd = Rs1 & (~Rs2) 2149 void MacroAssembler::andn(Register Rd, Register Rs1, Register Rs2) { 2150 if (UseZbb) { 2151 Assembler::andn(Rd, Rs1, Rs2); 2152 return; 2153 } 2154 2155 notr(Rd, Rs2); 2156 andr(Rd, Rs1, Rd); 2157 } 2158 2159 // Rd = Rs1 | (~Rs2) 2160 void MacroAssembler::orn(Register Rd, Register Rs1, Register Rs2) { 2161 if (UseZbb) { 2162 Assembler::orn(Rd, Rs1, Rs2); 2163 return; 2164 } 2165 2166 notr(Rd, Rs2); 2167 orr(Rd, Rs1, Rd); 2168 } 2169 2170 // Note: load_unsigned_short used to be called load_unsigned_word.
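// (Editorial note, illustrative only: the sized loaders below are what load_sized_value() further down dispatches to; e.g. a call such as load_sized_value(dst, src, 2, /*is_signed*/ false) ends up in load_unsigned_short, i.e. a single lhu.)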
2171 int MacroAssembler::load_unsigned_short(Register dst, Address src) { 2172 int off = offset(); 2173 lhu(dst, src); 2174 return off; 2175 } 2176 2177 int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 2178 int off = offset(); 2179 lbu(dst, src); 2180 return off; 2181 } 2182 2183 int MacroAssembler::load_signed_short(Register dst, Address src) { 2184 int off = offset(); 2185 lh(dst, src); 2186 return off; 2187 } 2188 2189 int MacroAssembler::load_signed_byte(Register dst, Address src) { 2190 int off = offset(); 2191 lb(dst, src); 2192 return off; 2193 } 2194 2195 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 2196 switch (size_in_bytes) { 2197 case 8: ld(dst, src); break; 2198 case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; 2199 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 2200 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 2201 default: ShouldNotReachHere(); 2202 } 2203 } 2204 2205 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes) { 2206 switch (size_in_bytes) { 2207 case 8: sd(src, dst); break; 2208 case 4: sw(src, dst); break; 2209 case 2: sh(src, dst); break; 2210 case 1: sb(src, dst); break; 2211 default: ShouldNotReachHere(); 2212 } 2213 } 2214 2215 // granularity is 1 OR 2 bytes per load. dst and src.base() allowed to be the same register 2216 void MacroAssembler::load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) { 2217 if (granularity != 1 && granularity != 2) { 2218 ShouldNotReachHere(); 2219 } 2220 if (AvoidUnalignedAccesses && (granularity != 2)) { 2221 assert_different_registers(dst, tmp); 2222 assert_different_registers(tmp, src.base()); 2223 is_signed ? lb(tmp, Address(src.base(), src.offset() + 1)) : lbu(tmp, Address(src.base(), src.offset() + 1)); 2224 slli(tmp, tmp, 8); 2225 lbu(dst, src); 2226 add(dst, dst, tmp); 2227 } else { 2228 is_signed ? lh(dst, src) : lhu(dst, src); 2229 } 2230 } 2231 2232 // granularity is 1, 2 OR 4 bytes per load, if granularity 2 or 4 then dst and src.base() allowed to be the same register 2233 void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) { 2234 if (AvoidUnalignedAccesses && (granularity != 4)) { 2235 switch(granularity) { 2236 case 1: 2237 assert_different_registers(dst, tmp, src.base()); 2238 lbu(dst, src); 2239 lbu(tmp, Address(src.base(), src.offset() + 1)); 2240 slli(tmp, tmp, 8); 2241 add(dst, dst, tmp); 2242 lbu(tmp, Address(src.base(), src.offset() + 2)); 2243 slli(tmp, tmp, 16); 2244 add(dst, dst, tmp); 2245 is_signed ? lb(tmp, Address(src.base(), src.offset() + 3)) : lbu(tmp, Address(src.base(), src.offset() + 3)); 2246 slli(tmp, tmp, 24); 2247 add(dst, dst, tmp); 2248 break; 2249 case 2: 2250 assert_different_registers(dst, tmp); 2251 assert_different_registers(tmp, src.base()); 2252 is_signed ? lh(tmp, Address(src.base(), src.offset() + 2)) : lhu(tmp, Address(src.base(), src.offset() + 2)); 2253 slli(tmp, tmp, 16); 2254 lhu(dst, src); 2255 add(dst, dst, tmp); 2256 break; 2257 default: 2258 ShouldNotReachHere(); 2259 } 2260 } else { 2261 is_signed ? 
lw(dst, src) : lwu(dst, src); 2262 } 2263 } 2264 2265 // granularity is 1, 2, 4 or 8 bytes per load, if granularity 4 or 8 then dst and src.base() allowed to be same register 2266 void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tmp, int granularity) { 2267 if (AvoidUnalignedAccesses && (granularity != 8)) { 2268 switch(granularity){ 2269 case 1: 2270 assert_different_registers(dst, tmp, src.base()); 2271 lbu(dst, src); 2272 lbu(tmp, Address(src.base(), src.offset() + 1)); 2273 slli(tmp, tmp, 8); 2274 add(dst, dst, tmp); 2275 lbu(tmp, Address(src.base(), src.offset() + 2)); 2276 slli(tmp, tmp, 16); 2277 add(dst, dst, tmp); 2278 lbu(tmp, Address(src.base(), src.offset() + 3)); 2279 slli(tmp, tmp, 24); 2280 add(dst, dst, tmp); 2281 lbu(tmp, Address(src.base(), src.offset() + 4)); 2282 slli(tmp, tmp, 32); 2283 add(dst, dst, tmp); 2284 lbu(tmp, Address(src.base(), src.offset() + 5)); 2285 slli(tmp, tmp, 40); 2286 add(dst, dst, tmp); 2287 lbu(tmp, Address(src.base(), src.offset() + 6)); 2288 slli(tmp, tmp, 48); 2289 add(dst, dst, tmp); 2290 lbu(tmp, Address(src.base(), src.offset() + 7)); 2291 slli(tmp, tmp, 56); 2292 add(dst, dst, tmp); 2293 break; 2294 case 2: 2295 assert_different_registers(dst, tmp, src.base()); 2296 lhu(dst, src); 2297 lhu(tmp, Address(src.base(), src.offset() + 2)); 2298 slli(tmp, tmp, 16); 2299 add(dst, dst, tmp); 2300 lhu(tmp, Address(src.base(), src.offset() + 4)); 2301 slli(tmp, tmp, 32); 2302 add(dst, dst, tmp); 2303 lhu(tmp, Address(src.base(), src.offset() + 6)); 2304 slli(tmp, tmp, 48); 2305 add(dst, dst, tmp); 2306 break; 2307 case 4: 2308 assert_different_registers(dst, tmp); 2309 assert_different_registers(tmp, src.base()); 2310 lwu(tmp, Address(src.base(), src.offset() + 4)); 2311 slli(tmp, tmp, 32); 2312 lwu(dst, src); 2313 add(dst, dst, tmp); 2314 break; 2315 default: 2316 ShouldNotReachHere(); 2317 } 2318 } else { 2319 ld(dst, src); 2320 } 2321 } 2322 2323 2324 // reverse bytes in halfword in lower 16 bits and sign-extend 2325 // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) 2326 void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { 2327 if (UseZbb) { 2328 rev8(Rd, Rs); 2329 srai(Rd, Rd, 48); 2330 return; 2331 } 2332 assert_different_registers(Rs, tmp); 2333 assert_different_registers(Rd, tmp); 2334 srli(tmp, Rs, 8); 2335 andi(tmp, tmp, 0xFF); 2336 slli(Rd, Rs, 56); 2337 srai(Rd, Rd, 48); // sign-extend 2338 orr(Rd, Rd, tmp); 2339 } 2340 2341 // reverse bytes in lower word and sign-extend 2342 // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) 2343 void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2344 if (UseZbb) { 2345 rev8(Rd, Rs); 2346 srai(Rd, Rd, 32); 2347 return; 2348 } 2349 assert_different_registers(Rs, tmp1, tmp2); 2350 assert_different_registers(Rd, tmp1, tmp2); 2351 revb_h_w_u(Rd, Rs, tmp1, tmp2); 2352 slli(tmp2, Rd, 48); 2353 srai(tmp2, tmp2, 32); // sign-extend 2354 srli(Rd, Rd, 16); 2355 orr(Rd, Rd, tmp2); 2356 } 2357 2358 // reverse bytes in halfword in lower 16 bits and zero-extend 2359 // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 2360 void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { 2361 if (UseZbb) { 2362 rev8(Rd, Rs); 2363 srli(Rd, Rd, 48); 2364 return; 2365 } 2366 assert_different_registers(Rs, tmp); 2367 assert_different_registers(Rd, tmp); 2368 srli(tmp, Rs, 8); 2369 andi(tmp, tmp, 0xFF); 2370 andi(Rd, Rs, 0xFF); 2371 slli(Rd, Rd, 8); 2372 orr(Rd, Rd, tmp); 2373 } 2374 2375 // reverse 
bytes in halfwords in lower 32 bits and zero-extend 2376 // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 2377 void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2378 if (UseZbb) { 2379 rev8(Rd, Rs); 2380 rori(Rd, Rd, 32); 2381 roriw(Rd, Rd, 16); 2382 zero_extend(Rd, Rd, 32); 2383 return; 2384 } 2385 assert_different_registers(Rs, tmp1, tmp2); 2386 assert_different_registers(Rd, tmp1, tmp2); 2387 srli(tmp2, Rs, 16); 2388 revb_h_h_u(tmp2, tmp2, tmp1); 2389 revb_h_h_u(Rd, Rs, tmp1); 2390 slli(tmp2, tmp2, 16); 2391 orr(Rd, Rd, tmp2); 2392 } 2393 2394 // This method is only used for revb_h 2395 // Rd = Rs[47:0] Rs[55:48] Rs[63:56] 2396 void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2397 assert_different_registers(Rs, tmp1, tmp2); 2398 assert_different_registers(Rd, tmp1); 2399 srli(tmp1, Rs, 48); 2400 andi(tmp2, tmp1, 0xFF); 2401 slli(tmp2, tmp2, 8); 2402 srli(tmp1, tmp1, 8); 2403 orr(tmp1, tmp1, tmp2); 2404 slli(Rd, Rs, 16); 2405 orr(Rd, Rd, tmp1); 2406 } 2407 2408 // reverse bytes in each halfword 2409 // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] 2410 void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2411 if (UseZbb) { 2412 assert_different_registers(Rs, tmp1); 2413 assert_different_registers(Rd, tmp1); 2414 rev8(Rd, Rs); 2415 zero_extend(tmp1, Rd, 32); 2416 roriw(tmp1, tmp1, 16); 2417 slli(tmp1, tmp1, 32); 2418 srli(Rd, Rd, 32); 2419 roriw(Rd, Rd, 16); 2420 zero_extend(Rd, Rd, 32); 2421 orr(Rd, Rd, tmp1); 2422 return; 2423 } 2424 assert_different_registers(Rs, tmp1, tmp2); 2425 assert_different_registers(Rd, tmp1, tmp2); 2426 revb_h_helper(Rd, Rs, tmp1, tmp2); 2427 for (int i = 0; i < 3; ++i) { 2428 revb_h_helper(Rd, Rd, tmp1, tmp2); 2429 } 2430 } 2431 2432 // reverse bytes in each word 2433 // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] 2434 void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2435 if (UseZbb) { 2436 rev8(Rd, Rs); 2437 rori(Rd, Rd, 32); 2438 return; 2439 } 2440 assert_different_registers(Rs, tmp1, tmp2); 2441 assert_different_registers(Rd, tmp1, tmp2); 2442 revb(Rd, Rs, tmp1, tmp2); 2443 ror_imm(Rd, Rd, 32); 2444 } 2445 2446 // reverse bytes in doubleword 2447 // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] 2448 void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2449 if (UseZbb) { 2450 rev8(Rd, Rs); 2451 return; 2452 } 2453 assert_different_registers(Rs, tmp1, tmp2); 2454 assert_different_registers(Rd, tmp1, tmp2); 2455 andi(tmp1, Rs, 0xFF); 2456 slli(tmp1, tmp1, 8); 2457 for (int step = 8; step < 56; step += 8) { 2458 srli(tmp2, Rs, step); 2459 andi(tmp2, tmp2, 0xFF); 2460 orr(tmp1, tmp1, tmp2); 2461 slli(tmp1, tmp1, 8); 2462 } 2463 srli(Rd, Rs, 56); 2464 andi(Rd, Rd, 0xFF); 2465 orr(Rd, tmp1, Rd); 2466 } 2467 2468 // rotate right with shift bits 2469 void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) 2470 { 2471 if (UseZbb) { 2472 rori(dst, src, shift); 2473 return; 2474 } 2475 2476 assert_different_registers(dst, tmp); 2477 assert_different_registers(src, tmp); 2478 assert(shift < 64, "shift amount must be < 64"); 2479 slli(tmp, src, 64 - shift); 2480 srli(dst, src, shift); 2481 orr(dst, dst, tmp); 2482 } 2483 2484 // rotate left with shift bits, 32-bit version 2485 void 
MacroAssembler::rolw_imm(Register dst, Register src, uint32_t shift, Register tmp) { 2486 if (UseZbb) { 2487 // no roliw available 2488 roriw(dst, src, 32 - shift); 2489 return; 2490 } 2491 2492 assert_different_registers(dst, tmp); 2493 assert_different_registers(src, tmp); 2494 assert(shift < 32, "shift amount must be < 32"); 2495 srliw(tmp, src, 32 - shift); 2496 slliw(dst, src, shift); 2497 orr(dst, dst, tmp); 2498 } 2499 2500 void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { 2501 if (is_simm12(imm)) { 2502 and_imm12(Rd, Rn, imm); 2503 } else { 2504 assert_different_registers(Rn, tmp); 2505 mv(tmp, imm); 2506 andr(Rd, Rn, tmp); 2507 } 2508 } 2509 2510 void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { 2511 ld(tmp1, adr); 2512 if (src.is_register()) { 2513 orr(tmp1, tmp1, src.as_register()); 2514 } else { 2515 if (is_simm12(src.as_constant())) { 2516 ori(tmp1, tmp1, src.as_constant()); 2517 } else { 2518 assert_different_registers(tmp1, tmp2); 2519 mv(tmp2, src.as_constant()); 2520 orr(tmp1, tmp1, tmp2); 2521 } 2522 } 2523 sd(tmp1, adr); 2524 } 2525 2526 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L) { 2527 assert_different_registers(oop, trial_klass, tmp1, tmp2); 2528 if (UseCompressedClassPointers) { 2529 lwu(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 2530 if (CompressedKlassPointers::base() == nullptr) { 2531 slli(tmp1, tmp1, CompressedKlassPointers::shift()); 2532 beq(trial_klass, tmp1, L); 2533 return; 2534 } 2535 decode_klass_not_null(tmp1, tmp2); 2536 } else { 2537 ld(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 2538 } 2539 beq(trial_klass, tmp1, L); 2540 } 2541 2542 // Move an oop into a register. 2543 void MacroAssembler::movoop(Register dst, jobject obj) { 2544 int oop_index; 2545 if (obj == nullptr) { 2546 oop_index = oop_recorder()->allocate_oop_index(obj); 2547 } else { 2548 #ifdef ASSERT 2549 { 2550 ThreadInVMfromUnknown tiv; 2551 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 2552 } 2553 #endif 2554 oop_index = oop_recorder()->find_index(obj); 2555 } 2556 RelocationHolder rspec = oop_Relocation::spec(oop_index); 2557 2558 if (BarrierSet::barrier_set()->barrier_set_assembler()->supports_instruction_patching()) { 2559 la(dst, Address((address)obj, rspec)); 2560 } else { 2561 address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address 2562 ld_constant(dst, Address(dummy, rspec)); 2563 } 2564 } 2565 2566 // Move a metadata address into a register. 2567 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { 2568 assert((uintptr_t)obj < (1ull << 48), "48-bit overflow in metadata"); 2569 int oop_index; 2570 if (obj == nullptr) { 2571 oop_index = oop_recorder()->allocate_metadata_index(obj); 2572 } else { 2573 oop_index = oop_recorder()->find_index(obj); 2574 } 2575 RelocationHolder rspec = metadata_Relocation::spec(oop_index); 2576 la(dst, Address((address)obj, rspec)); 2577 } 2578 2579 // Writes to stack successive pages until offset reached to check for 2580 // stack overflow + shadow pages. This clobbers tmp. 2581 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 2582 assert_different_registers(tmp, size, t0); 2583 // Bang stack for total size given plus shadow page size. 2584 // Bang one page at a time because large size can bang beyond yellow and 2585 // red zones. 
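// (Editorial note, illustrative only: the loop below stores into the stack area one page at a time, as described above, so that a stack overflow is detected here, where it can still be handled, rather than somewhere inside the new frame.)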
2586 mv(t0, (int)os::vm_page_size()); 2587 Label loop; 2588 bind(loop); 2589 sub(tmp, sp, t0); 2590 subw(size, size, t0); 2591 sd(size, Address(tmp)); 2592 bgtz(size, loop); 2593 2594 // Bang down shadow pages too. 2595 // At this point, (tmp-0) is the last address touched, so don't 2596 // touch it again. (It was touched as (tmp-pagesize) but then tmp 2597 // was post-decremented.) Skip this address by starting at i=1, and 2598 // touch a few more pages below. N.B. It is important to touch all 2599 // the way down to and including i=StackShadowPages. 2600 for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / (int)os::vm_page_size()) - 1; i++) { 2601 // this could be any sized move but this can be a debugging crumb 2602 // so the bigger the better. 2603 sub(tmp, tmp, (int)os::vm_page_size()); 2604 sd(size, Address(tmp, 0)); 2605 } 2606 } 2607 2608 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { 2609 int32_t offset = 0; 2610 _masm = masm; 2611 ExternalAddress target((address)flag_addr); 2612 _masm->relocate(target.rspec(), [&] { 2613 int32_t offset; 2614 _masm->la(t0, target.target(), offset); 2615 _masm->lbu(t0, Address(t0, offset)); 2616 }); 2617 if (value) { 2618 _masm->bnez(t0, _label); 2619 } else { 2620 _masm->beqz(t0, _label); 2621 } 2622 } 2623 2624 SkipIfEqual::~SkipIfEqual() { 2625 _masm->bind(_label); 2626 _masm = nullptr; 2627 } 2628 2629 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, Register tmp2) { 2630 const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 2631 ld(dst, Address(xmethod, Method::const_offset())); 2632 ld(dst, Address(dst, ConstMethod::constants_offset())); 2633 ld(dst, Address(dst, ConstantPool::pool_holder_offset())); 2634 ld(dst, Address(dst, mirror_offset)); 2635 resolve_oop_handle(dst, tmp1, tmp2); 2636 } 2637 2638 void MacroAssembler::resolve_oop_handle(Register result, Register tmp1, Register tmp2) { 2639 // OopHandle::resolve is an indirection. 2640 assert_different_registers(result, tmp1, tmp2); 2641 access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp1, tmp2); 2642 } 2643 2644 // ((WeakHandle)result).resolve() 2645 void MacroAssembler::resolve_weak_handle(Register result, Register tmp1, Register tmp2) { 2646 assert_different_registers(result, tmp1, tmp2); 2647 Label resolved; 2648 2649 // A null weak handle resolves to null. 2650 beqz(result, resolved); 2651 2652 // Only 64 bit platforms support GCs that require a tmp register 2653 // Only IN_HEAP loads require a thread_tmp register 2654 // WeakHandle::resolve is an indirection like jweak.
2655 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, 2656 result, Address(result), tmp1, tmp2); 2657 bind(resolved); 2658 } 2659 2660 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, 2661 Register dst, Address src, 2662 Register tmp1, Register tmp2) { 2663 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2664 decorators = AccessInternal::decorator_fixup(decorators, type); 2665 bool as_raw = (decorators & AS_RAW) != 0; 2666 if (as_raw) { 2667 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2); 2668 } else { 2669 bs->load_at(this, decorators, type, dst, src, tmp1, tmp2); 2670 } 2671 } 2672 2673 void MacroAssembler::null_check(Register reg, int offset) { 2674 if (needs_explicit_null_check(offset)) { 2675 // provoke OS null exception if reg is null by 2676 // accessing M[reg] w/o changing any registers 2677 // NOTE: this is plenty to provoke a segv 2678 ld(zr, Address(reg, 0)); 2679 } else { 2680 // nothing to do, (later) access of M[reg + offset] 2681 // will provoke OS null exception if reg is null 2682 } 2683 } 2684 2685 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, 2686 Address dst, Register val, 2687 Register tmp1, Register tmp2, Register tmp3) { 2688 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2689 decorators = AccessInternal::decorator_fixup(decorators, type); 2690 bool as_raw = (decorators & AS_RAW) != 0; 2691 if (as_raw) { 2692 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3); 2693 } else { 2694 bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3); 2695 } 2696 } 2697 2698 // Algorithm must match CompressedOops::encode. 2699 void MacroAssembler::encode_heap_oop(Register d, Register s) { 2700 verify_oop_msg(s, "broken oop in encode_heap_oop"); 2701 if (CompressedOops::base() == nullptr) { 2702 if (CompressedOops::shift() != 0) { 2703 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2704 srli(d, s, LogMinObjAlignmentInBytes); 2705 } else { 2706 mv(d, s); 2707 } 2708 } else { 2709 Label notNull; 2710 sub(d, s, xheapbase); 2711 bgez(d, notNull); 2712 mv(d, zr); 2713 bind(notNull); 2714 if (CompressedOops::shift() != 0) { 2715 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2716 srli(d, d, CompressedOops::shift()); 2717 } 2718 } 2719 } 2720 2721 void MacroAssembler::encode_heap_oop_not_null(Register r) { 2722 #ifdef ASSERT 2723 if (CheckCompressedOops) { 2724 Label ok; 2725 bnez(r, ok); 2726 stop("null oop passed to encode_heap_oop_not_null"); 2727 bind(ok); 2728 } 2729 #endif 2730 verify_oop_msg(r, "broken oop in encode_heap_oop_not_null"); 2731 if (CompressedOops::base() != nullptr) { 2732 sub(r, r, xheapbase); 2733 } 2734 if (CompressedOops::shift() != 0) { 2735 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2736 srli(r, r, LogMinObjAlignmentInBytes); 2737 } 2738 } 2739 2740 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 2741 #ifdef ASSERT 2742 if (CheckCompressedOops) { 2743 Label ok; 2744 bnez(src, ok); 2745 stop("null oop passed to encode_heap_oop_not_null2"); 2746 bind(ok); 2747 } 2748 #endif 2749 verify_oop_msg(src, "broken oop in encode_heap_oop_not_null2"); 2750 2751 Register data = src; 2752 if (CompressedOops::base() != nullptr) { 2753 sub(dst, src, xheapbase); 2754 data = dst; 2755 } 2756 if (CompressedOops::shift() != 0) { 2757 
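// (Editorial note, illustrative only: with a non-null base the narrow value is (src - heap base) >> LogMinObjAlignmentInBytes, matching CompressedOops::encode; with a null base only the shift is applied.)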
assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2758 srli(dst, data, LogMinObjAlignmentInBytes); 2759 data = dst; 2760 } 2761 if (data == src) { 2762 mv(dst, src); 2763 } 2764 } 2765 2766 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { 2767 assert_different_registers(dst, tmp); 2768 assert_different_registers(src, tmp); 2769 if (UseCompressedClassPointers) { 2770 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); 2771 decode_klass_not_null(dst, tmp); 2772 } else { 2773 ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); 2774 } 2775 } 2776 2777 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { 2778 // FIXME: Should this be a store release? concurrent gcs assumes 2779 // klass length is valid if klass field is not null. 2780 if (UseCompressedClassPointers) { 2781 encode_klass_not_null(src, tmp); 2782 sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2783 } else { 2784 sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2785 } 2786 } 2787 2788 void MacroAssembler::store_klass_gap(Register dst, Register src) { 2789 if (UseCompressedClassPointers) { 2790 // Store to klass gap in destination 2791 sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); 2792 } 2793 } 2794 2795 void MacroAssembler::decode_klass_not_null(Register r, Register tmp) { 2796 assert_different_registers(r, tmp); 2797 decode_klass_not_null(r, r, tmp); 2798 } 2799 2800 void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { 2801 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2802 2803 if (CompressedKlassPointers::base() == nullptr) { 2804 if (CompressedKlassPointers::shift() != 0) { 2805 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2806 slli(dst, src, LogKlassAlignmentInBytes); 2807 } else { 2808 mv(dst, src); 2809 } 2810 return; 2811 } 2812 2813 Register xbase = dst; 2814 if (dst == src) { 2815 xbase = tmp; 2816 } 2817 2818 assert_different_registers(src, xbase); 2819 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2820 2821 if (CompressedKlassPointers::shift() != 0) { 2822 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2823 assert_different_registers(t0, xbase); 2824 shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); 2825 } else { 2826 add(dst, xbase, src); 2827 } 2828 } 2829 2830 void MacroAssembler::encode_klass_not_null(Register r, Register tmp) { 2831 assert_different_registers(r, tmp); 2832 encode_klass_not_null(r, r, tmp); 2833 } 2834 2835 void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { 2836 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2837 2838 if (CompressedKlassPointers::base() == nullptr) { 2839 if (CompressedKlassPointers::shift() != 0) { 2840 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2841 srli(dst, src, LogKlassAlignmentInBytes); 2842 } else { 2843 mv(dst, src); 2844 } 2845 return; 2846 } 2847 2848 if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && 2849 CompressedKlassPointers::shift() == 0) { 2850 zero_extend(dst, src, 32); 2851 return; 2852 } 2853 2854 Register xbase = dst; 2855 if (dst == src) { 2856 xbase = tmp; 2857 } 2858 2859 assert_different_registers(src, xbase); 2860 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2861 sub(dst, src, xbase); 2862 if 
(CompressedKlassPointers::shift() != 0) { 2863 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2864 srli(dst, dst, LogKlassAlignmentInBytes); 2865 } 2866 } 2867 2868 void MacroAssembler::decode_heap_oop_not_null(Register r) { 2869 decode_heap_oop_not_null(r, r); 2870 } 2871 2872 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 2873 assert(UseCompressedOops, "should only be used for compressed headers"); 2874 assert(Universe::heap() != nullptr, "java heap should be initialized"); 2875 // Cannot assert, unverified entry point counts instructions (see .ad file) 2876 // vtableStubs also counts instructions in pd_code_size_limit. 2877 // Also do not verify_oop as this is called by verify_oop. 2878 if (CompressedOops::shift() != 0) { 2879 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2880 slli(dst, src, LogMinObjAlignmentInBytes); 2881 if (CompressedOops::base() != nullptr) { 2882 add(dst, xheapbase, dst); 2883 } 2884 } else { 2885 assert(CompressedOops::base() == nullptr, "sanity"); 2886 mv(dst, src); 2887 } 2888 } 2889 2890 void MacroAssembler::decode_heap_oop(Register d, Register s) { 2891 if (CompressedOops::base() == nullptr) { 2892 if (CompressedOops::shift() != 0 || d != s) { 2893 slli(d, s, CompressedOops::shift()); 2894 } 2895 } else { 2896 Label done; 2897 mv(d, s); 2898 beqz(s, done); 2899 shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); 2900 bind(done); 2901 } 2902 verify_oop_msg(d, "broken oop in decode_heap_oop"); 2903 } 2904 2905 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1, 2906 Register tmp2, Register tmp3, DecoratorSet decorators) { 2907 access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3); 2908 } 2909 2910 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, 2911 Register tmp2, DecoratorSet decorators) { 2912 access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); 2913 } 2914 2915 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, 2916 Register tmp2, DecoratorSet decorators) { 2917 access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, tmp2); 2918 } 2919 2920 // Used for storing nulls. 2921 void MacroAssembler::store_heap_oop_null(Address dst) { 2922 access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg); 2923 } 2924 2925 int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, 2926 bool want_remainder, bool is_signed) 2927 { 2928 // Full implementation of Java idiv and irem. The function 2929 // returns the (pc) offset of the div instruction - may be needed 2930 // for implicit exceptions. 2931 // 2932 // input : rs1: dividend 2933 // rs2: divisor 2934 // 2935 // result: either 2936 // quotient (= rs1 idiv rs2) 2937 // remainder (= rs1 irem rs2) 2938 2939 2940 int idivl_offset = offset(); 2941 if (!want_remainder) { 2942 if (is_signed) { 2943 divw(result, rs1, rs2); 2944 } else { 2945 divuw(result, rs1, rs2); 2946 } 2947 } else { 2948 // result = rs1 % rs2; 2949 if (is_signed) { 2950 remw(result, rs1, rs2); 2951 } else { 2952 remuw(result, rs1, rs2); 2953 } 2954 } 2955 return idivl_offset; 2956 } 2957 2958 int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, 2959 bool want_remainder, bool is_signed) 2960 { 2961 // Full implementation of Java ldiv and lrem. 
The function 2962 // returns the (pc) offset of the div instruction - may be needed 2963 // for implicit exceptions. 2964 // 2965 // input : rs1: dividend 2966 // rs2: divisor 2967 // 2968 // result: either 2969 // quotient (= rs1 idiv rs2) 2970 // remainder (= rs1 irem rs2) 2971 2972 int idivq_offset = offset(); 2973 if (!want_remainder) { 2974 if (is_signed) { 2975 div(result, rs1, rs2); 2976 } else { 2977 divu(result, rs1, rs2); 2978 } 2979 } else { 2980 // result = rs1 % rs2; 2981 if (is_signed) { 2982 rem(result, rs1, rs2); 2983 } else { 2984 remu(result, rs1, rs2); 2985 } 2986 } 2987 return idivq_offset; 2988 } 2989 2990 // Look up the method for a megamorphic invokeinterface call. 2991 // The target method is determined by <intf_klass, itable_index>. 2992 // The receiver klass is in recv_klass. 2993 // On success, the result will be in method_result, and execution falls through. 2994 // On failure, execution transfers to the given label. 2995 void MacroAssembler::lookup_interface_method(Register recv_klass, 2996 Register intf_klass, 2997 RegisterOrConstant itable_index, 2998 Register method_result, 2999 Register scan_tmp, 3000 Label& L_no_such_interface, 3001 bool return_method) { 3002 assert_different_registers(recv_klass, intf_klass, scan_tmp); 3003 assert_different_registers(method_result, intf_klass, scan_tmp); 3004 assert(recv_klass != method_result || !return_method, 3005 "recv_klass can be destroyed when method isn't needed"); 3006 assert(itable_index.is_constant() || itable_index.as_register() == method_result, 3007 "caller must be same register for non-constant itable index as for method"); 3008 3009 // Compute start of first itableOffsetEntry (which is at the end of the vtable). 3010 int vtable_base = in_bytes(Klass::vtable_start_offset()); 3011 int itentry_off = in_bytes(itableMethodEntry::method_offset()); 3012 int scan_step = itableOffsetEntry::size() * wordSize; 3013 int vte_size = vtableEntry::size_in_bytes(); 3014 assert(vte_size == wordSize, "else adjust times_vte_scale"); 3015 3016 lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset())); 3017 3018 // Could store the aligned, prescaled offset in the klass. 3019 shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3); 3020 add(scan_tmp, scan_tmp, vtable_base); 3021 3022 if (return_method) { 3023 // Adjust recv_klass by scaled itable_index, so we can free itable_index. 3024 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 3025 if (itable_index.is_register()) { 3026 slli(t0, itable_index.as_register(), 3); 3027 } else { 3028 mv(t0, itable_index.as_constant() << 3); 3029 } 3030 add(recv_klass, recv_klass, t0); 3031 if (itentry_off) { 3032 add(recv_klass, recv_klass, itentry_off); 3033 } 3034 } 3035 3036 Label search, found_method; 3037 3038 ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset())); 3039 beq(intf_klass, method_result, found_method); 3040 bind(search); 3041 // Check that the previous entry is non-null. A null entry means that 3042 // the receiver class doesn't implement the interface, and wasn't the 3043 // same as when the caller was compiled. 3044 beqz(method_result, L_no_such_interface, /* is_far */ true); 3045 addi(scan_tmp, scan_tmp, scan_step); 3046 ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset())); 3047 bne(intf_klass, method_result, search); 3048 3049 bind(found_method); 3050 3051 // Got a hit.
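// (Editorial note, illustrative only: at this point scan_tmp points at the itableOffsetEntry whose interface matched intf_klass; its offset field is the byte offset of that interface's itableMethodEntry array, so the lookup below adds it to recv_klass, which was already advanced by the scaled itable_index when return_method is set.)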
3052 if (return_method) { 3053 lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset())); 3054 add(method_result, recv_klass, scan_tmp); 3055 ld(method_result, Address(method_result)); 3056 } 3057 } 3058 3059 // Look up the method for a megamorphic invokeinterface call in a single pass over itable: 3060 // - check recv_klass (actual object class) is a subtype of resolved_klass from CompiledICData 3061 // - find a holder_klass (class that implements the method) vtable offset and get the method from vtable by index 3062 // The target method is determined by <holder_klass, itable_index>. 3063 // The receiver klass is in recv_klass. 3064 // On success, the result will be in method_result, and execution falls through. 3065 // On failure, execution transfers to the given label. 3066 void MacroAssembler::lookup_interface_method_stub(Register recv_klass, 3067 Register holder_klass, 3068 Register resolved_klass, 3069 Register method_result, 3070 Register temp_itbl_klass, 3071 Register scan_temp, 3072 int itable_index, 3073 Label& L_no_such_interface) { 3074 // 'method_result' is only used as output register at the very end of this method. 3075 // Until then we can reuse it as 'holder_offset'. 3076 Register holder_offset = method_result; 3077 assert_different_registers(resolved_klass, recv_klass, holder_klass, temp_itbl_klass, scan_temp, holder_offset); 3078 3079 int vtable_start_offset_bytes = in_bytes(Klass::vtable_start_offset()); 3080 int scan_step = itableOffsetEntry::size() * wordSize; 3081 int ioffset_bytes = in_bytes(itableOffsetEntry::interface_offset()); 3082 int ooffset_bytes = in_bytes(itableOffsetEntry::offset_offset()); 3083 int itmentry_off_bytes = in_bytes(itableMethodEntry::method_offset()); 3084 const int vte_scale = exact_log2(vtableEntry::size_in_bytes()); 3085 3086 Label L_loop_search_resolved_entry, L_resolved_found, L_holder_found; 3087 3088 lwu(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); 3089 add(recv_klass, recv_klass, vtable_start_offset_bytes + ioffset_bytes); 3090 // itableOffsetEntry[] itable = recv_klass + Klass::vtable_start_offset() 3091 // + sizeof(vtableEntry) * (recv_klass->_vtable_len); 3092 // scan_temp = &(itable[0]._interface) 3093 // temp_itbl_klass = itable[0]._interface; 3094 shadd(scan_temp, scan_temp, recv_klass, scan_temp, vte_scale); 3095 ld(temp_itbl_klass, Address(scan_temp)); 3096 mv(holder_offset, zr); 3097 3098 // Initial checks: 3099 // - if (holder_klass != resolved_klass), go to "scan for resolved" 3100 // - if (itable[0] == holder_klass), shortcut to "holder found" 3101 // - if (itable[0] == 0), no such interface 3102 bne(resolved_klass, holder_klass, L_loop_search_resolved_entry); 3103 beq(holder_klass, temp_itbl_klass, L_holder_found); 3104 beqz(temp_itbl_klass, L_no_such_interface); 3105 3106 // Loop: Look for holder_klass record in itable 3107 // do { 3108 // temp_itbl_klass = *(scan_temp += scan_step); 3109 // if (temp_itbl_klass == holder_klass) { 3110 // goto L_holder_found; // Found! 3111 // } 3112 // } while (temp_itbl_klass != 0); 3113 // goto L_no_such_interface // Not found. 
3114 Label L_search_holder; 3115 bind(L_search_holder); 3116 add(scan_temp, scan_temp, scan_step); 3117 ld(temp_itbl_klass, Address(scan_temp)); 3118 beq(holder_klass, temp_itbl_klass, L_holder_found); 3119 bnez(temp_itbl_klass, L_search_holder); 3120 3121 j(L_no_such_interface); 3122 3123 // Loop: Look for resolved_class record in itable 3124 // while (true) { 3125 // temp_itbl_klass = *(scan_temp += scan_step); 3126 // if (temp_itbl_klass == 0) { 3127 // goto L_no_such_interface; 3128 // } 3129 // if (temp_itbl_klass == resolved_klass) { 3130 // goto L_resolved_found; // Found! 3131 // } 3132 // if (temp_itbl_klass == holder_klass) { 3133 // holder_offset = scan_temp; 3134 // } 3135 // } 3136 // 3137 Label L_loop_search_resolved; 3138 bind(L_loop_search_resolved); 3139 add(scan_temp, scan_temp, scan_step); 3140 ld(temp_itbl_klass, Address(scan_temp)); 3141 bind(L_loop_search_resolved_entry); 3142 beqz(temp_itbl_klass, L_no_such_interface); 3143 beq(resolved_klass, temp_itbl_klass, L_resolved_found); 3144 bne(holder_klass, temp_itbl_klass, L_loop_search_resolved); 3145 mv(holder_offset, scan_temp); 3146 j(L_loop_search_resolved); 3147 3148 // See if we already have a holder klass. If not, go and scan for it. 3149 bind(L_resolved_found); 3150 beqz(holder_offset, L_search_holder); 3151 mv(scan_temp, holder_offset); 3152 3153 // Finally, scan_temp contains holder_klass vtable offset 3154 bind(L_holder_found); 3155 lwu(method_result, Address(scan_temp, ooffset_bytes - ioffset_bytes)); 3156 add(recv_klass, recv_klass, itable_index * wordSize + itmentry_off_bytes 3157 - vtable_start_offset_bytes - ioffset_bytes); // substract offsets to restore the original value of recv_klass 3158 add(method_result, recv_klass, method_result); 3159 ld(method_result, Address(method_result)); 3160 } 3161 3162 // virtual method calling 3163 void MacroAssembler::lookup_virtual_method(Register recv_klass, 3164 RegisterOrConstant vtable_index, 3165 Register method_result) { 3166 const ByteSize base = Klass::vtable_start_offset(); 3167 assert(vtableEntry::size() * wordSize == 8, 3168 "adjust the scaling in the code below"); 3169 int vtable_offset_in_bytes = in_bytes(base + vtableEntry::method_offset()); 3170 3171 if (vtable_index.is_register()) { 3172 shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); 3173 ld(method_result, Address(method_result, vtable_offset_in_bytes)); 3174 } else { 3175 vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; 3176 ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes)); 3177 } 3178 } 3179 3180 void MacroAssembler::membar(uint32_t order_constraint) { 3181 address prev = pc() - MacroAssembler::instruction_size; 3182 address last = code()->last_insn(); 3183 3184 if (last != nullptr && is_membar(last) && prev == last) { 3185 // We are merging two memory barrier instructions. On RISCV we 3186 // can do this simply by ORing them together. 3187 set_membar_kind(prev, get_membar_kind(prev) | order_constraint); 3188 BLOCK_COMMENT("merged membar"); 3189 } else { 3190 code()->set_last_insn(pc()); 3191 3192 uint32_t predecessor = 0; 3193 uint32_t successor = 0; 3194 3195 membar_mask_to_pred_succ(order_constraint, predecessor, successor); 3196 fence(predecessor, successor); 3197 } 3198 } 3199 3200 void MacroAssembler::cmodx_fence() { 3201 BLOCK_COMMENT("cmodx fence"); 3202 if (VM_Version::supports_fencei_barrier()) { 3203 Assembler::fencei(); 3204 } 3205 } 3206 3207 // Form an address from base + offset in Rd. 
Rd my or may not 3208 // actually be used: you must use the Address that is returned. It 3209 // is up to you to ensure that the shift provided matches the size 3210 // of your data. 3211 Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_offset) { 3212 if (is_simm12(byte_offset)) { // 12: imm in range 2^12 3213 return Address(base, byte_offset); 3214 } 3215 3216 assert_different_registers(Rd, base, noreg); 3217 3218 // Do it the hard way 3219 mv(Rd, byte_offset); 3220 add(Rd, base, Rd); 3221 return Address(Rd); 3222 } 3223 3224 void MacroAssembler::check_klass_subtype(Register sub_klass, 3225 Register super_klass, 3226 Register tmp_reg, 3227 Label& L_success) { 3228 Label L_failure; 3229 check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, nullptr); 3230 check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, nullptr); 3231 bind(L_failure); 3232 } 3233 3234 void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { 3235 ld(t0, Address(xthread, JavaThread::polling_word_offset())); 3236 if (acquire) { 3237 membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); 3238 } 3239 if (at_return) { 3240 bgtu(in_nmethod ? sp : fp, t0, slow_path, /* is_far */ true); 3241 } else { 3242 test_bit(t0, t0, exact_log2(SafepointMechanism::poll_bit())); 3243 bnez(t0, slow_path, true /* is_far */); 3244 } 3245 } 3246 3247 void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, 3248 Label &succeed, Label *fail) { 3249 assert_different_registers(addr, tmp, t0); 3250 assert_different_registers(newv, tmp, t0); 3251 assert_different_registers(oldv, tmp, t0); 3252 3253 // oldv holds comparison value 3254 // newv holds value to write in exchange 3255 // addr identifies memory word to compare against/update 3256 if (UseZacas) { 3257 mv(tmp, oldv); 3258 atomic_cas(tmp, newv, addr, Assembler::int64, Assembler::aq, Assembler::rl); 3259 beq(tmp, oldv, succeed); 3260 } else { 3261 Label retry_load, nope; 3262 bind(retry_load); 3263 // Load reserved from the memory location 3264 load_reserved(tmp, addr, int64, Assembler::aqrl); 3265 // Fail and exit if it is not what we expect 3266 bne(tmp, oldv, nope); 3267 // If the store conditional succeeds, tmp will be zero 3268 store_conditional(tmp, newv, addr, int64, Assembler::rl); 3269 beqz(tmp, succeed); 3270 // Retry only when the store conditional failed 3271 j(retry_load); 3272 3273 bind(nope); 3274 } 3275 3276 // neither amocas nor lr/sc have an implied barrier in the failing case 3277 membar(AnyAny); 3278 3279 mv(oldv, tmp); 3280 if (fail != nullptr) { 3281 j(*fail); 3282 } 3283 } 3284 3285 void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, 3286 Label &succeed, Label *fail) { 3287 assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); 3288 cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); 3289 } 3290 3291 void MacroAssembler::load_reserved(Register dst, 3292 Register addr, 3293 enum operand_size size, 3294 Assembler::Aqrl acquire) { 3295 switch (size) { 3296 case int64: 3297 lr_d(dst, addr, acquire); 3298 break; 3299 case int32: 3300 lr_w(dst, addr, acquire); 3301 break; 3302 case uint32: 3303 lr_w(dst, addr, acquire); 3304 zero_extend(dst, dst, 32); 3305 break; 3306 default: 3307 ShouldNotReachHere(); 3308 } 3309 } 3310 3311 void MacroAssembler::store_conditional(Register dst, 3312 Register new_val, 3313 Register addr, 3314 enum operand_size size, 3315 
Assembler::Aqrl release) { 3316 switch (size) { 3317 case int64: 3318 sc_d(dst, new_val, addr, release); 3319 break; 3320 case int32: 3321 case uint32: 3322 sc_w(dst, new_val, addr, release); 3323 break; 3324 default: 3325 ShouldNotReachHere(); 3326 } 3327 } 3328 3329 3330 void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, 3331 Register new_val, 3332 enum operand_size size, 3333 Register tmp1, Register tmp2, Register tmp3) { 3334 assert(size == int8 || size == int16, "unsupported operand size"); 3335 3336 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; 3337 3338 andi(shift, addr, 3); 3339 slli(shift, shift, 3); 3340 3341 andi(aligned_addr, addr, ~3); 3342 3343 if (size == int8) { 3344 mv(mask, 0xff); 3345 } else { 3346 // size == int16 case 3347 mv(mask, -1); 3348 zero_extend(mask, mask, 16); 3349 } 3350 sll(mask, mask, shift); 3351 3352 notr(not_mask, mask); 3353 3354 sll(expected, expected, shift); 3355 andr(expected, expected, mask); 3356 3357 sll(new_val, new_val, shift); 3358 andr(new_val, new_val, mask); 3359 } 3360 3361 // cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. 3362 // It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w or amocas.w, 3363 // which are forced to work with 4-byte aligned address. 3364 void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, 3365 Register new_val, 3366 enum operand_size size, 3367 Assembler::Aqrl acquire, Assembler::Aqrl release, 3368 Register result, bool result_as_bool, 3369 Register tmp1, Register tmp2, Register tmp3) { 3370 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; 3371 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); 3372 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); 3373 3374 Label retry, fail, done; 3375 3376 bind(retry); 3377 3378 if (UseZacas) { 3379 lw(old, aligned_addr); 3380 3381 // if old & mask != expected 3382 andr(tmp, old, mask); 3383 bne(tmp, expected, fail); 3384 3385 andr(tmp, old, not_mask); 3386 orr(tmp, tmp, new_val); 3387 3388 atomic_cas(old, tmp, aligned_addr, operand_size::int32, acquire, release); 3389 bne(tmp, old, retry); 3390 } else { 3391 lr_w(old, aligned_addr, acquire); 3392 andr(tmp, old, mask); 3393 bne(tmp, expected, fail); 3394 3395 andr(tmp, old, not_mask); 3396 orr(tmp, tmp, new_val); 3397 sc_w(tmp, tmp, aligned_addr, release); 3398 bnez(tmp, retry); 3399 } 3400 3401 if (result_as_bool) { 3402 mv(result, 1); 3403 j(done); 3404 3405 bind(fail); 3406 mv(result, zr); 3407 3408 bind(done); 3409 } else { 3410 andr(tmp, old, mask); 3411 3412 bind(fail); 3413 srl(result, tmp, shift); 3414 3415 if (size == int8) { 3416 sign_extend(result, result, 8); 3417 } else { 3418 // size == int16 case 3419 sign_extend(result, result, 16); 3420 } 3421 } 3422 } 3423 3424 // weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement 3425 // the weak CAS stuff. The major difference is that it just failed when store conditional 3426 // failed. 
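// (Worked example added for clarity, with assumed values: for a one-byte CAS at an address A with (A & 3) == 2, cmpxchg_narrow_value_helper produces shift = 16, mask = 0xff << 16 and not_mask = ~mask; expected and new_val are shifted into that byte lane and the CAS is performed on the aligned word at (A & ~3), leaving the other three bytes unchanged.)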
3427 void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, 3428 Register new_val, 3429 enum operand_size size, 3430 Assembler::Aqrl acquire, Assembler::Aqrl release, 3431 Register result, 3432 Register tmp1, Register tmp2, Register tmp3) { 3433 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; 3434 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); 3435 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); 3436 3437 Label fail, done; 3438 3439 if (UseZacas) { 3440 lw(old, aligned_addr); 3441 3442 // if old & mask != expected 3443 andr(tmp, old, mask); 3444 bne(tmp, expected, fail); 3445 3446 andr(tmp, old, not_mask); 3447 orr(tmp, tmp, new_val); 3448 3449 atomic_cas(tmp, new_val, addr, operand_size::int32, acquire, release); 3450 bne(tmp, old, fail); 3451 } else { 3452 lr_w(old, aligned_addr, acquire); 3453 andr(tmp, old, mask); 3454 bne(tmp, expected, fail); 3455 3456 andr(tmp, old, not_mask); 3457 orr(tmp, tmp, new_val); 3458 sc_w(tmp, tmp, aligned_addr, release); 3459 bnez(tmp, fail); 3460 } 3461 3462 // Success 3463 mv(result, 1); 3464 j(done); 3465 3466 // Fail 3467 bind(fail); 3468 mv(result, zr); 3469 3470 bind(done); 3471 } 3472 3473 void MacroAssembler::cmpxchg(Register addr, Register expected, 3474 Register new_val, 3475 enum operand_size size, 3476 Assembler::Aqrl acquire, Assembler::Aqrl release, 3477 Register result, bool result_as_bool) { 3478 assert(size != int8 && size != int16, "unsupported operand size"); 3479 assert_different_registers(addr, t0); 3480 assert_different_registers(expected, t0); 3481 assert_different_registers(new_val, t0); 3482 3483 if (UseZacas) { 3484 if (result_as_bool) { 3485 mv(t0, expected); 3486 atomic_cas(t0, new_val, addr, size, acquire, release); 3487 xorr(t0, t0, expected); 3488 seqz(result, t0); 3489 } else { 3490 mv(result, expected); 3491 atomic_cas(result, new_val, addr, size, acquire, release); 3492 } 3493 return; 3494 } 3495 3496 Label retry_load, done, ne_done; 3497 bind(retry_load); 3498 load_reserved(t0, addr, size, acquire); 3499 bne(t0, expected, ne_done); 3500 store_conditional(t0, new_val, addr, size, release); 3501 bnez(t0, retry_load); 3502 3503 // equal, succeed 3504 if (result_as_bool) { 3505 mv(result, 1); 3506 } else { 3507 mv(result, expected); 3508 } 3509 j(done); 3510 3511 // not equal, failed 3512 bind(ne_done); 3513 if (result_as_bool) { 3514 mv(result, zr); 3515 } else { 3516 mv(result, t0); 3517 } 3518 3519 bind(done); 3520 } 3521 3522 void MacroAssembler::cmpxchg_weak(Register addr, Register expected, 3523 Register new_val, 3524 enum operand_size size, 3525 Assembler::Aqrl acquire, Assembler::Aqrl release, 3526 Register result) { 3527 if (UseZacas) { 3528 cmpxchg(addr, expected, new_val, size, acquire, release, result, true); 3529 return; 3530 } 3531 3532 assert_different_registers(addr, t0); 3533 assert_different_registers(expected, t0); 3534 assert_different_registers(new_val, t0); 3535 3536 Label fail, done; 3537 load_reserved(t0, addr, size, acquire); 3538 bne(t0, expected, fail); 3539 store_conditional(t0, new_val, addr, size, release); 3540 bnez(t0, fail); 3541 3542 // Success 3543 mv(result, 1); 3544 j(done); 3545 3546 // Fail 3547 bind(fail); 3548 mv(result, zr); 3549 3550 bind(done); 3551 } 3552 3553 #define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ 3554 void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ 3555 prev = prev->is_valid() ? 
prev : zr; \ 3556 if (incr.is_register()) { \ 3557 AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3558 } else { \ 3559 mv(t0, incr.as_constant()); \ 3560 AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3561 } \ 3562 return; \ 3563 } 3564 3565 ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) 3566 ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) 3567 ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) 3568 ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) 3569 3570 #undef ATOMIC_OP 3571 3572 #define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ 3573 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ 3574 prev = prev->is_valid() ? prev : zr; \ 3575 AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3576 return; \ 3577 } 3578 3579 ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) 3580 ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) 3581 ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) 3582 ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) 3583 3584 #undef ATOMIC_XCHG 3585 3586 #define ATOMIC_XCHGU(OP1, OP2) \ 3587 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ 3588 atomic_##OP2(prev, newv, addr); \ 3589 zero_extend(prev, prev, 32); \ 3590 return; \ 3591 } 3592 3593 ATOMIC_XCHGU(xchgwu, xchgw) 3594 ATOMIC_XCHGU(xchgalwu, xchgalw) 3595 3596 #undef ATOMIC_XCHGU 3597 3598 #define ATOMIC_CAS(OP, AOP, ACQUIRE, RELEASE) \ 3599 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ 3600 assert(UseZacas, "invariant"); \ 3601 prev = prev->is_valid() ? prev : zr; \ 3602 AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3603 return; \ 3604 } 3605 3606 ATOMIC_CAS(cas, amocas_d, Assembler::relaxed, Assembler::relaxed) 3607 ATOMIC_CAS(casw, amocas_w, Assembler::relaxed, Assembler::relaxed) 3608 ATOMIC_CAS(casl, amocas_d, Assembler::relaxed, Assembler::rl) 3609 ATOMIC_CAS(caslw, amocas_w, Assembler::relaxed, Assembler::rl) 3610 ATOMIC_CAS(casal, amocas_d, Assembler::aq, Assembler::rl) 3611 ATOMIC_CAS(casalw, amocas_w, Assembler::aq, Assembler::rl) 3612 3613 #undef ATOMIC_CAS 3614 3615 #define ATOMIC_CASU(OP1, OP2) \ 3616 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ 3617 atomic_##OP2(prev, newv, addr); \ 3618 zero_extend(prev, prev, 32); \ 3619 return; \ 3620 } 3621 3622 ATOMIC_CASU(caswu, casw) 3623 ATOMIC_CASU(caslwu, caslw) 3624 ATOMIC_CASU(casalwu, casalw) 3625 3626 #undef ATOMIC_CASU 3627 3628 void MacroAssembler::atomic_cas( 3629 Register prev, Register newv, Register addr, enum operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release) { 3630 switch (size) { 3631 case int64: 3632 switch ((Assembler::Aqrl)(acquire | release)) { 3633 case Assembler::relaxed: 3634 atomic_cas(prev, newv, addr); 3635 break; 3636 case Assembler::rl: 3637 atomic_casl(prev, newv, addr); 3638 break; 3639 case Assembler::aqrl: 3640 atomic_casal(prev, newv, addr); 3641 break; 3642 default: 3643 ShouldNotReachHere(); 3644 } 3645 break; 3646 case int32: 3647 switch ((Assembler::Aqrl)(acquire | release)) { 3648 case Assembler::relaxed: 3649 atomic_casw(prev, newv, addr); 3650 break; 3651 case Assembler::rl: 3652 atomic_caslw(prev, newv, addr); 3653 break; 3654 case Assembler::aqrl: 3655 atomic_casalw(prev, newv, addr); 3656 break; 3657 default: 3658 ShouldNotReachHere(); 3659 } 3660 break; 3661 case uint32: 3662 switch 
((Assembler::Aqrl)(acquire | release)) { 3663 case Assembler::relaxed: 3664 atomic_caswu(prev, newv, addr); 3665 break; 3666 case Assembler::rl: 3667 atomic_caslwu(prev, newv, addr); 3668 break; 3669 case Assembler::aqrl: 3670 atomic_casalwu(prev, newv, addr); 3671 break; 3672 default: 3673 ShouldNotReachHere(); 3674 } 3675 break; 3676 default: 3677 ShouldNotReachHere(); 3678 } 3679 } 3680 3681 void MacroAssembler::far_jump(const Address &entry, Register tmp) { 3682 assert(CodeCache::find_blob(entry.target()) != nullptr, 3683 "destination of far jump not found in code cache"); 3684 assert(entry.rspec().type() == relocInfo::external_word_type 3685 || entry.rspec().type() == relocInfo::runtime_call_type 3686 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type"); 3687 // Fixed length: see MacroAssembler::far_branch_size() 3688 // We can use auipc + jr here because we know that the total size of 3689 // the code cache cannot exceed 2Gb. 3690 relocate(entry.rspec(), [&] { 3691 int64_t distance = entry.target() - pc(); 3692 int32_t offset = ((int32_t)distance << 20) >> 20; 3693 assert(is_valid_32bit_offset(distance), "Far jump using wrong instructions."); 3694 auipc(tmp, (int32_t)distance + 0x800); 3695 jr(tmp, offset); 3696 }); 3697 } 3698 3699 void MacroAssembler::far_call(const Address &entry, Register tmp) { 3700 assert(CodeCache::find_blob(entry.target()) != nullptr, 3701 "destination of far call not found in code cache"); 3702 assert(entry.rspec().type() == relocInfo::external_word_type 3703 || entry.rspec().type() == relocInfo::runtime_call_type 3704 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type"); 3705 // Fixed length: see MacroAssembler::far_branch_size() 3706 // We can use auipc + jalr here because we know that the total size of 3707 // the code cache cannot exceed 2Gb. 3708 relocate(entry.rspec(), [&] { 3709 int64_t distance = entry.target() - pc(); 3710 int32_t offset = ((int32_t)distance << 20) >> 20; 3711 assert(is_valid_32bit_offset(distance), "Far call using wrong instructions."); 3712 auipc(tmp, (int32_t)distance + 0x800); 3713 jalr(tmp, offset); 3714 }); 3715 } 3716 3717 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 3718 Register super_klass, 3719 Register tmp_reg, 3720 Label* L_success, 3721 Label* L_failure, 3722 Label* L_slow_path, 3723 Register super_check_offset) { 3724 assert_different_registers(sub_klass, super_klass, tmp_reg); 3725 bool must_load_sco = (super_check_offset == noreg); 3726 if (must_load_sco) { 3727 assert(tmp_reg != noreg, "supply either a temp or a register offset"); 3728 } else { 3729 assert_different_registers(sub_klass, super_klass, super_check_offset); 3730 } 3731 3732 Label L_fallthrough; 3733 int label_nulls = 0; 3734 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 3735 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 3736 if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; } 3737 assert(label_nulls <= 1, "at most one null in batch"); 3738 3739 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3740 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 3741 Address super_check_offset_addr(super_klass, sco_offset); 3742 3743 // Hacked jmp, which may only be used just before L_fallthrough. 3744 #define final_jmp(label) \ 3745 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 3746 else j(label) /*omit semi*/ 3747 3748 // If the pointers are equal, we are done (e.g., String[] elements). 
3749 // This self-check enables sharing of secondary supertype arrays among 3750 // non-primary types such as array-of-interface. Otherwise, each such 3751 // type would need its own customized SSA. 3752 // We move this check to the front of the fast path because many 3753 // type checks are in fact trivially successful in this manner, 3754 // so we get a nicely predicted branch right at the start of the check. 3755 beq(sub_klass, super_klass, *L_success); 3756 3757 // Check the supertype display: 3758 if (must_load_sco) { 3759 lwu(tmp_reg, super_check_offset_addr); 3760 super_check_offset = tmp_reg; 3761 } 3762 add(t0, sub_klass, super_check_offset); 3763 Address super_check_addr(t0); 3764 ld(t0, super_check_addr); // load displayed supertype 3765 3766 // This check has worked decisively for primary supers. 3767 // Secondary supers are sought in the super_cache ('super_cache_addr'). 3768 // (Secondary supers are interfaces and very deeply nested subtypes.) 3769 // This works in the same check above because of a tricky aliasing 3770 // between the super_Cache and the primary super display elements. 3771 // (The 'super_check_addr' can address either, as the case requires.) 3772 // Note that the cache is updated below if it does not help us find 3773 // what we need immediately. 3774 // So if it was a primary super, we can just fail immediately. 3775 // Otherwise, it's the slow path for us (no success at this point). 3776 3777 beq(super_klass, t0, *L_success); 3778 mv(t1, sc_offset); 3779 if (L_failure == &L_fallthrough) { 3780 beq(super_check_offset, t1, *L_slow_path); 3781 } else { 3782 bne(super_check_offset, t1, *L_failure, /* is_far */ true); 3783 final_jmp(*L_slow_path); 3784 } 3785 3786 bind(L_fallthrough); 3787 3788 #undef final_jmp 3789 } 3790 3791 // Scans count pointer sized words at [addr] for occurrence of value, 3792 // generic 3793 void MacroAssembler::repne_scan(Register addr, Register value, Register count, 3794 Register tmp) { 3795 Label Lloop, Lexit; 3796 beqz(count, Lexit); 3797 bind(Lloop); 3798 ld(tmp, addr); 3799 beq(value, tmp, Lexit); 3800 add(addr, addr, wordSize); 3801 sub(count, count, 1); 3802 bnez(count, Lloop); 3803 bind(Lexit); 3804 } 3805 3806 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 3807 Register super_klass, 3808 Register tmp1_reg, 3809 Register tmp2_reg, 3810 Label* L_success, 3811 Label* L_failure) { 3812 assert_different_registers(sub_klass, super_klass, tmp1_reg); 3813 if (tmp2_reg != noreg) { 3814 assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); 3815 } 3816 #define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) 3817 3818 Label L_fallthrough; 3819 int label_nulls = 0; 3820 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 3821 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 3822 3823 assert(label_nulls <= 1, "at most one null in the batch"); 3824 3825 // A couple of useful fields in sub_klass: 3826 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 3827 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3828 Address secondary_supers_addr(sub_klass, ss_offset); 3829 Address super_cache_addr( sub_klass, sc_offset); 3830 3831 BLOCK_COMMENT("check_klass_subtype_slow_path"); 3832 3833 // Do a linear scan of the secondary super-klass chain. 3834 // This code is rarely used, so simplicity is a virtue here. 3835 // The repne_scan instruction uses fixed registers, which we must spill. 
3836 // Don't worry too much about pre-existing connections with the input regs. 3837 3838 assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) 3839 assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) 3840 3841 RegSet pushed_registers; 3842 if (!IS_A_TEMP(x12)) { 3843 pushed_registers += x12; 3844 } 3845 if (!IS_A_TEMP(x15)) { 3846 pushed_registers += x15; 3847 } 3848 3849 if (super_klass != x10) { 3850 if (!IS_A_TEMP(x10)) { 3851 pushed_registers += x10; 3852 } 3853 } 3854 3855 push_reg(pushed_registers, sp); 3856 3857 // Get super_klass value into x10 (even if it was in x15 or x12) 3858 mv(x10, super_klass); 3859 3860 #ifndef PRODUCT 3861 incrementw(ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr)); 3862 #endif // PRODUCT 3863 3864 // We will consult the secondary-super array. 3865 ld(x15, secondary_supers_addr); 3866 // Load the array length. 3867 lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes())); 3868 // Skip to start of data. 3869 add(x15, x15, Array<Klass*>::base_offset_in_bytes()); 3870 3871 // Set t0 to an obvious invalid value, falling through by default 3872 mv(t0, -1); 3873 // Scan X12 words at [X15] for an occurrence of X10. 3874 repne_scan(x15, x10, x12, t0); 3875 3876 // pop will restore x10, so we should use a temp register to keep its value 3877 mv(t1, x10); 3878 3879 // Unspill the temp registers: 3880 pop_reg(pushed_registers, sp); 3881 3882 bne(t1, t0, *L_failure); 3883 3884 // Success. Cache the super we found and proceed in triumph. 3885 sd(super_klass, super_cache_addr); 3886 3887 if (L_success != &L_fallthrough) { 3888 j(*L_success); 3889 } 3890 3891 #undef IS_A_TEMP 3892 3893 bind(L_fallthrough); 3894 } 3895 3896 // population_count variant for running without the CPOP 3897 // instruction, which was introduced with the Zbb extension. 3898 void MacroAssembler::population_count(Register dst, Register src, 3899 Register tmp1, Register tmp2) { 3900 if (UsePopCountInstruction) { 3901 cpop(dst, src); 3902 } else { 3903 assert_different_registers(src, tmp1, tmp2); 3904 assert_different_registers(dst, tmp1, tmp2); 3905 Label loop, done; 3906 3907 mv(tmp1, src); 3908 // dst = 0; 3909 // while(tmp1 != 0) { 3910 // dst++; 3911 // tmp1 &= (tmp1 - 1); 3912 // } 3913 mv(dst, zr); 3914 beqz(tmp1, done); 3915 { 3916 bind(loop); 3917 addi(dst, dst, 1); 3918 addi(tmp2, tmp1, -1); 3919 andr(tmp1, tmp1, tmp2); 3920 bnez(tmp1, loop); 3921 } 3922 bind(done); 3923 } 3924 } 3925 3926 // Ensure that the inline code and the stub are using the same registers 3927 // as we need to call the stub from inline code when there is a collision 3928 // in the hashed lookup in the secondary supers array.
3929 #define LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS(r_super_klass, r_array_base, r_array_length, \ 3930 r_array_index, r_sub_klass, result, r_bitmap) \ 3931 do { \ 3932 assert(r_super_klass == x10 && \ 3933 r_array_base == x11 && \ 3934 r_array_length == x12 && \ 3935 (r_array_index == x13 || r_array_index == noreg) && \ 3936 (r_sub_klass == x14 || r_sub_klass == noreg) && \ 3937 (result == x15 || result == noreg) && \ 3938 (r_bitmap == x16 || r_bitmap == noreg), "registers must match riscv.ad"); \ 3939 } while(0) 3940 3941 // Return true: we succeeded in generating this code 3942 bool MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, 3943 Register r_super_klass, 3944 Register result, 3945 Register tmp1, 3946 Register tmp2, 3947 Register tmp3, 3948 Register tmp4, 3949 u1 super_klass_slot, 3950 bool stub_is_near) { 3951 assert_different_registers(r_sub_klass, r_super_klass, result, tmp1, tmp2, tmp3, tmp4, t0); 3952 3953 Label L_fallthrough; 3954 3955 BLOCK_COMMENT("lookup_secondary_supers_table {"); 3956 3957 const Register 3958 r_array_base = tmp1, // x11 3959 r_array_length = tmp2, // x12 3960 r_array_index = tmp3, // x13 3961 r_bitmap = tmp4; // x16 3962 3963 LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS(r_super_klass, r_array_base, r_array_length, 3964 r_array_index, r_sub_klass, result, r_bitmap); 3965 3966 u1 bit = super_klass_slot; 3967 3968 // Initialize result value to 1 which means mismatch. 3969 mv(result, 1); 3970 3971 ld(r_bitmap, Address(r_sub_klass, Klass::bitmap_offset())); 3972 3973 // First check the bitmap to see if super_klass might be present. If 3974 // the bit is zero, we are certain that super_klass is not one of 3975 // the secondary supers. 3976 test_bit(t0, r_bitmap, bit); 3977 beqz(t0, L_fallthrough); 3978 3979 // Get the first array index that can contain super_klass into r_array_index. 3980 if (bit != 0) { 3981 slli(r_array_index, r_bitmap, (Klass::SECONDARY_SUPERS_TABLE_MASK - bit)); 3982 population_count(r_array_index, r_array_index, tmp1, tmp2); 3983 } else { 3984 mv(r_array_index, (u1)1); 3985 } 3986 3987 // We will consult the secondary-super array. 3988 ld(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset()))); 3989 3990 // The value i in r_array_index is >= 1, so even though r_array_base 3991 // points to the length, we don't need to adjust it to point to the data. 3992 assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "Adjust this code"); 3993 assert(Array<Klass*>::length_offset_in_bytes() == 0, "Adjust this code"); 3994 3995 shadd(result, r_array_index, r_array_base, result, LogBytesPerWord); 3996 ld(result, Address(result)); 3997 xorr(result, result, r_super_klass); 3998 beqz(result, L_fallthrough); // Found a match 3999 4000 // Is there another entry to check? Consult the bitmap. 4001 test_bit(t0, r_bitmap, (bit + 1) & Klass::SECONDARY_SUPERS_TABLE_MASK); 4002 beqz(t0, L_fallthrough); 4003 4004 // Linear probe. 4005 if (bit != 0) { 4006 ror_imm(r_bitmap, r_bitmap, bit); 4007 } 4008 4009 // The slot we just inspected is at secondary_supers[r_array_index - 1]. 4010 // The next slot to be inspected, by the stub we're about to call, 4011 // is secondary_supers[r_array_index]. Bits 0 and 1 in the bitmap 4012 // have been checked. 
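// Worked example of the index computation above (illustration only): suppose
// r_bitmap has bits 1, 5 and 8 set and super_klass hashed to slot 5 (bit == 5).
// The bit test succeeds, r_bitmap << (63 - 5) keeps bits [5:0] in the topmost
// positions, and the population count of that value is 2 (bits 1 and 5). So
// r_array_index == 2, and since r_array_base still points at the length word,
// the probed entry is the second packed element, i.e. secondary_supers[1] ==
// secondary_supers[r_array_index - 1].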
4013 rt_call(StubRoutines::lookup_secondary_supers_table_slow_path_stub()); 4014 4015 BLOCK_COMMENT("} lookup_secondary_supers_table"); 4016 4017 bind(L_fallthrough); 4018 4019 if (VerifySecondarySupers) { 4020 verify_secondary_supers_table(r_sub_klass, r_super_klass, // x14, x10 4021 result, tmp1, tmp2, tmp3); // x15, x11, x12, x13 4022 } 4023 return true; 4024 } 4025 4026 // Called by code generated by check_klass_subtype_slow_path 4027 // above. This is called when there is a collision in the hashed 4028 // lookup in the secondary supers array. 4029 void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_klass, 4030 Register r_array_base, 4031 Register r_array_index, 4032 Register r_bitmap, 4033 Register result, 4034 Register tmp1) { 4035 assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, tmp1, result, t0); 4036 4037 const Register 4038 r_array_length = tmp1, 4039 r_sub_klass = noreg; // unused 4040 4041 LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS(r_super_klass, r_array_base, r_array_length, 4042 r_array_index, r_sub_klass, result, r_bitmap); 4043 4044 Label L_matched, L_fallthrough, L_bitmap_full; 4045 4046 // Initialize result value to 1 which means mismatch. 4047 mv(result, 1); 4048 4049 // Load the array length. 4050 lwu(r_array_length, Address(r_array_base, Array<Klass*>::length_offset_in_bytes())); 4051 // And adjust the array base to point to the data. 4052 // NB! Effectively increments current slot index by 1. 4053 assert(Array<Klass*>::base_offset_in_bytes() == wordSize, ""); 4054 addi(r_array_base, r_array_base, Array<Klass*>::base_offset_in_bytes()); 4055 4056 // Check if bitmap is SECONDARY_SUPERS_BITMAP_FULL 4057 assert(Klass::SECONDARY_SUPERS_BITMAP_FULL == ~uintx(0), "Adjust this code"); 4058 subw(t0, r_array_length, Klass::SECONDARY_SUPERS_TABLE_SIZE - 2); 4059 bgtz(t0, L_bitmap_full); 4060 4061 // NB! Our caller has checked bits 0 and 1 in the bitmap. The 4062 // current slot (at secondary_supers[r_array_index]) has not yet 4063 // been inspected, and r_array_index may be out of bounds if we 4064 // wrapped around the end of the array. 4065 4066 { // This is conventional linear probing, but instead of terminating 4067 // when a null entry is found in the table, we maintain a bitmap 4068 // in which a 0 indicates missing entries. 4069 // The check above guarantees there are 0s in the bitmap, so the loop 4070 // eventually terminates. 4071 Label L_loop; 4072 bind(L_loop); 4073 4074 // Check for wraparound. 4075 Label skip; 4076 blt(r_array_index, r_array_length, skip); 4077 mv(r_array_index, zr); 4078 bind(skip); 4079 4080 shadd(t0, r_array_index, r_array_base, t0, LogBytesPerWord); 4081 ld(t0, Address(t0)); 4082 beq(t0, r_super_klass, L_matched); 4083 4084 test_bit(t0, r_bitmap, 2); // look-ahead check (Bit 2); result is non-zero 4085 beqz(t0, L_fallthrough); 4086 4087 ror_imm(r_bitmap, r_bitmap, 1); 4088 addi(r_array_index, r_array_index, 1); 4089 j(L_loop); 4090 } 4091 4092 { // Degenerate case: more than 64 secondary supers. 4093 // FIXME: We could do something smarter here, maybe a vectorized 4094 // comparison or a binary search, but is that worth any added 4095 // complexity? 4096 bind(L_bitmap_full); 4097 repne_scan(r_array_base, r_super_klass, r_array_length, t0); 4098 bne(r_super_klass, t0, L_fallthrough); 4099 } 4100 4101 bind(L_matched); 4102 mv(result, zr); 4103 4104 bind(L_fallthrough); 4105 } 4106 4107 // Make sure that the hashed lookup and a linear scan agree. 
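// In effect the verification below computes the following (illustrative
// pseudo-code only; the names are descriptive, not real helpers):
//
//   bool in_table = false;
//   for (int i = 0; i < secondary_supers->length(); i++) {
//     if (secondary_supers->at(i) == r_super_klass) { in_table = true; break; }
//   }
//   // result from the hashed lookup is 0 on a hit and non-zero on a miss
//   if (in_table != (result == 0)) {
//     Klass::on_secondary_supers_verification_failure(...);   // does not return
//   }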
4108 void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass, 4109 Register r_super_klass, 4110 Register result, 4111 Register tmp1, 4112 Register tmp2, 4113 Register tmp3) { 4114 assert_different_registers(r_sub_klass, r_super_klass, tmp1, tmp2, tmp3, result, t0); 4115 4116 const Register 4117 r_array_base = tmp1, // X11 4118 r_array_length = tmp2, // X12 4119 r_array_index = noreg, // unused 4120 r_bitmap = noreg; // unused 4121 4122 LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS(r_super_klass, r_array_base, r_array_length, 4123 r_array_index, r_sub_klass, result, r_bitmap); 4124 4125 BLOCK_COMMENT("verify_secondary_supers_table {"); 4126 4127 // We will consult the secondary-super array. 4128 ld(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset()))); 4129 4130 // Load the array length. 4131 lwu(r_array_length, Address(r_array_base, Array<Klass*>::length_offset_in_bytes())); 4132 // And adjust the array base to point to the data. 4133 addi(r_array_base, r_array_base, Array<Klass*>::base_offset_in_bytes()); 4134 4135 repne_scan(r_array_base, r_super_klass, r_array_length, t0); 4136 Label failed; 4137 mv(tmp3, 1); 4138 bne(r_super_klass, t0, failed); 4139 mv(tmp3, zr); 4140 bind(failed); 4141 4142 snez(result, result); // normalize result to 0/1 for comparison 4143 4144 Label passed; 4145 beq(tmp3, result, passed); 4146 { 4147 mv(x10, r_super_klass); 4148 mv(x11, r_sub_klass); 4149 mv(x12, tmp3); 4150 mv(x13, result); 4151 mv(x14, (address)("mismatch")); 4152 rt_call(CAST_FROM_FN_PTR(address, Klass::on_secondary_supers_verification_failure)); 4153 should_not_reach_here(); 4154 } 4155 bind(passed); 4156 4157 BLOCK_COMMENT("} verify_secondary_supers_table"); 4158 } 4159 4160 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 4161 void MacroAssembler::tlab_allocate(Register obj, 4162 Register var_size_in_bytes, 4163 int con_size_in_bytes, 4164 Register tmp1, 4165 Register tmp2, 4166 Label& slow_case, 4167 bool is_far) { 4168 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 4169 bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); 4170 } 4171 4172 // get_thread() can be called anywhere inside generated code so we 4173 // need to save whatever non-callee save context might get clobbered 4174 // by the call to Thread::current() or, indeed, the call setup code. 
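// For reference: on RV64 the caller-saved (volatile) integer registers are ra,
// t0-t2 (x5-x7), a0-a7 (x10-x17) and t3-t6 (x28-x31); the set pushed below is
// exactly that, minus the destination register itself. Caller-saved
// floating-point registers are not preserved here, so do not rely on them
// surviving a call to get_thread().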
4175 void MacroAssembler::get_thread(Register thread) { 4176 // save all call-clobbered regs except thread 4177 RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + 4178 RegSet::range(x28, x31) + ra - thread; 4179 push_reg(saved_regs, sp); 4180 4181 mv(ra, CAST_FROM_FN_PTR(address, Thread::current)); 4182 jalr(ra); 4183 if (thread != c_rarg0) { 4184 mv(thread, c_rarg0); 4185 } 4186 4187 // restore pushed registers 4188 pop_reg(saved_regs, sp); 4189 } 4190 4191 void MacroAssembler::load_byte_map_base(Register reg) { 4192 CardTable::CardValue* byte_map_base = 4193 ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); 4194 mv(reg, (uint64_t)byte_map_base); 4195 } 4196 4197 void MacroAssembler::build_frame(int framesize) { 4198 assert(framesize >= 2, "framesize must include space for FP/RA"); 4199 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 4200 sub(sp, sp, framesize); 4201 sd(fp, Address(sp, framesize - 2 * wordSize)); 4202 sd(ra, Address(sp, framesize - wordSize)); 4203 if (PreserveFramePointer) { add(fp, sp, framesize); } 4204 } 4205 4206 void MacroAssembler::remove_frame(int framesize) { 4207 assert(framesize >= 2, "framesize must include space for FP/RA"); 4208 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 4209 ld(fp, Address(sp, framesize - 2 * wordSize)); 4210 ld(ra, Address(sp, framesize - wordSize)); 4211 add(sp, sp, framesize); 4212 } 4213 4214 void MacroAssembler::reserved_stack_check() { 4215 // testing if reserved zone needs to be enabled 4216 Label no_reserved_zone_enabling; 4217 4218 ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); 4219 bltu(sp, t0, no_reserved_zone_enabling); 4220 4221 enter(); // RA and FP are live. 4222 mv(c_rarg0, xthread); 4223 rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)); 4224 leave(); 4225 4226 // We have already removed our own frame. 4227 // throw_delayed_StackOverflowError will think that it's been 4228 // called by our caller. 4229 la(t0, RuntimeAddress(SharedRuntime::throw_delayed_StackOverflowError_entry())); 4230 jr(t0); 4231 should_not_reach_here(); 4232 4233 bind(no_reserved_zone_enabling); 4234 } 4235 4236 // Move the address of the polling page into dest. 4237 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { 4238 ld(dest, Address(xthread, JavaThread::polling_page_offset())); 4239 } 4240 4241 // Read the polling page. The address of the polling page must 4242 // already be in r. 
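// The load below discards its result (the destination is the zero register); its
// only purpose is to fault when the VM has protected the thread's polling page.
// The SEGV handler then recognizes the faulting pc as a safepoint poll via the
// relocation recorded here and brings the thread to a safepoint/handshake stop.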
4243 void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { 4244 relocate(rtype, [&] { 4245 lwu(zr, Address(r, offset)); 4246 }); 4247 } 4248 4249 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 4250 #ifdef ASSERT 4251 { 4252 ThreadInVMfromUnknown tiv; 4253 assert (UseCompressedOops, "should only be used for compressed oops"); 4254 assert (Universe::heap() != nullptr, "java heap should be initialized"); 4255 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 4256 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 4257 } 4258 #endif 4259 int oop_index = oop_recorder()->find_index(obj); 4260 relocate(oop_Relocation::spec(oop_index), [&] { 4261 li32(dst, 0xDEADBEEF); 4262 }); 4263 zero_extend(dst, dst, 32); 4264 } 4265 4266 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 4267 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 4268 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 4269 int index = oop_recorder()->find_index(k); 4270 assert(!Universe::heap()->is_in(k), "should not be an oop"); 4271 4272 narrowKlass nk = CompressedKlassPointers::encode(k); 4273 relocate(metadata_Relocation::spec(index), [&] { 4274 li32(dst, nk); 4275 }); 4276 zero_extend(dst, dst, 32); 4277 } 4278 4279 // Maybe emit a call via a trampoline. If the code cache is small 4280 // trampolines won't be emitted. 4281 address MacroAssembler::trampoline_call(Address entry) { 4282 assert(entry.rspec().type() == relocInfo::runtime_call_type || 4283 entry.rspec().type() == relocInfo::opt_virtual_call_type || 4284 entry.rspec().type() == relocInfo::static_call_type || 4285 entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); 4286 4287 address target = entry.target(); 4288 4289 // We need a trampoline if branches are far. 
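// ("Far" means beyond the +/-1 MiB reach of a single jal. The trampoline stub
// emitted below loads a full 64-bit destination from a constant placed next to
// the stub, so the call site itself can stay a short branch-and-link into the
// same code blob; see emit_trampoline_stub below for the exact sequence.)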
4290 if (!in_scratch_emit_size()) { 4291 if (entry.rspec().type() == relocInfo::runtime_call_type) { 4292 assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs"); 4293 code()->share_trampoline_for(entry.target(), offset()); 4294 } else { 4295 address stub = emit_trampoline_stub(offset(), target); 4296 if (stub == nullptr) { 4297 postcond(pc() == badAddress); 4298 return nullptr; // CodeCache is full 4299 } 4300 } 4301 } 4302 target = pc(); 4303 4304 address call_pc = pc(); 4305 #ifdef ASSERT 4306 if (entry.rspec().type() != relocInfo::runtime_call_type) { 4307 assert_alignment(call_pc); 4308 } 4309 #endif 4310 relocate(entry.rspec(), [&] { 4311 jump_link(target, t0); 4312 }); 4313 4314 postcond(pc() != badAddress); 4315 return call_pc; 4316 } 4317 4318 address MacroAssembler::load_and_call(Address entry) { 4319 assert(entry.rspec().type() == relocInfo::runtime_call_type || 4320 entry.rspec().type() == relocInfo::opt_virtual_call_type || 4321 entry.rspec().type() == relocInfo::static_call_type || 4322 entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); 4323 4324 address target = entry.target(); 4325 4326 if (!in_scratch_emit_size()) { 4327 address stub = emit_address_stub(offset(), target); 4328 if (stub == nullptr) { 4329 postcond(pc() == badAddress); 4330 return nullptr; // CodeCache is full 4331 } 4332 } 4333 4334 address call_pc = pc(); 4335 #ifdef ASSERT 4336 if (entry.rspec().type() != relocInfo::runtime_call_type) { 4337 assert_alignment(call_pc); 4338 } 4339 #endif 4340 relocate(entry.rspec(), [&] { 4341 load_link_jump(target); 4342 }); 4343 4344 postcond(pc() != badAddress); 4345 return call_pc; 4346 } 4347 4348 address MacroAssembler::ic_call(address entry, jint method_index) { 4349 RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); 4350 IncompressibleRegion ir(this); // relocations 4351 movptr(t1, (address)Universe::non_oop_word(), t0); 4352 assert_cond(entry != nullptr); 4353 return reloc_call(Address(entry, rh)); 4354 } 4355 4356 int MacroAssembler::ic_check_size() { 4357 // No compressed instructions: ic_check() emits under an IncompressibleRegion, so every instruction is 4 bytes. 4358 return (MacroAssembler::instruction_size * (2 /* 2 loads */ + 1 /* branch */)) + 4359 far_branch_size(); 4360 } 4361 4362 int MacroAssembler::ic_check(int end_alignment) { 4363 IncompressibleRegion ir(this); 4364 Register receiver = j_rarg0; 4365 Register data = t1; 4366 4367 Register tmp1 = t0; // t0 always scratch 4368 // t2 is clobbered across calls, so any live value must already have been saved before this check. 4369 // Hence we can clobber it here. 4370 Register tmp2 = t2; 4371 4372 // The UEP of a code blob ensures that the VEP is padded. However, the padding of the UEP is placed 4373 // before the inline cache check, so we don't have to execute any nop instructions when dispatching 4374 // through the UEP, yet we can ensure that the VEP is aligned appropriately. That's why we align 4375 // before the inline cache check here, and not after. 4376 align(end_alignment, ic_check_size()); 4377 int uep_offset = offset(); 4378 4379 if (UseCompressedClassPointers) { 4380 lwu(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes())); 4381 lwu(tmp2, Address(data, CompiledICData::speculated_klass_offset())); 4382 } else { 4383 ld(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes())); 4384 ld(tmp2, Address(data, CompiledICData::speculated_klass_offset())); 4385 } 4386 4387 Label ic_hit; 4388 beq(tmp1, tmp2, ic_hit); 4389 // Note, far_jump is not fixed size. 4390 // If this ever generates a movptr, alignment/size will be off.
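// For orientation, the overall shape of the check emitted by this method is
// (illustration only):
//   UEP (unverified entry, returned as uep_offset):
//     load  tmp1 <- receiver->klass()
//     load  tmp2 <- CompiledICData speculated klass
//     beq   tmp1, tmp2, ic_hit
//     far_jump ic_miss_stub
//   ic_hit:  followed by the verified entry point, aligned to end_alignment.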
4391 far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 4392 bind(ic_hit); 4393 4394 assert((offset() % end_alignment) == 0, "Misaligned verified entry point."); 4395 return uep_offset; 4396 } 4397 4398 address MacroAssembler::emit_address_stub(int insts_call_instruction_offset, address dest) { 4399 address stub = start_a_stub(max_reloc_call_stub_size()); 4400 if (stub == nullptr) { 4401 return nullptr; // CodeBuffer::expand failed 4402 } 4403 4404 // We are always 4-byte aligned here. 4405 assert_alignment(pc()); 4406 4407 // Make sure the address of destination 8-byte aligned. 4408 align(wordSize, 0); 4409 4410 RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() + 4411 insts_call_instruction_offset); 4412 const int stub_start_offset = offset(); 4413 relocate(rh, [&] { 4414 assert(offset() - stub_start_offset == 0, 4415 "%ld - %ld == %ld : should be", (long)offset(), (long)stub_start_offset, (long)0); 4416 assert(offset() % wordSize == 0, "bad alignment"); 4417 emit_int64((int64_t)dest); 4418 }); 4419 4420 const address stub_start_addr = addr_at(stub_start_offset); 4421 end_a_stub(); 4422 4423 return stub_start_addr; 4424 } 4425 4426 // Emit a trampoline stub for a call to a target which is too far away. 4427 // 4428 // code sequences: 4429 // 4430 // call-site: 4431 // branch-and-link to <destination> or <trampoline stub> 4432 // 4433 // Related trampoline stub for this call site in the stub section: 4434 // load the call target from the constant pool 4435 // branch (RA still points to the call site above) 4436 4437 address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, 4438 address dest) { 4439 // Max stub size: alignment nop, TrampolineStub. 4440 address stub = start_a_stub(max_reloc_call_stub_size()); 4441 if (stub == nullptr) { 4442 return nullptr; // CodeBuffer::expand failed 4443 } 4444 4445 assert(UseTrampolines, "Must be using trampos."); 4446 4447 // We are always 4-byte aligned here. 4448 assert_alignment(pc()); 4449 4450 // Create a trampoline stub relocation which relates this trampoline stub 4451 // with the call instruction at insts_call_instruction_offset in the 4452 // instructions code-section. 4453 4454 // Make sure the address of destination 8-byte aligned after 3 instructions. 4455 align(wordSize, MacroAssembler::NativeShortCall::trampoline_data_offset); 4456 4457 RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() + 4458 insts_call_instruction_offset); 4459 const int stub_start_offset = offset(); 4460 relocate(rh, [&] { 4461 // Now, create the trampoline stub's code: 4462 // - load the call 4463 // - call 4464 Label target; 4465 ld(t0, target); // auipc + ld 4466 jr(t0); // jalr 4467 bind(target); 4468 assert(offset() - stub_start_offset == MacroAssembler::NativeShortCall::trampoline_data_offset, 4469 "should be"); 4470 assert(offset() % wordSize == 0, "bad alignment"); 4471 emit_int64((int64_t)dest); 4472 }); 4473 4474 const address stub_start_addr = addr_at(stub_start_offset); 4475 4476 end_a_stub(); 4477 4478 return stub_start_addr; 4479 } 4480 4481 int MacroAssembler::max_reloc_call_stub_size() { 4482 // Max stub size: alignment nop, TrampolineStub. 
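// That is: at most one 4-byte nop for alignment, plus either the trampoline
// sequence (auipc + ld + jr followed by the 8-byte target, as emitted by
// emit_trampoline_stub above) or, without trampolines, just the 8-byte target
// word consumed by load_and_call (see emit_address_stub above).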
4483 if (UseTrampolines) { 4484 return instruction_size + MacroAssembler::NativeShortCall::trampoline_size; 4485 } 4486 return instruction_size + wordSize; 4487 } 4488 4489 int MacroAssembler::static_call_stub_size() { 4490 // (lui, addi, slli, addi, slli, addi) + (lui + lui + slli + add) + jalr 4491 return 11 * MacroAssembler::instruction_size; 4492 } 4493 4494 Address MacroAssembler::add_memory_helper(const Address dst, Register tmp) { 4495 switch (dst.getMode()) { 4496 case Address::base_plus_offset: 4497 // This is the expected mode, although we allow all the other 4498 // forms below. 4499 return form_address(tmp, dst.base(), dst.offset()); 4500 default: 4501 la(tmp, dst); 4502 return Address(tmp); 4503 } 4504 } 4505 4506 void MacroAssembler::increment(const Address dst, int64_t value, Register tmp1, Register tmp2) { 4507 assert(((dst.getMode() == Address::base_plus_offset && 4508 is_simm12(dst.offset())) || is_simm12(value)), 4509 "invalid value and address mode combination"); 4510 Address adr = add_memory_helper(dst, tmp2); 4511 assert(!adr.uses(tmp1), "invalid dst for address increment"); 4512 ld(tmp1, adr); 4513 add(tmp1, tmp1, value, tmp2); 4514 sd(tmp1, adr); 4515 } 4516 4517 void MacroAssembler::incrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 4518 assert(((dst.getMode() == Address::base_plus_offset && 4519 is_simm12(dst.offset())) || is_simm12(value)), 4520 "invalid value and address mode combination"); 4521 Address adr = add_memory_helper(dst, tmp2); 4522 assert(!adr.uses(tmp1), "invalid dst for address increment"); 4523 lwu(tmp1, adr); 4524 addw(tmp1, tmp1, value, tmp2); 4525 sw(tmp1, adr); 4526 } 4527 4528 void MacroAssembler::decrement(const Address dst, int64_t value, Register tmp1, Register tmp2) { 4529 assert(((dst.getMode() == Address::base_plus_offset && 4530 is_simm12(dst.offset())) || is_simm12(value)), 4531 "invalid value and address mode combination"); 4532 Address adr = add_memory_helper(dst, tmp2); 4533 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 4534 ld(tmp1, adr); 4535 sub(tmp1, tmp1, value, tmp2); 4536 sd(tmp1, adr); 4537 } 4538 4539 void MacroAssembler::decrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 4540 assert(((dst.getMode() == Address::base_plus_offset && 4541 is_simm12(dst.offset())) || is_simm12(value)), 4542 "invalid value and address mode combination"); 4543 Address adr = add_memory_helper(dst, tmp2); 4544 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 4545 lwu(tmp1, adr); 4546 subw(tmp1, tmp1, value, tmp2); 4547 sw(tmp1, adr); 4548 } 4549 4550 void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { 4551 assert_different_registers(src1, t0); 4552 relocate(src2.rspec(), [&] { 4553 int32_t offset; 4554 la(t0, src2.target(), offset); 4555 ld(t0, Address(t0, offset)); 4556 }); 4557 beq(src1, t0, equal); 4558 } 4559 4560 void MacroAssembler::load_method_holder_cld(Register result, Register method) { 4561 load_method_holder(result, method); 4562 ld(result, Address(result, InstanceKlass::class_loader_data_offset())); 4563 } 4564 4565 void MacroAssembler::load_method_holder(Register holder, Register method) { 4566 ld(holder, Address(method, Method::const_offset())); // ConstMethod* 4567 ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* 4568 ld(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass* 4569 } 4570 4571 // string indexof 4572 // compute index by trailing zeros 4573 void 
MacroAssembler::compute_index(Register haystack, Register trailing_zeros, 4574 Register match_mask, Register result, 4575 Register ch2, Register tmp, 4576 bool haystack_isL) { 4577 int haystack_chr_shift = haystack_isL ? 0 : 1; 4578 srl(match_mask, match_mask, trailing_zeros); 4579 srli(match_mask, match_mask, 1); 4580 srli(tmp, trailing_zeros, LogBitsPerByte); 4581 if (!haystack_isL) andi(tmp, tmp, 0xE); 4582 add(haystack, haystack, tmp); 4583 ld(ch2, Address(haystack)); 4584 if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); 4585 add(result, result, tmp); 4586 } 4587 4588 // string indexof 4589 // Find pattern element in src, compute match mask, 4590 // only the first occurrence of 0x80/0x8000 at low bits is the valid match index 4591 // match mask patterns and corresponding indices would be like: 4592 // - 0x8080808080808080 (Latin1) 4593 // - 7 6 5 4 3 2 1 0 (match index) 4594 // - 0x8000800080008000 (UTF16) 4595 // - 3 2 1 0 (match index) 4596 void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, 4597 Register mask1, Register mask2) { 4598 xorr(src, pattern, src); 4599 sub(match_mask, src, mask1); 4600 orr(src, src, mask2); 4601 notr(src, src); 4602 andr(match_mask, match_mask, src); 4603 } 4604 4605 #ifdef COMPILER2 4606 // Code for BigInteger::mulAdd intrinsic 4607 // out = x10 4608 // in = x11 4609 // offset = x12 (already out.length-offset) 4610 // len = x13 4611 // k = x14 4612 // tmp = x28 4613 // 4614 // pseudo code from java implementation: 4615 // long kLong = k & LONG_MASK; 4616 // carry = 0; 4617 // offset = out.length-offset - 1; 4618 // for (int j = len - 1; j >= 0; j--) { 4619 // product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; 4620 // out[offset--] = (int)product; 4621 // carry = product >>> 32; 4622 // } 4623 // return (int)carry; 4624 void MacroAssembler::mul_add(Register out, Register in, Register offset, 4625 Register len, Register k, Register tmp) { 4626 Label L_tail_loop, L_unroll, L_end; 4627 mv(tmp, out); 4628 mv(out, zr); 4629 blez(len, L_end); 4630 zero_extend(k, k, 32); 4631 slliw(t0, offset, LogBytesPerInt); 4632 add(offset, tmp, t0); 4633 slliw(t0, len, LogBytesPerInt); 4634 add(in, in, t0); 4635 4636 const int unroll = 8; 4637 mv(tmp, unroll); 4638 blt(len, tmp, L_tail_loop); 4639 bind(L_unroll); 4640 for (int i = 0; i < unroll; i++) { 4641 sub(in, in, BytesPerInt); 4642 lwu(t0, Address(in, 0)); 4643 mul(t1, t0, k); 4644 add(t0, t1, out); 4645 sub(offset, offset, BytesPerInt); 4646 lwu(t1, Address(offset, 0)); 4647 add(t0, t0, t1); 4648 sw(t0, Address(offset, 0)); 4649 srli(out, t0, 32); 4650 } 4651 subw(len, len, tmp); 4652 bge(len, tmp, L_unroll); 4653 4654 bind(L_tail_loop); 4655 blez(len, L_end); 4656 sub(in, in, BytesPerInt); 4657 lwu(t0, Address(in, 0)); 4658 mul(t1, t0, k); 4659 add(t0, t1, out); 4660 sub(offset, offset, BytesPerInt); 4661 lwu(t1, Address(offset, 0)); 4662 add(t0, t0, t1); 4663 sw(t0, Address(offset, 0)); 4664 srli(out, t0, 32); 4665 subw(len, len, 1); 4666 j(L_tail_loop); 4667 4668 bind(L_end); 4669 } 4670 4671 // Multiply and multiply-accumulate unsigned 64-bit registers. 
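// mul produces the low 64 bits and mulhu the high 64 bits of the unsigned
// product, so (prod_hi:prod_lo) is the full 128-bit result. Worked example
// (illustration only): n = 2^63, m = 4 gives prod_lo = 0 and prod_hi = 2.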
4672 void MacroAssembler::wide_mul(Register prod_lo, Register prod_hi, Register n, Register m) { 4673 assert_different_registers(prod_lo, prod_hi); 4674 4675 mul(prod_lo, n, m); 4676 mulhu(prod_hi, n, m); 4677 } 4678 4679 void MacroAssembler::wide_madd(Register sum_lo, Register sum_hi, Register n, 4680 Register m, Register tmp1, Register tmp2) { 4681 assert_different_registers(sum_lo, sum_hi); 4682 assert_different_registers(sum_hi, tmp2); 4683 4684 wide_mul(tmp1, tmp2, n, m); 4685 cad(sum_lo, sum_lo, tmp1, tmp1); // Add tmp1 to sum_lo with carry output to tmp1 4686 adc(sum_hi, sum_hi, tmp2, tmp1); // Add tmp2 with carry to sum_hi 4687 } 4688 4689 // add two unsigned input and output carry 4690 void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) 4691 { 4692 assert_different_registers(dst, carry); 4693 assert_different_registers(dst, src2); 4694 add(dst, src1, src2); 4695 sltu(carry, dst, src2); 4696 } 4697 4698 // add two input with carry 4699 void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) { 4700 assert_different_registers(dst, carry); 4701 add(dst, src1, src2); 4702 add(dst, dst, carry); 4703 } 4704 4705 // add two unsigned input with carry and output carry 4706 void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) { 4707 assert_different_registers(dst, src2); 4708 adc(dst, src1, src2, carry); 4709 sltu(carry, dst, src2); 4710 } 4711 4712 void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, 4713 Register src1, Register src2, Register carry) { 4714 cad(dest_lo, dest_lo, src1, carry); 4715 add(dest_hi, dest_hi, carry); 4716 cad(dest_lo, dest_lo, src2, carry); 4717 add(final_dest_hi, dest_hi, carry); 4718 } 4719 4720 /** 4721 * Multiply 32 bit by 32 bit first loop. 4722 */ 4723 void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, 4724 Register y, Register y_idx, Register z, 4725 Register carry, Register product, 4726 Register idx, Register kdx) { 4727 // jlong carry, x[], y[], z[]; 4728 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 4729 // long product = y[idx] * x[xstart] + carry; 4730 // z[kdx] = (int)product; 4731 // carry = product >>> 32; 4732 // } 4733 // z[xstart] = (int)carry; 4734 4735 Label L_first_loop, L_first_loop_exit; 4736 blez(idx, L_first_loop_exit); 4737 4738 shadd(t0, xstart, x, t0, LogBytesPerInt); 4739 lwu(x_xstart, Address(t0, 0)); 4740 4741 bind(L_first_loop); 4742 subw(idx, idx, 1); 4743 shadd(t0, idx, y, t0, LogBytesPerInt); 4744 lwu(y_idx, Address(t0, 0)); 4745 mul(product, x_xstart, y_idx); 4746 add(product, product, carry); 4747 srli(carry, product, 32); 4748 subw(kdx, kdx, 1); 4749 shadd(t0, kdx, z, t0, LogBytesPerInt); 4750 sw(product, Address(t0, 0)); 4751 bgtz(idx, L_first_loop); 4752 4753 bind(L_first_loop_exit); 4754 } 4755 4756 /** 4757 * Multiply 64 bit by 64 bit first loop. 
4758 */ 4759 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, 4760 Register y, Register y_idx, Register z, 4761 Register carry, Register product, 4762 Register idx, Register kdx) { 4763 // 4764 // jlong carry, x[], y[], z[]; 4765 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 4766 // huge_128 product = y[idx] * x[xstart] + carry; 4767 // z[kdx] = (jlong)product; 4768 // carry = (jlong)(product >>> 64); 4769 // } 4770 // z[xstart] = carry; 4771 // 4772 4773 Label L_first_loop, L_first_loop_exit; 4774 Label L_one_x, L_one_y, L_multiply; 4775 4776 subw(xstart, xstart, 1); 4777 bltz(xstart, L_one_x); 4778 4779 shadd(t0, xstart, x, t0, LogBytesPerInt); 4780 ld(x_xstart, Address(t0, 0)); 4781 ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian 4782 4783 bind(L_first_loop); 4784 subw(idx, idx, 1); 4785 bltz(idx, L_first_loop_exit); 4786 subw(idx, idx, 1); 4787 bltz(idx, L_one_y); 4788 4789 shadd(t0, idx, y, t0, LogBytesPerInt); 4790 ld(y_idx, Address(t0, 0)); 4791 ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian 4792 bind(L_multiply); 4793 4794 mulhu(t0, x_xstart, y_idx); 4795 mul(product, x_xstart, y_idx); 4796 cad(product, product, carry, t1); 4797 adc(carry, t0, zr, t1); 4798 4799 subw(kdx, kdx, 2); 4800 ror_imm(product, product, 32); // back to big-endian 4801 shadd(t0, kdx, z, t0, LogBytesPerInt); 4802 sd(product, Address(t0, 0)); 4803 4804 j(L_first_loop); 4805 4806 bind(L_one_y); 4807 lwu(y_idx, Address(y, 0)); 4808 j(L_multiply); 4809 4810 bind(L_one_x); 4811 lwu(x_xstart, Address(x, 0)); 4812 j(L_first_loop); 4813 4814 bind(L_first_loop_exit); 4815 } 4816 4817 /** 4818 * Multiply 128 bit by 128 bit. Unrolled inner loop. 4819 * 4820 */ 4821 void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, 4822 Register carry, Register carry2, 4823 Register idx, Register jdx, 4824 Register yz_idx1, Register yz_idx2, 4825 Register tmp, Register tmp3, Register tmp4, 4826 Register tmp6, Register product_hi) { 4827 // jlong carry, x[], y[], z[]; 4828 // int kdx = xstart+1; 4829 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop 4830 // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; 4831 // jlong carry2 = (jlong)(tmp3 >>> 64); 4832 // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; 4833 // carry = (jlong)(tmp4 >>> 64); 4834 // z[kdx+idx+1] = (jlong)tmp3; 4835 // z[kdx+idx] = (jlong)tmp4; 4836 // } 4837 // idx += 2; 4838 // if (idx > 0) { 4839 // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; 4840 // z[kdx+idx] = (jlong)yz_idx1; 4841 // carry = (jlong)(yz_idx1 >>> 64); 4842 // } 4843 // 4844 4845 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; 4846 4847 srliw(jdx, idx, 2); 4848 4849 bind(L_third_loop); 4850 4851 subw(jdx, jdx, 1); 4852 bltz(jdx, L_third_loop_exit); 4853 subw(idx, idx, 4); 4854 4855 shadd(t0, idx, y, t0, LogBytesPerInt); 4856 ld(yz_idx2, Address(t0, 0)); 4857 ld(yz_idx1, Address(t0, wordSize)); 4858 4859 shadd(tmp6, idx, z, t0, LogBytesPerInt); 4860 4861 ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian 4862 ror_imm(yz_idx2, yz_idx2, 32); 4863 4864 ld(t1, Address(tmp6, 0)); 4865 ld(t0, Address(tmp6, wordSize)); 4866 4867 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 4868 mulhu(tmp4, product_hi, yz_idx1); 4869 4870 ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian 4871 ror_imm(t1, t1, 32, tmp); 4872 4873 mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi 
-> carry2:tmp 4874 mulhu(carry2, product_hi, yz_idx2); 4875 4876 cad(tmp3, tmp3, carry, carry); 4877 adc(tmp4, tmp4, zr, carry); 4878 cad(tmp3, tmp3, t0, t0); 4879 cadc(tmp4, tmp4, tmp, t0); 4880 adc(carry, carry2, zr, t0); 4881 cad(tmp4, tmp4, t1, carry2); 4882 adc(carry, carry, zr, carry2); 4883 4884 ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian 4885 ror_imm(tmp4, tmp4, 32); 4886 sd(tmp4, Address(tmp6, 0)); 4887 sd(tmp3, Address(tmp6, wordSize)); 4888 4889 j(L_third_loop); 4890 4891 bind(L_third_loop_exit); 4892 4893 andi(idx, idx, 0x3); 4894 beqz(idx, L_post_third_loop_done); 4895 4896 Label L_check_1; 4897 subw(idx, idx, 2); 4898 bltz(idx, L_check_1); 4899 4900 shadd(t0, idx, y, t0, LogBytesPerInt); 4901 ld(yz_idx1, Address(t0, 0)); 4902 ror_imm(yz_idx1, yz_idx1, 32); 4903 4904 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 4905 mulhu(tmp4, product_hi, yz_idx1); 4906 4907 shadd(t0, idx, z, t0, LogBytesPerInt); 4908 ld(yz_idx2, Address(t0, 0)); 4909 ror_imm(yz_idx2, yz_idx2, 32, tmp); 4910 4911 add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); 4912 4913 ror_imm(tmp3, tmp3, 32, tmp); 4914 sd(tmp3, Address(t0, 0)); 4915 4916 bind(L_check_1); 4917 4918 andi(idx, idx, 0x1); 4919 subw(idx, idx, 1); 4920 bltz(idx, L_post_third_loop_done); 4921 shadd(t0, idx, y, t0, LogBytesPerInt); 4922 lwu(tmp4, Address(t0, 0)); 4923 mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 4924 mulhu(carry2, tmp4, product_hi); 4925 4926 shadd(t0, idx, z, t0, LogBytesPerInt); 4927 lwu(tmp4, Address(t0, 0)); 4928 4929 add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); 4930 4931 shadd(t0, idx, z, t0, LogBytesPerInt); 4932 sw(tmp3, Address(t0, 0)); 4933 4934 slli(t0, carry2, 32); 4935 srli(carry, tmp3, 32); 4936 orr(carry, carry, t0); 4937 4938 bind(L_post_third_loop_done); 4939 } 4940 4941 /** 4942 * Code for BigInteger::multiplyToLen() intrinsic. 4943 * 4944 * x10: x 4945 * x11: xlen 4946 * x12: y 4947 * x13: ylen 4948 * x14: z 4949 * x15: tmp0 4950 * x16: tmp1 4951 * x17: tmp2 4952 * x7: tmp3 4953 * x28: tmp4 4954 * x29: tmp5 4955 * x30: tmp6 4956 * x31: tmp7 4957 */ 4958 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, 4959 Register z, Register tmp0, 4960 Register tmp1, Register tmp2, Register tmp3, Register tmp4, 4961 Register tmp5, Register tmp6, Register product_hi) { 4962 assert_different_registers(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); 4963 4964 const Register idx = tmp1; 4965 const Register kdx = tmp2; 4966 const Register xstart = tmp3; 4967 4968 const Register y_idx = tmp4; 4969 const Register carry = tmp5; 4970 const Register product = xlen; 4971 const Register x_xstart = tmp0; 4972 4973 mv(idx, ylen); // idx = ylen; 4974 addw(kdx, xlen, ylen); // kdx = xlen+ylen; 4975 mv(carry, zr); // carry = 0; 4976 4977 Label L_multiply_64_x_64_loop, L_done; 4978 4979 subw(xstart, xlen, 1); 4980 bltz(xstart, L_done); 4981 4982 const Register jdx = tmp1; 4983 4984 if (AvoidUnalignedAccesses) { 4985 // Check if x and y are both 8-byte aligned. 
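// (What is actually tested below is whether xlen and ylen are both even: bit 0
// of (xlen | ylen) is clear only in that case, and only then can the 64x64 loop
// walk both arrays two ints at a time without unaligned 8-byte accesses.)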
4986 orr(t0, xlen, ylen); 4987 test_bit(t0, t0, 0); 4988 beqz(t0, L_multiply_64_x_64_loop); 4989 4990 multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 4991 shadd(t0, xstart, z, t0, LogBytesPerInt); 4992 sw(carry, Address(t0, 0)); 4993 4994 Label L_second_loop_unaligned; 4995 bind(L_second_loop_unaligned); 4996 mv(carry, zr); 4997 mv(jdx, ylen); 4998 subw(xstart, xstart, 1); 4999 bltz(xstart, L_done); 5000 sub(sp, sp, 2 * wordSize); 5001 sd(z, Address(sp, 0)); 5002 sd(zr, Address(sp, wordSize)); 5003 shadd(t0, xstart, z, t0, LogBytesPerInt); 5004 addi(z, t0, 4); 5005 shadd(t0, xstart, x, t0, LogBytesPerInt); 5006 lwu(product, Address(t0, 0)); 5007 Label L_third_loop, L_third_loop_exit; 5008 5009 blez(jdx, L_third_loop_exit); 5010 5011 bind(L_third_loop); 5012 subw(jdx, jdx, 1); 5013 shadd(t0, jdx, y, t0, LogBytesPerInt); 5014 lwu(t0, Address(t0, 0)); 5015 mul(t1, t0, product); 5016 add(t0, t1, carry); 5017 shadd(tmp6, jdx, z, t1, LogBytesPerInt); 5018 lwu(t1, Address(tmp6, 0)); 5019 add(t0, t0, t1); 5020 sw(t0, Address(tmp6, 0)); 5021 srli(carry, t0, 32); 5022 bgtz(jdx, L_third_loop); 5023 5024 bind(L_third_loop_exit); 5025 ld(z, Address(sp, 0)); 5026 addi(sp, sp, 2 * wordSize); 5027 shadd(t0, xstart, z, t0, LogBytesPerInt); 5028 sw(carry, Address(t0, 0)); 5029 5030 j(L_second_loop_unaligned); 5031 } 5032 5033 bind(L_multiply_64_x_64_loop); 5034 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 5035 5036 Label L_second_loop_aligned; 5037 beqz(kdx, L_second_loop_aligned); 5038 5039 Label L_carry; 5040 subw(kdx, kdx, 1); 5041 beqz(kdx, L_carry); 5042 5043 shadd(t0, kdx, z, t0, LogBytesPerInt); 5044 sw(carry, Address(t0, 0)); 5045 srli(carry, carry, 32); 5046 subw(kdx, kdx, 1); 5047 5048 bind(L_carry); 5049 shadd(t0, kdx, z, t0, LogBytesPerInt); 5050 sw(carry, Address(t0, 0)); 5051 5052 // Second and third (nested) loops. 
5053 // 5054 // for (int i = xstart-1; i >= 0; i--) { // Second loop 5055 // carry = 0; 5056 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop 5057 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + 5058 // (z[k] & LONG_MASK) + carry; 5059 // z[k] = (int)product; 5060 // carry = product >>> 32; 5061 // } 5062 // z[i] = (int)carry; 5063 // } 5064 // 5065 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi 5066 5067 bind(L_second_loop_aligned); 5068 mv(carry, zr); // carry = 0; 5069 mv(jdx, ylen); // j = ystart+1 5070 5071 subw(xstart, xstart, 1); // i = xstart-1; 5072 bltz(xstart, L_done); 5073 5074 sub(sp, sp, 4 * wordSize); 5075 sd(z, Address(sp, 0)); 5076 5077 Label L_last_x; 5078 shadd(t0, xstart, z, t0, LogBytesPerInt); 5079 addi(z, t0, 4); 5080 subw(xstart, xstart, 1); // i = xstart-1; 5081 bltz(xstart, L_last_x); 5082 5083 shadd(t0, xstart, x, t0, LogBytesPerInt); 5084 ld(product_hi, Address(t0, 0)); 5085 ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian 5086 5087 Label L_third_loop_prologue; 5088 bind(L_third_loop_prologue); 5089 5090 sd(ylen, Address(sp, wordSize)); 5091 sd(x, Address(sp, 2 * wordSize)); 5092 sd(xstart, Address(sp, 3 * wordSize)); 5093 multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, 5094 tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); 5095 ld(z, Address(sp, 0)); 5096 ld(ylen, Address(sp, wordSize)); 5097 ld(x, Address(sp, 2 * wordSize)); 5098 ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen 5099 addi(sp, sp, 4 * wordSize); 5100 5101 addiw(tmp3, xlen, 1); 5102 shadd(t0, tmp3, z, t0, LogBytesPerInt); 5103 sw(carry, Address(t0, 0)); 5104 5105 subw(tmp3, tmp3, 1); 5106 bltz(tmp3, L_done); 5107 5108 srli(carry, carry, 32); 5109 shadd(t0, tmp3, z, t0, LogBytesPerInt); 5110 sw(carry, Address(t0, 0)); 5111 j(L_second_loop_aligned); 5112 5113 // Next infrequent code is moved outside loops. 5114 bind(L_last_x); 5115 lwu(product_hi, Address(x, 0)); 5116 j(L_third_loop_prologue); 5117 5118 bind(L_done); 5119 } 5120 #endif 5121 5122 // Count bits of trailing zero chars from lsb to msb until first non-zero element. 5123 // For LL case, one byte for one element, so shift 8 bits once, and for other case, 5124 // shift 16 bits once. 5125 void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) { 5126 if (UseZbb) { 5127 assert_different_registers(Rd, Rs, tmp1); 5128 int step = isLL ? 8 : 16; 5129 ctz(Rd, Rs); 5130 andi(tmp1, Rd, step - 1); 5131 sub(Rd, Rd, tmp1); 5132 return; 5133 } 5134 5135 assert_different_registers(Rd, Rs, tmp1, tmp2); 5136 Label Loop; 5137 int step = isLL ? 
8 : 16; 5138 mv(Rd, -step); 5139 mv(tmp2, Rs); 5140 5141 bind(Loop); 5142 addi(Rd, Rd, step); 5143 andi(tmp1, tmp2, ((1 << step) - 1)); 5144 srli(tmp2, tmp2, step); 5145 beqz(tmp1, Loop); 5146 } 5147 5148 // This instruction reads adjacent 4 bytes from the lower half of source register, 5149 // inflate into a register, for example: 5150 // Rs: A7A6A5A4A3A2A1A0 5151 // Rd: 00A300A200A100A0 5152 void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) { 5153 assert_different_registers(Rd, Rs, tmp1, tmp2); 5154 5155 mv(tmp1, 0xFF000000); // first byte mask at lower word 5156 andr(Rd, Rs, tmp1); 5157 for (int i = 0; i < 2; i++) { 5158 slli(Rd, Rd, wordSize); 5159 srli(tmp1, tmp1, wordSize); 5160 andr(tmp2, Rs, tmp1); 5161 orr(Rd, Rd, tmp2); 5162 } 5163 slli(Rd, Rd, wordSize); 5164 andi(tmp2, Rs, 0xFF); // last byte mask at lower word 5165 orr(Rd, Rd, tmp2); 5166 } 5167 5168 // This instruction reads adjacent 4 bytes from the upper half of source register, 5169 // inflate into a register, for example: 5170 // Rs: A7A6A5A4A3A2A1A0 5171 // Rd: 00A700A600A500A4 5172 void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) { 5173 assert_different_registers(Rd, Rs, tmp1, tmp2); 5174 srli(Rs, Rs, 32); // only upper 32 bits are needed 5175 inflate_lo32(Rd, Rs, tmp1, tmp2); 5176 } 5177 5178 // The size of the blocks erased by the zero_blocks stub. We must 5179 // handle anything smaller than this ourselves in zero_words(). 5180 const int MacroAssembler::zero_words_block_size = 8; 5181 5182 // zero_words() is used by C2 ClearArray patterns. It is as small as 5183 // possible, handling small word counts locally and delegating 5184 // anything larger to the zero_blocks stub. It is expanded many times 5185 // in compiled code, so it is important to keep it short. 5186 5187 // ptr: Address of a buffer to be zeroed. 5188 // cnt: Count in HeapWords. 5189 // 5190 // ptr, cnt, and t0 are clobbered. 5191 address MacroAssembler::zero_words(Register ptr, Register cnt) { 5192 assert(is_power_of_2(zero_words_block_size), "adjust this"); 5193 assert(ptr == x28 && cnt == x29, "mismatch in register usage"); 5194 assert_different_registers(cnt, t0); 5195 5196 BLOCK_COMMENT("zero_words {"); 5197 5198 mv(t0, zero_words_block_size); 5199 Label around, done, done16; 5200 bltu(cnt, t0, around); 5201 { 5202 RuntimeAddress zero_blocks(StubRoutines::riscv::zero_blocks()); 5203 assert(zero_blocks.target() != nullptr, "zero_blocks stub has not been generated"); 5204 if (StubRoutines::riscv::complete()) { 5205 address tpc = reloc_call(zero_blocks); 5206 if (tpc == nullptr) { 5207 DEBUG_ONLY(reset_labels(around)); 5208 postcond(pc() == badAddress); 5209 return nullptr; 5210 } 5211 } else { 5212 rt_call(zero_blocks.target()); 5213 } 5214 } 5215 bind(around); 5216 for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { 5217 Label l; 5218 test_bit(t0, cnt, exact_log2(i)); 5219 beqz(t0, l); 5220 for (int j = 0; j < i; j++) { 5221 sd(zr, Address(ptr, j * wordSize)); 5222 } 5223 addi(ptr, ptr, i * wordSize); 5224 bind(l); 5225 } 5226 { 5227 Label l; 5228 test_bit(t0, cnt, 0); 5229 beqz(t0, l); 5230 sd(zr, Address(ptr, 0)); 5231 bind(l); 5232 } 5233 5234 BLOCK_COMMENT("} zero_words"); 5235 postcond(pc() != badAddress); 5236 return pc(); 5237 } 5238 5239 #define SmallArraySize (18 * BytesPerLong) 5240 5241 // base: Address of a buffer to be zeroed, 8 bytes aligned. 5242 // cnt: Immediate count in HeapWords. 
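//
// As an illustration only (a hypothetical C++ sketch, not the code emitted below):
// small immediate counts are zeroed with straight-line stores, while larger counts
// store the remainder first and then run an 8-way unrolled loop.
//
//   void zero_words_sketch(int64_t* base, uint64_t cnt) {
//     const uint64_t small  = 18;                          // SmallArraySize / BytesPerLong
//     const uint64_t unroll = 8;
//     if (cnt <= small) {
//       for (uint64_t i = 0; i < cnt; i++) base[i] = 0;    // fully unrolled below
//       return;
//     }
//     uint64_t rem = cnt % unroll;
//     for (uint64_t i = 0; i < rem; i++) base[i] = 0;      // head stores
//     for (uint64_t i = rem; i < cnt; i += unroll) {       // 8 stores per iteration
//       for (uint64_t j = 0; j < unroll; j++) base[i + j] = 0;
//     }
//   }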
5243 void MacroAssembler::zero_words(Register base, uint64_t cnt) { 5244 assert_different_registers(base, t0, t1); 5245 5246 BLOCK_COMMENT("zero_words {"); 5247 5248 if (cnt <= SmallArraySize / BytesPerLong) { 5249 for (int i = 0; i < (int)cnt; i++) { 5250 sd(zr, Address(base, i * wordSize)); 5251 } 5252 } else { 5253 const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll 5254 int remainder = cnt % unroll; 5255 for (int i = 0; i < remainder; i++) { 5256 sd(zr, Address(base, i * wordSize)); 5257 } 5258 5259 Label loop; 5260 Register cnt_reg = t0; 5261 Register loop_base = t1; 5262 cnt = cnt - remainder; 5263 mv(cnt_reg, cnt); 5264 add(loop_base, base, remainder * wordSize); 5265 bind(loop); 5266 sub(cnt_reg, cnt_reg, unroll); 5267 for (int i = 0; i < unroll; i++) { 5268 sd(zr, Address(loop_base, i * wordSize)); 5269 } 5270 add(loop_base, loop_base, unroll * wordSize); 5271 bnez(cnt_reg, loop); 5272 } 5273 5274 BLOCK_COMMENT("} zero_words"); 5275 } 5276 5277 // base: Address of a buffer to be filled, 8 bytes aligned. 5278 // cnt: Count in 8-byte unit. 5279 // value: Value to be filled with. 5280 // base will point to the end of the buffer after filling. 5281 void MacroAssembler::fill_words(Register base, Register cnt, Register value) { 5282 // Algorithm: 5283 // 5284 // t0 = cnt & 7 5285 // cnt -= t0 5286 // p += t0 5287 // switch (t0): 5288 // switch start: 5289 // do while cnt 5290 // cnt -= 8 5291 // p[-8] = value 5292 // case 7: 5293 // p[-7] = value 5294 // case 6: 5295 // p[-6] = value 5296 // // ... 5297 // case 1: 5298 // p[-1] = value 5299 // case 0: 5300 // p += 8 5301 // do-while end 5302 // switch end 5303 5304 assert_different_registers(base, cnt, value, t0, t1); 5305 5306 Label fini, skip, entry, loop; 5307 const int unroll = 8; // Number of sd instructions we'll unroll 5308 5309 beqz(cnt, fini); 5310 5311 andi(t0, cnt, unroll - 1); 5312 sub(cnt, cnt, t0); 5313 // align 8, so first sd n % 8 = mod, next loop sd 8 * n. 5314 shadd(base, t0, base, t1, 3); 5315 la(t1, entry); 5316 slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) 5317 sub(t1, t1, t0); 5318 jr(t1); 5319 5320 bind(loop); 5321 add(base, base, unroll * 8); 5322 for (int i = -unroll; i < 0; i++) { 5323 sd(value, Address(base, i * 8)); 5324 } 5325 bind(entry); 5326 sub(cnt, cnt, unroll); 5327 bgez(cnt, loop); 5328 5329 bind(fini); 5330 } 5331 5332 // Zero blocks of memory by using CBO.ZERO. 5333 // 5334 // Aligns the base address first sufficiently for CBO.ZERO, then uses 5335 // CBO.ZERO repeatedly for every full block. cnt is the size to be 5336 // zeroed in HeapWords. Returns the count of words left to be zeroed 5337 // in cnt. 5338 // 5339 // NOTE: This is intended to be used in the zero_blocks() stub. If 5340 // you want to use it elsewhere, note that cnt must be >= CacheLineSize. 5341 void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2) { 5342 Label initial_table_end, loop; 5343 5344 // Align base with cache line size. 5345 neg(tmp1, base); 5346 andi(tmp1, tmp1, CacheLineSize - 1); 5347 5348 // tmp1: the number of bytes to be filled to align the base with cache line size. 
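// For example (illustrative numbers only, assuming a 64-byte cache line):
//   base ends in 0x38  ->  tmp1 = (-base) & 63 = 8, i.e. one word is stored by hand;
//   base ends in 0x40  ->  tmp1 = 0, the base is already aligned.
// The computed jump below then skips over the stores that are not needed.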
  add(base, base, tmp1);
  srai(tmp2, tmp1, 3);
  sub(cnt, cnt, tmp2);
  srli(tmp2, tmp1, 1);
  la(tmp1, initial_table_end);
  sub(tmp2, tmp1, tmp2);
  jr(tmp2);
  for (int i = -CacheLineSize + wordSize; i < 0; i += wordSize) {
    sd(zr, Address(base, i));
  }
  bind(initial_table_end);

  mv(tmp1, CacheLineSize / wordSize);
  bind(loop);
  cbo_zero(base);
  sub(cnt, cnt, tmp1);
  add(base, base, CacheLineSize);
  bge(cnt, tmp1, loop);
}

// java.lang.Math.round(float a)
// Returns the closest int to the argument, with ties rounding to positive infinity.
void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
  // This instruction sequence provides a performance improvement on all tested devices;
  // don't change it without re-verification.
  Label done;
  mv(t0, jint_cast(0.5f));
  fmv_w_x(ftmp, t0);

  // dst = 0 if NaN
  feq_s(t0, src, src); // replacing fclass with feq as performance optimization
  mv(dst, zr);
  beqz(t0, done);

  // dst = (src + 0.5f) rounded down towards negative infinity
  //   Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place.
  //   RDN is required for fadd_s; RNE gives incorrect results:
  //  --------------------------------------------------------------------
  //   fadd.s rne (src + 0.5f): src = 8388609.000000  ftmp = 8388610.000000
  //   fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610
  //  --------------------------------------------------------------------
  //   fadd.s rdn (src + 0.5f): src = 8388609.000000  ftmp = 8388609.000000
  //   fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609
  //  --------------------------------------------------------------------
  fadd_s(ftmp, src, ftmp, RoundingMode::rdn);
  fcvt_w_s(dst, ftmp, RoundingMode::rdn);

  bind(done);
}

// java.lang.Math.round(double a)
// Returns the closest long to the argument, with ties rounding to positive infinity.
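//
// A hedged C++ model of what both round helpers compute (hypothetical sketch,
// not the emitted code; std::fesetround is used to mimic the RDN mode of the
// fadd/fcvt pair, and real code would also need #pragma STDC FENV_ACCESS):
//
//   #include <cfenv>
//   #include <cmath>
//   long round_sketch(double a) {
//     if (std::isnan(a)) return 0;          // NaN -> 0, as in the code above and below
//     std::fesetround(FE_DOWNWARD);         // RDN: the sum must not round upwards
//     double sum = a + 0.5;                 // see the 8388609.0f example above
//     std::fesetround(FE_TONEAREST);
//     return (long)std::floor(sum);         // floor == conversion rounded towards -inf
//   }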
5401 void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) { 5402 // this instructions calling sequence provides performance improvement on all tested devices; 5403 // don't change it without re-verification 5404 Label done; 5405 mv(t0, julong_cast(0.5)); 5406 fmv_d_x(ftmp, t0); 5407 5408 // dst = 0 if NaN 5409 feq_d(t0, src, src); // replacing fclass with feq as performance optimization 5410 mv(dst, zr); 5411 beqz(t0, done); 5412 5413 // dst = (src + 0.5) rounded down towards negative infinity 5414 fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here otherwise some inputs produce incorrect results 5415 fcvt_l_d(dst, ftmp, RoundingMode::rdn); 5416 5417 bind(done); 5418 } 5419 5420 #define FCVT_SAFE(FLOATCVT, FLOATSIG) \ 5421 void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ 5422 Label done; \ 5423 assert_different_registers(dst, tmp); \ 5424 fclass_##FLOATSIG(tmp, src); \ 5425 mv(dst, zr); \ 5426 /* check if src is NaN */ \ 5427 andi(tmp, tmp, fclass_mask::nan); \ 5428 bnez(tmp, done); \ 5429 FLOATCVT(dst, src); \ 5430 bind(done); \ 5431 } 5432 5433 FCVT_SAFE(fcvt_w_s, s); 5434 FCVT_SAFE(fcvt_l_s, s); 5435 FCVT_SAFE(fcvt_w_d, d); 5436 FCVT_SAFE(fcvt_l_d, d); 5437 5438 #undef FCVT_SAFE 5439 5440 #define FCMP(FLOATTYPE, FLOATSIG) \ 5441 void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ 5442 FloatRegister Rs2, int unordered_result) { \ 5443 Label Ldone; \ 5444 if (unordered_result < 0) { \ 5445 /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ 5446 /* installs 1 if gt else 0 */ \ 5447 flt_##FLOATSIG(result, Rs2, Rs1); \ 5448 /* Rs1 > Rs2, install 1 */ \ 5449 bgtz(result, Ldone); \ 5450 feq_##FLOATSIG(result, Rs1, Rs2); \ 5451 addi(result, result, -1); \ 5452 /* Rs1 = Rs2, install 0 */ \ 5453 /* NaN or Rs1 < Rs2, install -1 */ \ 5454 bind(Ldone); \ 5455 } else { \ 5456 /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ 5457 /* installs 1 if gt or unordered else 0 */ \ 5458 flt_##FLOATSIG(result, Rs1, Rs2); \ 5459 /* Rs1 < Rs2, install -1 */ \ 5460 bgtz(result, Ldone); \ 5461 feq_##FLOATSIG(result, Rs1, Rs2); \ 5462 addi(result, result, -1); \ 5463 /* Rs1 = Rs2, install 0 */ \ 5464 /* NaN or Rs1 > Rs2, install 1 */ \ 5465 bind(Ldone); \ 5466 neg(result, result); \ 5467 } \ 5468 } 5469 5470 FCMP(float, s); 5471 FCMP(double, d); 5472 5473 #undef FCMP 5474 5475 // Zero words; len is in bytes 5476 // Destroys all registers except addr 5477 // len must be a nonzero multiple of wordSize 5478 void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { 5479 assert_different_registers(addr, len, tmp, t0, t1); 5480 5481 #ifdef ASSERT 5482 { 5483 Label L; 5484 andi(t0, len, BytesPerWord - 1); 5485 beqz(t0, L); 5486 stop("len is not a multiple of BytesPerWord"); 5487 bind(L); 5488 } 5489 #endif // ASSERT 5490 5491 #ifndef PRODUCT 5492 block_comment("zero memory"); 5493 #endif // PRODUCT 5494 5495 Label loop; 5496 Label entry; 5497 5498 // Algorithm: 5499 // 5500 // t0 = cnt & 7 5501 // cnt -= t0 5502 // p += t0 5503 // switch (t0) { 5504 // do { 5505 // cnt -= 8 5506 // p[-8] = 0 5507 // case 7: 5508 // p[-7] = 0 5509 // case 6: 5510 // p[-6] = 0 5511 // ... 
5512 // case 1: 5513 // p[-1] = 0 5514 // case 0: 5515 // p += 8 5516 // } while (cnt) 5517 // } 5518 5519 const int unroll = 8; // Number of sd(zr) instructions we'll unroll 5520 5521 srli(len, len, LogBytesPerWord); 5522 andi(t0, len, unroll - 1); // t0 = cnt % unroll 5523 sub(len, len, t0); // cnt -= unroll 5524 // tmp always points to the end of the region we're about to zero 5525 shadd(tmp, t0, addr, t1, LogBytesPerWord); 5526 la(t1, entry); 5527 slli(t0, t0, 2); 5528 sub(t1, t1, t0); 5529 jr(t1); 5530 bind(loop); 5531 sub(len, len, unroll); 5532 for (int i = -unroll; i < 0; i++) { 5533 sd(zr, Address(tmp, i * wordSize)); 5534 } 5535 bind(entry); 5536 add(tmp, tmp, unroll * wordSize); 5537 bnez(len, loop); 5538 } 5539 5540 // shift left by shamt and add 5541 // Rd = (Rs1 << shamt) + Rs2 5542 void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { 5543 if (UseZba) { 5544 if (shamt == 1) { 5545 sh1add(Rd, Rs1, Rs2); 5546 return; 5547 } else if (shamt == 2) { 5548 sh2add(Rd, Rs1, Rs2); 5549 return; 5550 } else if (shamt == 3) { 5551 sh3add(Rd, Rs1, Rs2); 5552 return; 5553 } 5554 } 5555 5556 if (shamt != 0) { 5557 assert_different_registers(Rs2, tmp); 5558 slli(tmp, Rs1, shamt); 5559 add(Rd, Rs2, tmp); 5560 } else { 5561 add(Rd, Rs1, Rs2); 5562 } 5563 } 5564 5565 void MacroAssembler::zero_extend(Register dst, Register src, int bits) { 5566 switch (bits) { 5567 case 32: 5568 if (UseZba) { 5569 zext_w(dst, src); 5570 return; 5571 } 5572 break; 5573 case 16: 5574 if (UseZbb) { 5575 zext_h(dst, src); 5576 return; 5577 } 5578 break; 5579 case 8: 5580 if (UseZbb) { 5581 zext_b(dst, src); 5582 return; 5583 } 5584 break; 5585 default: 5586 break; 5587 } 5588 slli(dst, src, XLEN - bits); 5589 srli(dst, dst, XLEN - bits); 5590 } 5591 5592 void MacroAssembler::sign_extend(Register dst, Register src, int bits) { 5593 switch (bits) { 5594 case 32: 5595 sext_w(dst, src); 5596 return; 5597 case 16: 5598 if (UseZbb) { 5599 sext_h(dst, src); 5600 return; 5601 } 5602 break; 5603 case 8: 5604 if (UseZbb) { 5605 sext_b(dst, src); 5606 return; 5607 } 5608 break; 5609 default: 5610 break; 5611 } 5612 slli(dst, src, XLEN - bits); 5613 srai(dst, dst, XLEN - bits); 5614 } 5615 5616 void MacroAssembler::cmp_x2i(Register dst, Register src1, Register src2, 5617 Register tmp, bool is_signed) { 5618 if (src1 == src2) { 5619 mv(dst, zr); 5620 return; 5621 } 5622 Label done; 5623 Register left = src1; 5624 Register right = src2; 5625 if (dst == src1) { 5626 assert_different_registers(dst, src2, tmp); 5627 mv(tmp, src1); 5628 left = tmp; 5629 } else if (dst == src2) { 5630 assert_different_registers(dst, src1, tmp); 5631 mv(tmp, src2); 5632 right = tmp; 5633 } 5634 5635 // installs 1 if gt else 0 5636 if (is_signed) { 5637 slt(dst, right, left); 5638 } else { 5639 sltu(dst, right, left); 5640 } 5641 bnez(dst, done); 5642 if (is_signed) { 5643 slt(dst, left, right); 5644 } else { 5645 sltu(dst, left, right); 5646 } 5647 // dst = -1 if lt; else if eq , dst = 0 5648 neg(dst, dst); 5649 bind(done); 5650 } 5651 5652 void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) 5653 { 5654 cmp_x2i(dst, src1, src2, tmp); 5655 } 5656 5657 void MacroAssembler::cmp_ul2i(Register dst, Register src1, Register src2, Register tmp) { 5658 cmp_x2i(dst, src1, src2, tmp, false); 5659 } 5660 5661 void MacroAssembler::cmp_uw2i(Register dst, Register src1, Register src2, Register tmp) { 5662 cmp_x2i(dst, src1, src2, tmp, false); 5663 } 5664 5665 // The java_calling_convention 
describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
static int reg2offset_in(VMReg r) {
  // Account for saved fp and ra
  // This should really be in_preserve_stack_slots
  return r->reg2stack() * VMRegImpl::stack_slot_size;
}

static int reg2offset_out(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}

// The C ABI specifies:
// "integer scalars narrower than XLEN bits are widened according to the sign
// of their type up to 32 bits, then sign-extended to XLEN bits."
// This applies both to arguments passed in registers and to arguments passed on the stack.
//
// Java uses 32-bit stack slots; jint, jshort, jchar and jbyte each use one slot.
// Native code uses 64-bit stack slots for all integer scalar types.
//
// lw loads the Java stack slot and sign-extends it;
// sd stores the widened integer into a 64-bit native stack slot.
void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      lw(tmp, Address(fp, reg2offset_in(src.first())));
      sd(tmp, Address(sp, reg2offset_out(dst.first())));
    } else {
      // stack to reg
      lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
  } else {
    if (dst.first() != src.first()) {
      sign_extend(dst.first()->as_Register(), src.first()->as_Register(), 32);
    }
  }
}

// An oop arg. Must pass a handle, not the oop itself.
void MacroAssembler::object_move(OopMap* map,
                                 int oop_handle_offset,
                                 int framesize_in_slots,
                                 VMRegPair src,
                                 VMRegPair dst,
                                 bool is_receiver,
                                 int* receiver_offset) {
  assert_cond(map != nullptr && receiver_offset != nullptr);

  // Must pass a handle. First figure out the location we use as a handle.
  Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register();

  // See if the oop is null; if it is, we need no handle.

  if (src.first()->is_stack()) {
    // Oop is already on the stack as an argument.
    int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
    if (is_receiver) {
      *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
    }

    ld(t0, Address(fp, reg2offset_in(src.first())));
    la(rHandle, Address(fp, reg2offset_in(src.first())));
    // conditionally move a null
    Label notZero1;
    bnez(t0, notZero1);
    mv(rHandle, zr);
    bind(notZero1);
  } else {

    // The oop is in a register; we must store it to the space we reserve
    // on the stack for oop handles, and pass a handle if the oop is non-null.

    const Register rOop = src.first()->as_Register();
    int oop_slot = -1;
    if (rOop == j_rarg0) {
      oop_slot = 0;
    } else if (rOop == j_rarg1) {
      oop_slot = 1;
    } else if (rOop == j_rarg2) {
      oop_slot = 2;
    } else if (rOop == j_rarg3) {
      oop_slot = 3;
    } else if (rOop == j_rarg4) {
      oop_slot = 4;
    } else if (rOop == j_rarg5) {
      oop_slot = 5;
    } else if (rOop == j_rarg6) {
      oop_slot = 6;
    } else {
      assert(rOop == j_rarg7, "wrong register");
      oop_slot = 7;
    }

    oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
    int offset = oop_slot * VMRegImpl::stack_slot_size;

    map->set_oop(VMRegImpl::stack2reg(oop_slot));
    // Store the oop in the handle area; it may be null.
    sd(rOop, Address(sp, offset));
    if (is_receiver) {
      *receiver_offset = offset;
    }

    // rOop may be the same register as rHandle.
    if (rOop == rHandle) {
      Label isZero;
      beqz(rOop, isZero);
      la(rHandle, Address(sp, offset));
      bind(isZero);
    } else {
      Label notZero2;
      la(rHandle, Address(sp, offset));
      bnez(rOop, notZero2);
      mv(rHandle, zr);
      bind(notZero2);
    }
  }

  // If the arg goes on the stack, store the handle there; otherwise it is already in the correct register.
5791 if (dst.first()->is_stack()) { 5792 sd(rHandle, Address(sp, reg2offset_out(dst.first()))); 5793 } 5794 } 5795 5796 // A float arg may have to do float reg int reg conversion 5797 void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) { 5798 assert((src.first()->is_stack() && dst.first()->is_stack()) || 5799 (src.first()->is_reg() && dst.first()->is_reg()) || 5800 (src.first()->is_stack() && dst.first()->is_reg()), "Unexpected error"); 5801 if (src.first()->is_stack()) { 5802 if (dst.first()->is_stack()) { 5803 lwu(tmp, Address(fp, reg2offset_in(src.first()))); 5804 sw(tmp, Address(sp, reg2offset_out(dst.first()))); 5805 } else if (dst.first()->is_Register()) { 5806 lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 5807 } else { 5808 ShouldNotReachHere(); 5809 } 5810 } else if (src.first() != dst.first()) { 5811 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { 5812 fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 5813 } else { 5814 ShouldNotReachHere(); 5815 } 5816 } 5817 } 5818 5819 // A long move 5820 void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) { 5821 if (src.first()->is_stack()) { 5822 if (dst.first()->is_stack()) { 5823 // stack to stack 5824 ld(tmp, Address(fp, reg2offset_in(src.first()))); 5825 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 5826 } else { 5827 // stack to reg 5828 ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 5829 } 5830 } else if (dst.first()->is_stack()) { 5831 // reg to stack 5832 sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); 5833 } else { 5834 if (dst.first() != src.first()) { 5835 mv(dst.first()->as_Register(), src.first()->as_Register()); 5836 } 5837 } 5838 } 5839 5840 // A double move 5841 void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) { 5842 assert((src.first()->is_stack() && dst.first()->is_stack()) || 5843 (src.first()->is_reg() && dst.first()->is_reg()) || 5844 (src.first()->is_stack() && dst.first()->is_reg()), "Unexpected error"); 5845 if (src.first()->is_stack()) { 5846 if (dst.first()->is_stack()) { 5847 ld(tmp, Address(fp, reg2offset_in(src.first()))); 5848 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 5849 } else if (dst.first()-> is_Register()) { 5850 ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 5851 } else { 5852 ShouldNotReachHere(); 5853 } 5854 } else if (src.first() != dst.first()) { 5855 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { 5856 fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 5857 } else { 5858 ShouldNotReachHere(); 5859 } 5860 } 5861 } 5862 5863 void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) { 5864 assert(bit_pos < 64, "invalid bit range"); 5865 if (UseZbs) { 5866 bexti(Rd, Rs, bit_pos); 5867 return; 5868 } 5869 int64_t imm = (int64_t)(1UL << bit_pos); 5870 if (is_simm12(imm)) { 5871 and_imm12(Rd, Rs, imm); 5872 } else { 5873 srli(Rd, Rs, bit_pos); 5874 and_imm12(Rd, Rd, 1); 5875 } 5876 } 5877 5878 // Implements lightweight-locking. 
//
// - obj: the object to be locked
// - tmp1, tmp2, tmp3: temporary registers, will be destroyed
// - slow: branched to if locking fails
void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
  assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
  assert_different_registers(basic_lock, obj, tmp1, tmp2, tmp3, t0);

  Label push;
  const Register top = tmp1;
  const Register mark = tmp2;
  const Register t = tmp3;

  // Preload the markWord. It is important that this is the first
  // instruction emitted as it is part of C1's null check semantics.
  ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));

  if (UseObjectMonitorTable) {
    // Clear cache in case fast locking succeeds.
    sd(zr, Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize((BasicLock::object_monitor_cache_offset_in_bytes()))));
  }

  // Check if the lock-stack is full.
  lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
  mv(t, (unsigned)LockStack::end_offset());
  bge(top, t, slow, /* is_far */ true);

  // Check for recursion.
  add(t, xthread, top);
  ld(t, Address(t, -oopSize));
  beq(obj, t, push);

  // Check header for monitor (0b10).
  test_bit(t, mark, exact_log2(markWord::monitor_value));
  bnez(t, slow, /* is_far */ true);

  // Try to lock. Transition lock-bits 0b01 => 0b00
  assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a la");
  ori(mark, mark, markWord::unlocked_value);
  xori(t, mark, markWord::unlocked_value);
  cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64,
          /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t);
  bne(mark, t, slow, /* is_far */ true);

  bind(push);
  // After a successful lock, push the object onto the lock-stack.
  add(t, xthread, top);
  sd(obj, Address(t));
  addw(top, top, oopSize);
  sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
}

// Implements lightweight-unlocking.
//
// - obj: the object to be unlocked
// - tmp1, tmp2, tmp3: temporary registers
// - slow: branched to if unlocking fails
void MacroAssembler::lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
  assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
  assert_different_registers(obj, tmp1, tmp2, tmp3, t0);

#ifdef ASSERT
  {
    // Check for lock-stack underflow.
    Label stack_ok;
    lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
    mv(tmp2, (unsigned)LockStack::start_offset());
    bge(tmp1, tmp2, stack_ok);
    STOP("Lock-stack underflow");
    bind(stack_ok);
  }
#endif

  Label unlocked, push_and_slow;
  const Register top = tmp1;
  const Register mark = tmp2;
  const Register t = tmp3;

  // Check if obj is top of lock-stack.
  lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
  subw(top, top, oopSize);
  add(t, xthread, top);
  ld(t, Address(t));
  bne(obj, t, slow, /* is_far */ true);

  // Pop lock-stack.
  DEBUG_ONLY(add(t, xthread, top);)
  DEBUG_ONLY(sd(zr, Address(t));)
  sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));

  // Check if recursive.
5970 add(t, xthread, top); 5971 ld(t, Address(t, -oopSize)); 5972 beq(obj, t, unlocked); 5973 5974 // Not recursive. Check header for monitor (0b10). 5975 ld(mark, Address(obj, oopDesc::mark_offset_in_bytes())); 5976 test_bit(t, mark, exact_log2(markWord::monitor_value)); 5977 bnez(t, push_and_slow); 5978 5979 #ifdef ASSERT 5980 // Check header not unlocked (0b01). 5981 Label not_unlocked; 5982 test_bit(t, mark, exact_log2(markWord::unlocked_value)); 5983 beqz(t, not_unlocked); 5984 stop("lightweight_unlock already unlocked"); 5985 bind(not_unlocked); 5986 #endif 5987 5988 // Try to unlock. Transition lock bits 0b00 => 0b01 5989 assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea"); 5990 ori(t, mark, markWord::unlocked_value); 5991 cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64, 5992 /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, /*result*/ t); 5993 beq(mark, t, unlocked); 5994 5995 bind(push_and_slow); 5996 // Restore lock-stack and handle the unlock in runtime. 5997 DEBUG_ONLY(add(t, xthread, top);) 5998 DEBUG_ONLY(sd(obj, Address(t));) 5999 addw(top, top, oopSize); 6000 sw(top, Address(xthread, JavaThread::lock_stack_top_offset())); 6001 j(slow); 6002 6003 bind(unlocked); 6004 }
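
// A hedged pseudo-C++ sketch of the two fast paths above (names such as
// lock_stack_push/pop and cas_mark are hypothetical; monitor inflation, the
// UseObjectMonitorTable cache and the runtime slow paths are left out):
//
//   bool fast_lock(Thread* self, oop obj) {
//     if (self->lock_stack_is_full()) return false;            // -> slow
//     if (self->lock_stack_top() == obj) {                     // recursive case
//       self->lock_stack_push(obj); return true;
//     }
//     markWord m = obj->mark();
//     if (m.has_monitor()) return false;                       // -> slow
//     // lock bits 0b01 (unlocked) -> 0b00 (locked)
//     if (!obj->cas_mark(/*expected*/ m.set_unlocked(), /*new*/ m.clear_lock_bits())) {
//       return false;                                          // -> slow
//     }
//     self->lock_stack_push(obj); return true;
//   }
//
//   bool fast_unlock(Thread* self, oop obj) {
//     if (self->lock_stack_top() != obj) return false;         // -> slow
//     self->lock_stack_pop();
//     if (self->lock_stack_top() == obj) return true;          // was recursive
//     markWord m = obj->mark();
//     if (m.has_monitor()) { self->lock_stack_push(obj); return false; }   // -> slow
//     // lock bits 0b00 (locked) -> 0b01 (unlocked)
//     if (!obj->cas_mark(/*expected*/ m, /*new*/ m.set_unlocked())) {
//       self->lock_stack_push(obj); return false;              // -> slow
//     }
//     return true;
//   }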