/*
 * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
 * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "code/compiledIC.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/bytecodeHistogram.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "oops/accessDecorators.hpp"
#include "oops/compressedKlass.inline.hpp"
#include "oops/compressedOops.inline.hpp"
#include "oops/klass.inline.hpp"
#include "oops/oop.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/jniHandles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/powerOfTwo.hpp"
#ifdef COMPILER2
#include "opto/compile.hpp"
#include "opto/node.hpp"
#include "opto/output.hpp"
#endif

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif
#define STOP(str) stop(str);
#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":")

Register MacroAssembler::extract_rs1(address instr) {
  assert_cond(instr != nullptr);
  return as_Register(Assembler::extract(Assembler::ld_instr(instr), 19, 15));
}

Register MacroAssembler::extract_rs2(address instr) {
  assert_cond(instr != nullptr);
  return as_Register(Assembler::extract(Assembler::ld_instr(instr), 24, 20));
}

Register MacroAssembler::extract_rd(address instr) {
  assert_cond(instr != nullptr);
  return as_Register(Assembler::extract(Assembler::ld_instr(instr), 11, 7));
}

uint32_t MacroAssembler::extract_opcode(address instr) {
  assert_cond(instr != nullptr);
  return Assembler::extract(Assembler::ld_instr(instr), 6, 0);
}

uint32_t MacroAssembler::extract_funct3(address instr) {
  assert_cond(instr != nullptr);
  return Assembler::extract(Assembler::ld_instr(instr), 14, 12);
}
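// Note on the bit ranges used above: in the RV32/RV64 base encoding the opcode
// sits in bits [6:0], rd in [11:7], funct3 in [14:12], rs1 in [19:15] and
// rs2 in [24:20]. For example, the word 0x00b50533 (add a0, a0, a1) decodes
// with these extractors to rd = x10, rs1 = x10, rs2 = x11.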
bool MacroAssembler::is_pc_relative_at(address instr) {
  // auipc + jalr
  // auipc + addi
  // auipc + load
  // auipc + float_load
  return (is_auipc_at(instr)) &&
         (is_addi_at(instr + instruction_size) ||
          is_jalr_at(instr + instruction_size) ||
          is_load_at(instr + instruction_size) ||
          is_float_load_at(instr + instruction_size)) &&
         check_pc_relative_data_dependency(instr);
}

// i.e. ld(Rd, Label)
bool MacroAssembler::is_load_pc_relative_at(address instr) {
  return is_auipc_at(instr) &&                  // auipc
         is_ld_at(instr + instruction_size) &&  // ld
         check_load_pc_relative_data_dependency(instr);
}

bool MacroAssembler::is_movptr1_at(address instr) {
  return is_lui_at(instr) &&                                    // Lui
         is_addi_at(instr + instruction_size) &&                // Addi
         is_slli_shift_at(instr + instruction_size * 2, 11) &&  // Slli Rd, Rs, 11
         is_addi_at(instr + instruction_size * 3) &&            // Addi
         is_slli_shift_at(instr + instruction_size * 4, 6) &&   // Slli Rd, Rs, 6
         (is_addi_at(instr + instruction_size * 5) ||
          is_jalr_at(instr + instruction_size * 5) ||
          is_load_at(instr + instruction_size * 5)) &&          // Addi/Jalr/Load
         check_movptr1_data_dependency(instr);
}

bool MacroAssembler::is_movptr2_at(address instr) {
  return is_lui_at(instr) &&                                    // lui
         is_lui_at(instr + instruction_size) &&                 // lui
         is_slli_shift_at(instr + instruction_size * 2, 18) &&  // slli Rd, Rs, 18
         is_add_at(instr + instruction_size * 3) &&
         (is_addi_at(instr + instruction_size * 4) ||
          is_jalr_at(instr + instruction_size * 4) ||
          is_load_at(instr + instruction_size * 4)) &&          // Addi/Jalr/Load
         check_movptr2_data_dependency(instr);
}

bool MacroAssembler::is_li16u_at(address instr) {
  return is_lui_at(instr) &&                      // lui
         is_srli_at(instr + instruction_size) &&  // srli
         check_li16u_data_dependency(instr);
}

bool MacroAssembler::is_li32_at(address instr) {
  return is_lui_at(instr) &&                       // lui
         is_addiw_at(instr + instruction_size) &&  // addiw
         check_li32_data_dependency(instr);
}

bool MacroAssembler::is_lwu_to_zr(address instr) {
  assert_cond(instr != nullptr);
  return (extract_opcode(instr) == 0b0000011 &&
          extract_funct3(instr) == 0b110 &&
          extract_rd(instr) == zr);         // zr
}

uint32_t MacroAssembler::get_membar_kind(address addr) {
  assert_cond(addr != nullptr);
  assert(is_membar(addr), "no membar found");

  uint32_t insn = Bytes::get_native_u4(addr);

  uint32_t predecessor = Assembler::extract(insn, 27, 24);
  uint32_t successor = Assembler::extract(insn, 23, 20);

  return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor);
}

void MacroAssembler::set_membar_kind(address addr, uint32_t order_kind) {
  assert_cond(addr != nullptr);
  assert(is_membar(addr), "no membar found");

  uint32_t predecessor = 0;
  uint32_t successor = 0;

  MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor);

  uint32_t insn = Bytes::get_native_u4(addr);
  address pInsn = (address) &insn;
  Assembler::patch(pInsn, 27, 24, predecessor);
  Assembler::patch(pInsn, 23, 20, successor);

  address membar = addr;
  Assembler::sd_instr(membar, insn);
}

static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg) {
    masm->mv(c_rarg0, arg);
  }
}
static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg) {
    masm->mv(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg) {
    masm->mv(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg) {
    masm->mv(c_rarg3, arg);
  }
}

void MacroAssembler::push_cont_fastpath(Register java_thread) {
  if (!Continuations::enabled()) return;
  Label done;
  ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset()));
  bleu(sp, t0, done);
  sd(sp, Address(java_thread, JavaThread::cont_fastpath_offset()));
  bind(done);
}

void MacroAssembler::pop_cont_fastpath(Register java_thread) {
  if (!Continuations::enabled()) return;
  Label done;
  ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset()));
  bltu(sp, t0, done);
  sd(zr, Address(java_thread, JavaThread::cont_fastpath_offset()));
  bind(done);
}

int MacroAssembler::align(int modulus, int extra_offset) {
  CompressibleRegion cr(this);
  intptr_t before = offset();
  while ((offset() + extra_offset) % modulus != 0) { nop(); }
  return (int)(offset() - before);
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
}

// Implementation of call_VM versions

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  assert_different_registers(arg_1, c_rarg2);
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  assert_different_registers(arg_1, c_rarg2, c_rarg3);
  assert_different_registers(arg_2, c_rarg3);
  pass_arg3(this, arg_3);

  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  assert_different_registers(arg_1, c_rarg2);
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}
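// Note: in the call_VM overloads above and below, the outgoing arguments are
// moved into the c_rargN registers starting with the highest-numbered one,
// and the assert_different_registers checks verify that no not-yet-moved
// source register aliases a c_rargN that is written earlier, so a later
// argument cannot be clobbered by an earlier move.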
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  assert_different_registers(arg_1, c_rarg2, c_rarg3);
  assert_different_registers(arg_2, c_rarg3);
  pass_arg3(this, arg_3);
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

void MacroAssembler::post_call_nop() {
  if (!Continuations::enabled()) {
    return;
  }
  relocate(post_call_nop_Relocation::spec(), [&] {
    InlineSkippedInstructionsCounter skipCounter(this);
    nop();
    li32(zr, 0);
  });
}

// these are no-ops overridden by InterpreterMacroAssembler
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {}
void MacroAssembler::check_and_handle_popframe(Register java_thread) {}

// Calls to C land
//
// When entering C land, the fp & esp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         Register last_java_pc) {

  if (last_java_pc->is_valid()) {
    sd(last_java_pc, Address(xthread,
                             JavaThread::frame_anchor_offset() +
                             JavaFrameAnchor::last_Java_pc_offset()));
  }

  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = esp;
  }

  sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset()));

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset()));
  }
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc,
                                         Register tmp) {
  assert(last_java_pc != nullptr, "must provide a valid PC");

  la(tmp, last_java_pc);
  sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));

  set_last_Java_frame(last_java_sp, last_java_fp, noreg);
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         Label &L,
                                         Register tmp) {
  if (L.is_bound()) {
    set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp);
  } else {
    L.add_patch_at(code(), locator());
    IncompressibleRegion ir(this);  // the label address will be patched back.
    set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp);
  }
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
  // we must set sp to zero to clear frame
  sd(zr, Address(xthread, JavaThread::last_Java_sp_offset()));

  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    sd(zr, Address(xthread, JavaThread::last_Java_fp_offset()));
  }

  // Always clear the pc because it could have been set by make_walkable()
  sd(zr, Address(xthread, JavaThread::last_Java_pc_offset()));
}

void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int number_of_arguments,
                                  bool check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = xthread;
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = esp;
  }

  // debugging support
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(java_thread == xthread, "unexpected register");

  assert(java_thread != oop_result, "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)
  mv(c_rarg0, java_thread);

  // set last Java frame before call
  assert(last_java_sp != fp, "can't use fp");

  Label l;
  set_last_Java_frame(last_java_sp, fp, l, t0);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);

  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(true);

  // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
    Label ok;
    beqz(t0, ok);
    RuntimeAddress target(StubRoutines::forward_exception_entry());
    relocate(target.rspec(), [&] {
      int32_t offset;
      la(t0, target.target(), offset);
      jr(t0, offset);
    });
    bind(ok);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result, java_thread);
  }
}

void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  sd(zr, Address(java_thread, JavaThread::vm_result_offset()));
  verify_oop_msg(oop_result, "broken oop in call_VM_base");
}

void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  sd(zr, Address(java_thread, JavaThread::vm_result_2_offset()));
}

void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) {
  assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required");
  assert_different_registers(klass, xthread, tmp);

  Label L_fallthrough, L_tmp;
  if (L_fast_path == nullptr) {
    L_fast_path = &L_fallthrough;
  } else if (L_slow_path == nullptr) {
    L_slow_path = &L_fallthrough;
  }

  // Fast path check: class is fully initialized
  lbu(tmp, Address(klass, InstanceKlass::init_state_offset()));
  sub(tmp, tmp, InstanceKlass::fully_initialized);
  beqz(tmp, *L_fast_path);

  // Fast path check: current thread is initializer thread
  ld(tmp, Address(klass, InstanceKlass::init_thread_offset()));

  if (L_slow_path == &L_fallthrough) {
    beq(xthread, tmp, *L_fast_path);
    bind(*L_slow_path);
  } else if (L_fast_path == &L_fallthrough) {
    bne(xthread, tmp, *L_slow_path);
    bind(*L_fast_path);
  } else {
    Unimplemented();
  }
}

void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  if (!VerifyOops) { return; }

  // Pass register number to verify_oop_subroutine
  const char* b = nullptr;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
    b = code_string(ss.as_string());
  }
  BLOCK_COMMENT("verify_oop {");

  push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);

  mv(c_rarg0, reg); // c_rarg0 : x10
  {
    // The length of the instruction sequence emitted should not depend
    // on the address of the char buffer so that the size of mach nodes for
    // scratch emit and normal emit matches.
    IncompressibleRegion ir(this);  // Fixed length
    movptr(t0, (address) b);
  }

  // call indirectly to solve generation ordering problem
  RuntimeAddress target(StubRoutines::verify_oop_subroutine_entry_address());
  relocate(target.rspec(), [&] {
    int32_t offset;
    la(t1, target.target(), offset);
    ld(t1, Address(t1, offset));
  });
  jalr(t1);

  pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);

  BLOCK_COMMENT("} verify_oop");
}

void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
  if (!VerifyOops) {
    return;
  }

  const char* b = nullptr;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
    b = code_string(ss.as_string());
  }
  BLOCK_COMMENT("verify_oop_addr {");

  push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);

  if (addr.uses(sp)) {
    la(x10, addr);
    ld(x10, Address(x10, 4 * wordSize));
  } else {
    ld(x10, addr);
  }

  {
    // The length of the instruction sequence emitted should not depend
    // on the address of the char buffer so that the size of mach nodes for
    // scratch emit and normal emit matches.
    IncompressibleRegion ir(this);  // Fixed length
    movptr(t0, (address) b);
  }

  // call indirectly to solve generation ordering problem
  RuntimeAddress target(StubRoutines::verify_oop_subroutine_entry_address());
  relocate(target.rspec(), [&] {
    int32_t offset;
    la(t1, target.target(), offset);
    ld(t1, Address(t1, offset));
  });
  jalr(t1);

  pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);

  BLOCK_COMMENT("} verify_oop_addr");
}

Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  if (arg_slot.is_constant()) {
    return Address(esp, arg_slot.as_constant() * stackElementSize + offset);
  } else {
    assert_different_registers(t0, arg_slot.as_register());
    shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize));
    return Address(t0, offset);
  }
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
{
  // In order to get locks to work, we need to fake an in_VM state
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr(" pc = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr(" x0 = 0x%016lx", regs[0]);
      tty->print_cr(" x1 = 0x%016lx", regs[1]);
      tty->print_cr(" x2 = 0x%016lx", regs[2]);
      tty->print_cr(" x3 = 0x%016lx", regs[3]);
      tty->print_cr(" x4 = 0x%016lx", regs[4]);
      tty->print_cr(" x5 = 0x%016lx", regs[5]);
      tty->print_cr(" x6 = 0x%016lx", regs[6]);
      tty->print_cr(" x7 = 0x%016lx", regs[7]);
      tty->print_cr(" x8 = 0x%016lx", regs[8]);
      tty->print_cr(" x9 = 0x%016lx", regs[9]);
      tty->print_cr("x10 = 0x%016lx", regs[10]);
      tty->print_cr("x11 = 0x%016lx", regs[11]);
      tty->print_cr("x12 = 0x%016lx", regs[12]);
      tty->print_cr("x13 = 0x%016lx", regs[13]);
      tty->print_cr("x14 = 0x%016lx", regs[14]);
      tty->print_cr("x15 = 0x%016lx", regs[15]);
      tty->print_cr("x16 = 0x%016lx", regs[16]);
      tty->print_cr("x17 = 0x%016lx", regs[17]);
      tty->print_cr("x18 = 0x%016lx", regs[18]);
      tty->print_cr("x19 = 0x%016lx", regs[19]);
      tty->print_cr("x20 = 0x%016lx", regs[20]);
      tty->print_cr("x21 = 0x%016lx", regs[21]);
      tty->print_cr("x22 = 0x%016lx", regs[22]);
      tty->print_cr("x23 = 0x%016lx", regs[23]);
      tty->print_cr("x24 = 0x%016lx", regs[24]);
      tty->print_cr("x25 = 0x%016lx", regs[25]);
      tty->print_cr("x26 = 0x%016lx", regs[26]);
      tty->print_cr("x27 = 0x%016lx", regs[27]);
      tty->print_cr("x28 = 0x%016lx", regs[28]);
      tty->print_cr("x29 = 0x%016lx", regs[29]);
      tty->print_cr("x30 = 0x%016lx", regs[30]);
      tty->print_cr("x31 = 0x%016lx", regs[31]);
      BREAKPOINT;
    }
  }
  fatal("DEBUG MESSAGE: %s", msg);
}

void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
  assert_different_registers(value, tmp1, tmp2);
  Label done, tagged, weak_tagged;

  beqz(value, done);           // Use null as-is.
  // Test for tag.
  andi(tmp1, value, JNIHandles::tag_mask);
  bnez(tmp1, tagged);

  // Resolve local handle
  access_load_at(T_OBJECT, IN_NATIVE | AS_RAW, value, Address(value, 0), tmp1, tmp2);
  verify_oop(value);
  j(done);

  bind(tagged);
  // Test for jweak tag.
  STATIC_ASSERT(JNIHandles::TypeTag::weak_global == 0b1);
  test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::weak_global));
  bnez(tmp1, weak_tagged);

  // Resolve global handle
  access_load_at(T_OBJECT, IN_NATIVE, value,
                 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2);
  verify_oop(value);
  j(done);

  bind(weak_tagged);
  // Resolve jweak.
  access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value,
                 Address(value, -JNIHandles::TypeTag::weak_global), tmp1, tmp2);
  verify_oop(value);

  bind(done);
}

void MacroAssembler::resolve_global_jobject(Register value, Register tmp1, Register tmp2) {
  assert_different_registers(value, tmp1, tmp2);
  Label done;

  beqz(value, done);           // Use null as-is.

#ifdef ASSERT
  {
    STATIC_ASSERT(JNIHandles::TypeTag::global == 0b10);
    Label valid_global_tag;
    test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::global)); // Test for global tag.
    bnez(tmp1, valid_global_tag);
    stop("non global jobject using resolve_global_jobject");
    bind(valid_global_tag);
  }
#endif

  // Resolve global handle
  access_load_at(T_OBJECT, IN_NATIVE, value,
                 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2);
  verify_oop(value);

  bind(done);
}

void MacroAssembler::stop(const char* msg) {
  BLOCK_COMMENT(msg);
  illegal_instruction(Assembler::csr::time);
  emit_int64((uintptr_t)msg);
}

void MacroAssembler::unimplemented(const char* what) {
  const char* buf = nullptr;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("unimplemented: %s", what);
    buf = code_string(ss.as_string());
  }
  stop(buf);
}

void MacroAssembler::emit_static_call_stub() {
  IncompressibleRegion ir(this);  // Fixed length: see CompiledDirectCall::to_interp_stub_size().
  // CompiledDirectCall::set_to_interpreted knows the
  // exact layout of this stub.

  mov_metadata(xmethod, (Metadata*)nullptr);

  // Jump to the entry point of the c2i stub.
  int32_t offset = 0;
  movptr(t0, 0, offset, t1); // lui + lui + slli + add
  jr(t0, offset);
}

void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments,
                                       Label *retaddr) {
  int32_t offset = 0;
  push_reg(RegSet::of(t0, xmethod), sp);   // push << t0 & xmethod >> to sp
  mv(t0, entry_point, offset);
  jalr(t0, offset);
  if (retaddr != nullptr) {
    bind(*retaddr);
  }
  pop_reg(RegSet::of(t0, xmethod), sp);    // pop << t0 & xmethod >> from sp
}

void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  assert_different_registers(arg_1, c_rarg0);
  pass_arg0(this, arg_0);
  pass_arg1(this, arg_1);
  call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
                                  Register arg_1, Register arg_2) {
  assert_different_registers(arg_1, c_rarg0);
  assert_different_registers(arg_2, c_rarg0, c_rarg1);
  pass_arg0(this, arg_0);
  pass_arg1(this, arg_1);
  pass_arg2(this, arg_2);
  call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  assert_different_registers(arg_0, c_rarg1);
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  assert_different_registers(arg_0, c_rarg1, c_rarg2);
  assert_different_registers(arg_1, c_rarg2);
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  assert_different_registers(arg_0, c_rarg1, c_rarg2, c_rarg3);
  assert_different_registers(arg_1, c_rarg2, c_rarg3);
  assert_different_registers(arg_2, c_rarg3);

  pass_arg3(this, arg_3);
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}

void MacroAssembler::la(Register Rd, const address addr) {
  int32_t offset;
  la(Rd, addr, offset);
  addi(Rd, Rd, offset);
}

void MacroAssembler::la(Register Rd, const address addr, int32_t &offset) {
  if (is_32bit_offset_from_codecache((int64_t)addr)) {
    int64_t distance = addr - pc();
    assert(is_valid_32bit_offset(distance), "Must be");
    auipc(Rd, (int32_t)distance + 0x800);
    offset = ((int32_t)distance << 20) >> 20;
  } else {
    assert(!CodeCache::contains(addr), "Must be");
    movptr(Rd, addr, offset);
  }
}
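// Note: in the auipc case above, adding 0x800 before the upper 20 bits are
// taken effectively rounds the auipc contribution to the nearest 4 KiB page,
// so the remaining low 12 bits of the distance always fit in the signed
// 12-bit offset of the following addi/load. For example, a distance of
// 0x1801 splits into an auipc contribution of 0x2000 and an offset of -0x7ff.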
void MacroAssembler::la(Register Rd, const Address &adr) {
  switch (adr.getMode()) {
    case Address::literal: {
      relocInfo::relocType rtype = adr.rspec().reloc()->type();
      if (rtype == relocInfo::none) {
        mv(Rd, (intptr_t)(adr.target()));
      } else {
        relocate(adr.rspec(), [&] {
          movptr(Rd, adr.target());
        });
      }
      break;
    }
    case Address::base_plus_offset: {
      Address new_adr = legitimize_address(Rd, adr);
      if (!(new_adr.base() == Rd && new_adr.offset() == 0)) {
        addi(Rd, new_adr.base(), new_adr.offset());
      }
      break;
    }
    default:
      ShouldNotReachHere();
  }
}

void MacroAssembler::la(Register Rd, Label &label) {
  IncompressibleRegion ir(this);   // the label address may be patched back.
  wrap_label(Rd, label, &MacroAssembler::la);
}

void MacroAssembler::li16u(Register Rd, uint16_t imm) {
  lui(Rd, (uint32_t)imm << 12);
  srli(Rd, Rd, 12);
}

void MacroAssembler::li32(Register Rd, int32_t imm) {
  // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit
  int64_t upper = imm, lower = imm;
  lower = (imm << 20) >> 20;
  upper -= lower;
  upper = (int32_t)upper;
  // lui Rd, imm[31:12] + imm[11]
  lui(Rd, upper);
  addiw(Rd, Rd, lower);
}

void MacroAssembler::li(Register Rd, int64_t imm) {
  // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff
  // li -> c.li
  if (do_compress() && (is_simm6(imm) && Rd != x0)) {
    c_li(Rd, imm);
    return;
  }

  int shift = 12;
  int64_t upper = imm, lower = imm;
  // Split imm to a lower 12-bit sign-extended part and the remainder,
  // because addi will sign-extend the lower imm.
  lower = ((int32_t)imm << 20) >> 20;
  upper -= lower;

  // Test whether imm is a 32-bit integer.
  if (!(((imm) & ~(int64_t)0x7fffffff) == 0 ||
        (((imm) & ~(int64_t)0x7fffffff) == ~(int64_t)0x7fffffff))) {
    while (((upper >> shift) & 1) == 0) { shift++; }
    upper >>= shift;
    li(Rd, upper);
    slli(Rd, Rd, shift);
    if (lower != 0) {
      addi(Rd, Rd, lower);
    }
  } else {
    // 32-bit integer
    Register hi_Rd = zr;
    if (upper != 0) {
      lui(Rd, (int32_t)upper);
      hi_Rd = Rd;
    }
    if (lower != 0 || hi_Rd == zr) {
      addiw(Rd, hi_Rd, lower);
    }
  }
}

void MacroAssembler::load_link_jump(const address source, Register temp) {
  assert(temp != noreg && temp != x0, "expecting a register");
  assert_cond(source != nullptr);
  int64_t distance = source - pc();
  assert(is_simm32(distance), "Must be");
  auipc(temp, (int32_t)distance + 0x800);
  ld(temp, Address(temp, ((int32_t)distance << 20) >> 20));
  jalr(temp);
}

void MacroAssembler::jump_link(const address dest, Register temp) {
  assert(UseTrampolines, "Must be");
  assert_cond(dest != nullptr);
  int64_t distance = dest - pc();
  assert(is_simm21(distance), "Must be");
  assert((distance % 2) == 0, "Must be");
  jal(x1, distance);
}

void MacroAssembler::j(const address dest, Register temp) {
  assert(CodeCache::contains(dest), "Must be");
  assert_cond(dest != nullptr);
  int64_t distance = dest - pc();

  // We can't patch compressed (C-extension) instructions, so if the Label
  // wasn't bound yet this jump may need to be patched later; emit it
  // uncompressed.
  IncompressibleRegion ir(this);
  if (is_simm21(distance) && ((distance % 2) == 0)) {
    Assembler::jal(x0, distance);
  } else {
    assert(temp != noreg && temp != x0, "expecting a register");
    int32_t offset = 0;
    la(temp, dest, offset);
    jr(temp, offset);
  }
}

void MacroAssembler::j(const Address &adr, Register temp) {
  switch (adr.getMode()) {
    case Address::literal: {
      relocate(adr.rspec(), [&] {
        j(adr.target(), temp);
      });
      break;
    }
    case Address::base_plus_offset: {
      int32_t offset = ((int32_t)adr.offset() << 20) >> 20;
      la(temp, Address(adr.base(), adr.offset() - offset));
      jr(temp, offset);
      break;
    }
    default:
      ShouldNotReachHere();
  }
}

void MacroAssembler::j(Label &lab, Register temp) {
  assert_different_registers(x0, temp);
  if (lab.is_bound()) {
    MacroAssembler::j(target(lab), temp);
  } else {
    lab.add_patch_at(code(), locator());
    MacroAssembler::j(pc(), temp);
  }
}

void MacroAssembler::jr(Register Rd, int32_t offset) {
  assert(Rd != noreg, "expecting a register");
  Assembler::jalr(x0, Rd, offset);
}

void MacroAssembler::call(const address dest, Register temp) {
  assert_cond(dest != nullptr);
  assert(temp != noreg, "expecting a register");
  int32_t offset = 0;
  la(temp, dest, offset);
  jalr(temp, offset);
}

void MacroAssembler::jalr(Register Rs, int32_t offset) {
  assert(Rs != noreg, "expecting a register");
  Assembler::jalr(x1, Rs, offset);
}

void MacroAssembler::rt_call(address dest, Register tmp) {
  CodeBlob *cb = CodeCache::find_blob(dest);
  RuntimeAddress target(dest);
  if (cb) {
    far_call(target, tmp);
  } else {
    relocate(target.rspec(), [&] {
      int32_t offset;
      la(tmp, target.target(), offset);
      jalr(tmp, offset);
    });
  }
}

void MacroAssembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) {
  if (L.is_bound()) {
    (this->*insn)(Rt, target(L));
  } else {
    L.add_patch_at(code(), locator());
    (this->*insn)(Rt, pc());
  }
}

void MacroAssembler::wrap_label(Register r1, Register r2, Label &L,
                                compare_and_branch_insn insn,
                                compare_and_branch_label_insn neg_insn, bool is_far) {
  if (is_far) {
    Label done;
    (this->*neg_insn)(r1, r2, done, /* is_far */ false);
    j(L);
    bind(done);
  } else {
    if (L.is_bound()) {
      (this->*insn)(r1, r2, target(L));
    } else {
      L.add_patch_at(code(), locator());
      (this->*insn)(r1, r2, pc());
    }
  }
}

#define INSN(NAME, NEG_INSN)                                                             \
  void MacroAssembler::NAME(Register Rs1, Register Rs2, Label &L, bool is_far) {        \
    wrap_label(Rs1, Rs2, L, &MacroAssembler::NAME, &MacroAssembler::NEG_INSN, is_far);  \
  }

INSN(beq,  bne);
INSN(bne,  beq);
INSN(blt,  bge);
INSN(bge,  blt);
INSN(bltu, bgeu);
INSN(bgeu, bltu);

#undef INSN

#define INSN(NAME)                                                    \
  void MacroAssembler::NAME##z(Register Rs, const address dest) {    \
    NAME(Rs, zr, dest);                                               \
  }                                                                   \
  void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \
    NAME(Rs, zr, l, is_far);                                          \
  }

INSN(beq);
INSN(bne);
INSN(blt);
INSN(ble);
INSN(bge);
INSN(bgt);

#undef INSN
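// Note: the base ISA only provides beq/bne/blt/bge/bltu/bgeu. The bgt/ble/
// bgtu/bleu forms below are synthesized, as in the standard pseudo-
// instructions, by swapping the two source operands of the complementary
// comparison.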
#define INSN(NAME, NEG_INSN)                                                        \
  void MacroAssembler::NAME(Register Rs, Register Rt, const address dest) {        \
    NEG_INSN(Rt, Rs, dest);                                                         \
  }                                                                                 \
  void MacroAssembler::NAME(Register Rs, Register Rt, Label &l, bool is_far) {     \
    NEG_INSN(Rt, Rs, l, is_far);                                                    \
  }

INSN(bgt,  blt);
INSN(ble,  bge);
INSN(bgtu, bltu);
INSN(bleu, bgeu);

#undef INSN

// Float compare branch instructions

#define INSN(NAME, FLOATCMP, BRANCH)                                                                                   \
  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) {  \
    FLOATCMP##_s(t0, Rs1, Rs2);                                                                                        \
    BRANCH(t0, l, is_far);                                                                                             \
  }                                                                                                                    \
  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \
    FLOATCMP##_d(t0, Rs1, Rs2);                                                                                        \
    BRANCH(t0, l, is_far);                                                                                             \
  }

INSN(beq, feq, bnez);
INSN(bne, feq, beqz);

#undef INSN


#define INSN(NAME, FLOATCMP1, FLOATCMP2)                                             \
  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,  \
                                    bool is_far, bool is_unordered) {                \
    if (is_unordered) {                                                              \
      /* jump if either source is NaN or condition is expected */                    \
      FLOATCMP2##_s(t0, Rs2, Rs1);                                                   \
      beqz(t0, l, is_far);                                                           \
    } else {                                                                         \
      /* jump if no NaN in source and condition is expected */                       \
      FLOATCMP1##_s(t0, Rs1, Rs2);                                                   \
      bnez(t0, l, is_far);                                                           \
    }                                                                                \
  }                                                                                  \
  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
                                     bool is_far, bool is_unordered) {               \
    if (is_unordered) {                                                              \
      /* jump if either source is NaN or condition is expected */                    \
      FLOATCMP2##_d(t0, Rs2, Rs1);                                                   \
      beqz(t0, l, is_far);                                                           \
    } else {                                                                         \
      /* jump if no NaN in source and condition is expected */                       \
      FLOATCMP1##_d(t0, Rs1, Rs2);                                                   \
      bnez(t0, l, is_far);                                                           \
    }                                                                                \
  }

INSN(ble, fle, flt);
INSN(blt, flt, fle);

#undef INSN

#define INSN(NAME, CMP)                                                              \
  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,  \
                                    bool is_far, bool is_unordered) {                \
    float_##CMP(Rs2, Rs1, l, is_far, is_unordered);                                  \
  }                                                                                  \
  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
                                     bool is_far, bool is_unordered) {               \
    double_##CMP(Rs2, Rs1, l, is_far, is_unordered);                                 \
  }

INSN(bgt, blt);
INSN(bge, ble);

#undef INSN


#define INSN(NAME, CSR)                      \
  void MacroAssembler::NAME(Register Rd) {   \
    csrr(Rd, CSR);                           \
  }

INSN(rdinstret, CSR_INSTRET);
INSN(rdcycle,   CSR_CYCLE);
INSN(rdtime,    CSR_TIME);
INSN(frcsr,     CSR_FCSR);
INSN(frrm,      CSR_FRM);
INSN(frflags,   CSR_FFLAGS);

#undef INSN

void MacroAssembler::csrr(Register Rd, unsigned csr) {
  csrrs(Rd, csr, x0);
}

#define INSN(NAME, OPFUN)                                 \
  void MacroAssembler::NAME(unsigned csr, Register Rs) {  \
    OPFUN(x0, csr, Rs);                                   \
  }

INSN(csrw, csrrw);
INSN(csrs, csrrs);
INSN(csrc, csrrc);

#undef INSN

#define INSN(NAME, OPFUN)                                  \
  void MacroAssembler::NAME(unsigned csr, unsigned imm) {  \
    OPFUN(x0, csr, imm);                                   \
  }

INSN(csrwi, csrrwi);
INSN(csrsi, csrrsi);
INSN(csrci, csrrci);

#undef INSN

#define INSN(NAME, CSR)                                  \
  void MacroAssembler::NAME(Register Rd, Register Rs) {  \
    csrrw(Rd, CSR, Rs);                                  \
  }

INSN(fscsr,   CSR_FCSR);
INSN(fsrm,    CSR_FRM);
INSN(fsflags, CSR_FFLAGS);

#undef INSN
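// Note: the CSR helpers above follow the standard pseudo-instruction
// mappings: csrr rd, csr is csrrs rd, csr, x0; csrw/csrs/csrc discard the old
// value by writing it to x0; and fscsr/fsrm/fsflags swap a new value into the
// FP control CSRs while returning the previous value in Rd.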
#define INSN(NAME)                             \
  void MacroAssembler::NAME(Register Rs) {     \
    NAME(x0, Rs);                              \
  }

INSN(fscsr);
INSN(fsrm);
INSN(fsflags);

#undef INSN

void MacroAssembler::fsrmi(Register Rd, unsigned imm) {
  guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register");
  csrrwi(Rd, CSR_FRM, imm);
}

void MacroAssembler::fsflagsi(Register Rd, unsigned imm) {
  csrrwi(Rd, CSR_FFLAGS, imm);
}

#define INSN(NAME)                             \
  void MacroAssembler::NAME(unsigned imm) {    \
    NAME(x0, imm);                             \
  }

INSN(fsrmi);
INSN(fsflagsi);

#undef INSN

void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp) {
  if (RestoreMXCSROnJNICalls) {
    Label skip_fsrmi;
    frrm(tmp);
    // Set FRM to the state we need. We do want Round to Nearest.
    // We don't want non-IEEE rounding modes.
    guarantee(RoundingMode::rne == 0, "must be");
    beqz(tmp, skip_fsrmi);        // Only reset FRM if it's wrong
    fsrmi(RoundingMode::rne);
    bind(skip_fsrmi);
  }
}

void MacroAssembler::push_reg(Register Rs)
{
  addi(esp, esp, 0 - wordSize);
  sd(Rs, Address(esp, 0));
}

void MacroAssembler::pop_reg(Register Rd)
{
  ld(Rd, Address(esp, 0));
  addi(esp, esp, wordSize);
}

int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) {
  int count = 0;
  // Scan bitset to accumulate register pairs
  for (int reg = 31; reg >= 0; reg--) {
    if ((1U << 31) & bitset) {
      regs[count++] = reg;
    }
    bitset <<= 1;
  }
  return count;
}

// Push integer registers in the bitset supplied. Don't push sp.
// Return the number of words pushed
int MacroAssembler::push_reg(unsigned int bitset, Register stack) {
  DEBUG_ONLY(int words_pushed = 0;)
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);
  // reserve one slot to align for odd count
  int offset = is_even(count) ? 0 : wordSize;

  if (count) {
    addi(stack, stack, -count * wordSize - offset);
  }
  for (int i = count - 1; i >= 0; i--) {
    sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
    DEBUG_ONLY(words_pushed++;)
  }

  assert(words_pushed == count, "oops, pushed != count");

  return count;
}

int MacroAssembler::pop_reg(unsigned int bitset, Register stack) {
  DEBUG_ONLY(int words_popped = 0;)
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);
  // reserve one slot to align for odd count
  int offset = is_even(count) ? 0 : wordSize;

  for (int i = count - 1; i >= 0; i--) {
    ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
    DEBUG_ONLY(words_popped++;)
  }

  if (count) {
    addi(stack, stack, count * wordSize + offset);
  }
  assert(words_popped == count, "oops, popped != count");

  return count;
}

// Push floating-point registers in the bitset supplied.
// Return the number of words pushed
int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
  DEBUG_ONLY(int words_pushed = 0;)
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);
  int push_slots = count + (count & 1);

  if (count) {
    addi(stack, stack, -push_slots * wordSize);
  }

  for (int i = count - 1; i >= 0; i--) {
    fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize));
    DEBUG_ONLY(words_pushed++;)
  }

  assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);

  return count;
}

int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
  DEBUG_ONLY(int words_popped = 0;)
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);
  int pop_slots = count + (count & 1);

  for (int i = count - 1; i >= 0; i--) {
    fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize));
    DEBUG_ONLY(words_popped++;)
  }

  if (count) {
    addi(stack, stack, pop_slots * wordSize);
  }

  assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count);

  return count;
}

static const int64_t right_32_bits = right_n_bits(32);
static const int64_t right_8_bits = right_n_bits(8);

/**
 * Emits code to update CRC-32 with a byte value according to constants in table
 *
 * @param [in,out]crc  Register containing the crc.
 * @param [in]val      Register containing the byte to fold into the CRC.
 * @param [in]table    Register containing the table of crc constants.
 *
 * uint32_t crc;
 * val = crc_table[(val ^ crc) & 0xFF];
 * crc = val ^ (crc >> 8);
 *
 */
void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
  assert_different_registers(crc, val, table);

  xorr(val, val, crc);
  andi(val, val, right_8_bits);
  shadd(val, val, table, val, 2);
  lwu(val, Address(val));
  srli(crc, crc, 8);
  xorr(crc, val, crc);
}

/**
 * Emits code to update CRC-32 with a 32-bit value according to tables 0 to 3
 *
 * @param [in,out]crc  Register containing the crc.
 * @param [in]v        Register containing the 32-bit to fold into the CRC.
 * @param [in]table0   Register containing table 0 of crc constants.
 * @param [in]table1   Register containing table 1 of crc constants.
 * @param [in]table2   Register containing table 2 of crc constants.
 * @param [in]table3   Register containing table 3 of crc constants.
 *
 * uint32_t crc;
 * v = crc ^ v
 * crc = table3[v&0xff]^table2[(v>>8)&0xff]^table1[(v>>16)&0xff]^table0[v>>24]
 *
 */
void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp1, Register tmp2, Register tmp3,
                                       Register table0, Register table1, Register table2, Register table3, bool upper) {
  assert_different_registers(crc, v, tmp1, tmp2, tmp3, table0, table1, table2, table3);

  if (upper)
    srli(v, v, 32);
  xorr(v, v, crc);

  andi(tmp1, v, right_8_bits);
  shadd(tmp1, tmp1, table3, tmp2, 2);
  lwu(crc, Address(tmp1));

  slli(tmp1, v, 16);
  slli(tmp3, v, 8);

  srliw(tmp1, tmp1, 24);
  srliw(tmp3, tmp3, 24);

  shadd(tmp1, tmp1, table2, tmp1, 2);
  lwu(tmp2, Address(tmp1));

  shadd(tmp3, tmp3, table1, tmp3, 2);
  xorr(crc, crc, tmp2);

  lwu(tmp2, Address(tmp3));
  // Using 'srli' is cheaper than 'srliw' when it is not necessary to clear the upper bits
  if (upper)
    srli(tmp1, v, 24);
  else
    srliw(tmp1, v, 24);

  // no need to clear bits other than lowest two
  shadd(tmp1, tmp1, table0, tmp1, 2);
  xorr(crc, crc, tmp2);
  lwu(tmp2, Address(tmp1));
  xorr(crc, crc, tmp2);
}


#ifdef COMPILER2
// This improvement (vectorization) is based on java.base/share/native/libzip/zlib/zcrc32.c.
// To make it, the following steps were taken:
//   1. in zcrc32.c, modify N to 16 and the related code,
//   2. re-generate the tables needed; we use tables of (N == 16, W == 4),
//   3. finally vectorize the code (the original implementation in zcrc32.c is just scalar code).
// The new tables for the vector version are placed after table3.
void MacroAssembler::vector_update_crc32(Register crc, Register buf, Register len,
                                         Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
                                         Register table0, Register table3) {
  assert_different_registers(t1, crc, buf, len, tmp1, tmp2, tmp3, tmp4, tmp5, table0, table3);
  const int N = 16, W = 4;
  const int64_t single_table_size = 256;
  const Register blks = tmp2;
  const Register tmpTable = tmp3, tableN16 = tmp4;
  const VectorRegister vcrc = v4, vword = v8, vtmp = v12;
  Label VectorLoop;
  Label LastBlock;

  add(tableN16, table3, 1*single_table_size*sizeof(juint), tmp1);
  mv(tmp5, 0xff);

  if (MaxVectorSize == 16) {
    vsetivli(zr, N, Assembler::e32, Assembler::m4, Assembler::ma, Assembler::ta);
  } else if (MaxVectorSize == 32) {
    vsetivli(zr, N, Assembler::e32, Assembler::m2, Assembler::ma, Assembler::ta);
  } else {
    assert(MaxVectorSize > 32, "sanity");
    vsetivli(zr, N, Assembler::e32, Assembler::m1, Assembler::ma, Assembler::ta);
  }

  vmv_v_x(vcrc, zr);
  vmv_s_x(vcrc, crc);

  // multiple of 64
  srli(blks, len, 6);
  slli(t1, blks, 6);
  sub(len, len, t1);
  sub(blks, blks, 1);
  blez(blks, LastBlock);

  bind(VectorLoop);
  {
    mv(tmpTable, tableN16);

    vle32_v(vword, buf);
    vxor_vv(vword, vword, vcrc);

    addi(buf, buf, N*4);

    vand_vx(vtmp, vword, tmp5);
    vsll_vi(vtmp, vtmp, 2);
    vluxei32_v(vcrc, tmpTable, vtmp);

    mv(tmp1, 1);
    for (int k = 1; k < W; k++) {
      addi(tmpTable, tmpTable, single_table_size*4);

      slli(t1, tmp1, 3);
      vsrl_vx(vtmp, vword, t1);

      vand_vx(vtmp, vtmp, tmp5);
      vsll_vi(vtmp, vtmp, 2);
      vluxei32_v(vtmp, tmpTable, vtmp);

      vxor_vv(vcrc, vcrc, vtmp);

      addi(tmp1, tmp1, 1);
    }

    sub(blks, blks, 1);
    bgtz(blks, VectorLoop);
  }

  bind(LastBlock);
  {
    vle32_v(vtmp, buf);
    vxor_vv(vcrc, vcrc, vtmp);
    mv(crc, zr);
    for (int i = 0; i < N; i++) {
      vmv_x_s(tmp2, vcrc);
      // in vmv_x_s, the value is sign-extended to SEW bits, but we need zero-extended here.
      zext_w(tmp2, tmp2);
      vslidedown_vi(vcrc, vcrc, 1);
      xorr(crc, crc, tmp2);
      for (int j = 0; j < W; j++) {
        andr(t1, crc, tmp5);
        shadd(t1, t1, table0, tmp1, 2);
        lwu(t1, Address(t1, 0));
        srli(tmp2, crc, 8);
        xorr(crc, tmp2, t1);
      }
    }
    addi(buf, buf, N*4);
  }
}
#endif // COMPILER2

/**
 * @param crc    register containing existing CRC (32-bit)
 * @param buf    register pointing to input byte buffer (byte*)
 * @param len    register containing number of bytes
 * @param table  register that will contain address of CRC table
 * @param tmp    scratch registers
 */
void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
                                  Register table0, Register table1, Register table2, Register table3,
                                  Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6) {
  assert_different_registers(crc, buf, len, table0, table1, table2, table3, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
  Label L_vector_entry,
        L_unroll_loop,
        L_by4_loop_entry, L_by4_loop,
        L_by1_loop, L_exit;

  const int64_t single_table_size = 256;
  const int64_t unroll = 16;
  const int64_t unroll_words = unroll*wordSize;
  mv(tmp5, right_32_bits);
  andn(crc, tmp5, crc);

  const ExternalAddress table_addr = StubRoutines::crc_table_addr();
  la(table0, table_addr);
  add(table1, table0, 1*single_table_size*sizeof(juint), tmp1);
  add(table2, table0, 2*single_table_size*sizeof(juint), tmp1);
  add(table3, table2, 1*single_table_size*sizeof(juint), tmp1);
#ifdef COMPILER2
  if (UseRVV) {
    const int64_t tmp_limit = MaxVectorSize >= 32 ? unroll_words*3 : unroll_words*5;
    mv(tmp1, tmp_limit);
    bge(len, tmp1, L_vector_entry);
  }
#endif // COMPILER2

  mv(tmp1, unroll_words);
  blt(len, tmp1, L_by4_loop_entry);

  const Register loop_buf_end = tmp3;

  align(CodeEntryAlignment);
  // Entry for L_unroll_loop
  add(loop_buf_end, buf, len);    // loop_buf_end will be used as endpoint for loop below
  andi(len, len, unroll_words-1); // len = (len % unroll_words)
  sub(loop_buf_end, loop_buf_end, len);
  bind(L_unroll_loop);
  for (int i = 0; i < unroll; i++) {
    ld(tmp1, Address(buf, i*wordSize));
    update_word_crc32(crc, tmp1, tmp2, tmp4, tmp6, table0, table1, table2, table3, false);
    update_word_crc32(crc, tmp1, tmp2, tmp4, tmp6, table0, table1, table2, table3, true);
  }

  addi(buf, buf, unroll_words);
  blt(buf, loop_buf_end, L_unroll_loop);

  bind(L_by4_loop_entry);
  mv(tmp1, 4);
  blt(len, tmp1, L_by1_loop);
  add(loop_buf_end, buf, len);    // loop_buf_end will be used as endpoint for loop below
  andi(len, len, 3);
  sub(loop_buf_end, loop_buf_end, len);
  bind(L_by4_loop);
  lwu(tmp1, Address(buf));
  update_word_crc32(crc, tmp1, tmp2, tmp4, tmp6, table0, table1, table2, table3, false);
  addi(buf, buf, 4);
  blt(buf, loop_buf_end, L_by4_loop);

  bind(L_by1_loop);
  beqz(len, L_exit);

  subw(len, len, 1);
  lwu(tmp1, Address(buf));
  andi(tmp2, tmp1, right_8_bits);
  update_byte_crc32(crc, tmp2, table0);
  beqz(len, L_exit);

  subw(len, len, 1);
  srli(tmp2, tmp1, 8);
  andi(tmp2, tmp2, right_8_bits);
  update_byte_crc32(crc, tmp2, table0);
  beqz(len, L_exit);

  subw(len, len, 1);
  srli(tmp2, tmp1, 16);
  andi(tmp2, tmp2, right_8_bits);
  update_byte_crc32(crc, tmp2, table0);

#ifdef COMPILER2
  // The vector code is placed here; otherwise an "offset is too large" error
  // occurs for the branches to it above.
  if (UseRVV) {
    // Only need to jump to the exit when UseRVV == true; this is the jump from the end of block `L_by1_loop`.
    j(L_exit);

    bind(L_vector_entry);
    vector_update_crc32(crc, buf, len, tmp1, tmp2, tmp3, tmp4, tmp6, table0, table3);

    bgtz(len, L_by4_loop_entry);
  }
#endif // COMPILER2

  bind(L_exit);
  andn(crc, tmp5, crc);
}
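// Note: kernel_crc32 above complements the incoming CRC on entry and the
// result on exit (andn with 0xffffffff), matching the usual zlib-style
// convention in which the running CRC is kept in bit-inverted form while the
// table-driven loops operate on it.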
#ifdef COMPILER2
// Push vector registers in the bitset supplied.
// Return the number of words pushed
int MacroAssembler::push_v(unsigned int bitset, Register stack) {
  int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);

  // Scan bitset to accumulate register pairs
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);

  for (int i = 0; i < count; i++) {
    sub(stack, stack, vector_size_in_bytes);
    vs1r_v(as_VectorRegister(regs[i]), stack);
  }

  return count * vector_size_in_bytes / wordSize;
}

int MacroAssembler::pop_v(unsigned int bitset, Register stack) {
  int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);

  // Scan bitset to accumulate register pairs
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);

  for (int i = count - 1; i >= 0; i--) {
    vl1r_v(as_VectorRegister(regs[i]), stack);
    add(stack, stack, vector_size_in_bytes);
  }

  return count * vector_size_in_bytes / wordSize;
}
#endif // COMPILER2

void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) {
  // Push integer registers x7, x10-x17, x28-x31.
  push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);

  // Push float registers f0-f7, f10-f17, f28-f31.
  addi(sp, sp, - wordSize * 20);
  int offset = 0;
  for (int i = 0; i < 32; i++) {
    if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
      fsd(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
    }
  }
}

void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) {
  int offset = 0;
  for (int i = 0; i < 32; i++) {
    if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
      fld(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
    }
  }
  addi(sp, sp, wordSize * 20);

  pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
}

void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) {
  // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
  push_reg(RegSet::range(x5, x31), sp);

  // float registers
  addi(sp, sp, - 32 * wordSize);
  for (int i = 0; i < 32; i++) {
    fsd(as_FloatRegister(i), Address(sp, i * wordSize));
  }

  // vector registers
  if (save_vectors) {
    sub(sp, sp, vector_size_in_bytes * VectorRegister::number_of_registers);
    vsetvli(t0, x0, Assembler::e64, Assembler::m8);
    for (int i = 0; i < VectorRegister::number_of_registers; i += 8) {
      add(t0, sp, vector_size_in_bytes * i);
      vse64_v(as_VectorRegister(i), t0);
    }
  }
}

void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) {
  // vector registers
  if (restore_vectors) {
    vsetvli(t0, x0, Assembler::e64, Assembler::m8);
    for (int i = 0; i < VectorRegister::number_of_registers; i += 8) {
      vle64_v(as_VectorRegister(i), sp);
      add(sp, sp, vector_size_in_bytes * 8);
    }
  }

  // float registers
  for (int i = 0; i < 32; i++) {
    fld(as_FloatRegister(i), Address(sp, i * wordSize));
  }
  addi(sp, sp, 32 * wordSize);

  // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
  pop_reg(RegSet::range(x5, x31), sp);
}
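// Note on the patching helpers below: RISC-V scatters branch/jump immediates
// across the instruction word. A jal encodes imm[20|10:1|11|19:12] in bits
// [31:12], and a conditional branch encodes imm[12|10:5] in bits [31:25] and
// imm[4:1|11] in bits [11:7], which is why the patch/get helpers assemble the
// offsets field by field.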
static int patch_offset_in_jal(address branch, int64_t offset) {
  assert(Assembler::is_simm21(offset) && ((offset % 2) == 0),
         "offset is too large to be patched in one jal instruction!\n");
  Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1);  // offset[20]    ==> branch[31]
  Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1]  ==> branch[30:21]
  Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1);  // offset[11]    ==> branch[20]
  Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12]
  return MacroAssembler::instruction_size;                 // only one instruction
}

static int patch_offset_in_conditional_branch(address branch, int64_t offset) {
  assert(Assembler::is_simm13(offset) && ((offset % 2) == 0),
         "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n");
  Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1);  // offset[12]    ==> branch[31]
  Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f);  // offset[10:5]  ==> branch[30:25]
  Assembler::patch(branch, 7, 7,   (offset >> 11) & 0x1);  // offset[11]    ==> branch[7]
  Assembler::patch(branch, 11, 8,  (offset >> 1) & 0xf);   // offset[4:1]   ==> branch[11:8]
  return MacroAssembler::instruction_size;                 // only one instruction
}

static int patch_offset_in_pc_relative(address branch, int64_t offset) {
  const int PC_RELATIVE_INSTRUCTION_NUM = 2;                              // auipc, addi/jalr/load
  Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff);   // Auipc.           offset[31:12] ==> branch[31:12]
  Assembler::patch(branch + 4, 31, 20, offset & 0xfff);                   // Addi/Jalr/Load.  offset[11:0]  ==> branch[31:20]
  return PC_RELATIVE_INSTRUCTION_NUM * MacroAssembler::instruction_size;
}

static int patch_addr_in_movptr1(address branch, address target) {
  int32_t lower = ((intptr_t)target << 35) >> 35;
  int64_t upper = ((intptr_t)target - lower) >> 29;
  Assembler::patch(branch + 0,  31, 12, upper & 0xfffff);       // Lui.   target[48:29] + target[28] ==> branch[31:12]
  Assembler::patch(branch + 4,  31, 20, (lower >> 17) & 0xfff); // Addi.  target[28:17] ==> branch[31:20]
  Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff);  // Addi.  target[16: 6] ==> branch[31:20]
target[ 5: 0] ==> branch[31:20] 1796 return MacroAssembler::movptr1_instruction_size; 1797 } 1798 1799 static int patch_addr_in_movptr2(address instruction_address, address target) { 1800 uintptr_t addr = (uintptr_t)target; 1801 1802 assert(addr < (1ull << 48), "48-bit overflow in address constant"); 1803 unsigned int upper18 = (addr >> 30ull); 1804 int lower30 = (addr & 0x3fffffffu); 1805 int low12 = (lower30 << 20) >> 20; 1806 int mid18 = ((lower30 - low12) >> 12); 1807 1808 Assembler::patch(instruction_address + (MacroAssembler::instruction_size * 0), 31, 12, (upper18 & 0xfffff)); // Lui 1809 Assembler::patch(instruction_address + (MacroAssembler::instruction_size * 1), 31, 12, (mid18 & 0xfffff)); // Lui 1810 // Slli 1811 // Add 1812 Assembler::patch(instruction_address + (MacroAssembler::instruction_size * 4), 31, 20, low12 & 0xfff); // Addi/Jalr/Load 1813 1814 assert(MacroAssembler::target_addr_for_insn(instruction_address) == target, "Must be"); 1815 1816 return MacroAssembler::movptr2_instruction_size; 1817 } 1818 1819 static int patch_imm_in_li16u(address branch, uint16_t target) { 1820 Assembler::patch(branch, 31, 12, target); // patch lui only 1821 return MacroAssembler::instruction_size; 1822 } 1823 1824 int MacroAssembler::patch_imm_in_li32(address branch, int32_t target) { 1825 const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw 1826 int64_t upper = (intptr_t)target; 1827 int32_t lower = (((int32_t)target) << 20) >> 20; 1828 upper -= lower; 1829 upper = (int32_t)upper; 1830 Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. 1831 Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. 1832 return LI32_INSTRUCTIONS_NUM * MacroAssembler::instruction_size; 1833 } 1834 1835 static long get_offset_of_jal(address insn_addr) { 1836 assert_cond(insn_addr != nullptr); 1837 long offset = 0; 1838 unsigned insn = Assembler::ld_instr(insn_addr); 1839 long val = (long)Assembler::sextract(insn, 31, 12); 1840 offset |= ((val >> 19) & 0x1) << 20; 1841 offset |= (val & 0xff) << 12; 1842 offset |= ((val >> 8) & 0x1) << 11; 1843 offset |= ((val >> 9) & 0x3ff) << 1; 1844 offset = (offset << 43) >> 43; 1845 return offset; 1846 } 1847 1848 static long get_offset_of_conditional_branch(address insn_addr) { 1849 long offset = 0; 1850 assert_cond(insn_addr != nullptr); 1851 unsigned insn = Assembler::ld_instr(insn_addr); 1852 offset = (long)Assembler::sextract(insn, 31, 31); 1853 offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); 1854 offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); 1855 offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); 1856 offset = (offset << 41) >> 41; 1857 return offset; 1858 } 1859 1860 static long get_offset_of_pc_relative(address insn_addr) { 1861 long offset = 0; 1862 assert_cond(insn_addr != nullptr); 1863 offset = ((long)(Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12))) << 12; // Auipc. 1864 offset += ((long)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)); // Addi/Jalr/Load. 1865 offset = (offset << 32) >> 32; 1866 return offset; 1867 } 1868 1869 static address get_target_of_movptr1(address insn_addr) { 1870 assert_cond(insn_addr != nullptr); 1871 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 29; // Lui. 1872 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 17; // Addi. 
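// The remaining two additions pick up target[16:6] from the addi at insn_addr + 12 and target[5:0] from the trailing addi/jalr/load at insn_addr + 20, mirroring the field layout documented in patch_addr_in_movptr1 above.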
1873 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 6; // Addi. 1874 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20)); // Addi/Jalr/Load. 1875 return (address) target_address; 1876 } 1877 1878 static address get_target_of_movptr2(address insn_addr) { 1879 assert_cond(insn_addr != nullptr); 1880 int32_t upper18 = ((Assembler::sextract(Assembler::ld_instr(insn_addr + MacroAssembler::instruction_size * 0), 31, 12)) & 0xfffff); // Lui 1881 int32_t mid18 = ((Assembler::sextract(Assembler::ld_instr(insn_addr + MacroAssembler::instruction_size * 1), 31, 12)) & 0xfffff); // Lui 1882 // 2 // Slli 1883 // 3 // Add 1884 int32_t low12 = ((Assembler::sextract(Assembler::ld_instr(insn_addr + MacroAssembler::instruction_size * 4), 31, 20))); // Addi/Jalr/Load. 1885 address ret = (address)(((intptr_t)upper18<<30ll) + ((intptr_t)mid18<<12ll) + low12); 1886 return ret; 1887 } 1888 1889 address MacroAssembler::get_target_of_li32(address insn_addr) { 1890 assert_cond(insn_addr != nullptr); 1891 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 12; // Lui. 1892 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)); // Addiw. 1893 return (address)target_address; 1894 } 1895 1896 // Patch any kind of instruction; there may be several instructions. 1897 // Return the total length (in bytes) of the instructions. 1898 int MacroAssembler::pd_patch_instruction_size(address instruction_address, address target) { 1899 assert_cond(instruction_address != nullptr); 1900 int64_t offset = target - instruction_address; 1901 if (MacroAssembler::is_jal_at(instruction_address)) { // jal 1902 return patch_offset_in_jal(instruction_address, offset); 1903 } else if (MacroAssembler::is_branch_at(instruction_address)) { // beq/bge/bgeu/blt/bltu/bne 1904 return patch_offset_in_conditional_branch(instruction_address, offset); 1905 } else if (MacroAssembler::is_pc_relative_at(instruction_address)) { // auipc, addi/jalr/load 1906 return patch_offset_in_pc_relative(instruction_address, offset); 1907 } else if (MacroAssembler::is_movptr1_at(instruction_address)) { // movptr1 1908 return patch_addr_in_movptr1(instruction_address, target); 1909 } else if (MacroAssembler::is_movptr2_at(instruction_address)) { // movptr2 1910 return patch_addr_in_movptr2(instruction_address, target); 1911 } else if (MacroAssembler::is_li32_at(instruction_address)) { // li32 1912 int64_t imm = (intptr_t)target; 1913 return patch_imm_in_li32(instruction_address, (int32_t)imm); 1914 } else if (MacroAssembler::is_li16u_at(instruction_address)) { 1915 int64_t imm = (intptr_t)target; 1916 return patch_imm_in_li16u(instruction_address, (uint16_t)imm); 1917 } else { 1918 #ifdef ASSERT 1919 tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", 1920 Assembler::ld_instr(instruction_address), p2i(instruction_address)); 1921 Disassembler::decode(instruction_address - 16, instruction_address + 16); 1922 #endif 1923 ShouldNotReachHere(); 1924 return -1; 1925 } 1926 } 1927 1928 address MacroAssembler::target_addr_for_insn(address insn_addr) { 1929 long offset = 0; 1930 assert_cond(insn_addr != nullptr); 1931 if (MacroAssembler::is_jal_at(insn_addr)) { // jal 1932 offset = get_offset_of_jal(insn_addr); 1933 } else if (MacroAssembler::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne 1934 offset = 
get_offset_of_conditional_branch(insn_addr); 1935 } else if (MacroAssembler::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load 1936 offset = get_offset_of_pc_relative(insn_addr); 1937 } else if (MacroAssembler::is_movptr1_at(insn_addr)) { // movptr1 1938 return get_target_of_movptr1(insn_addr); 1939 } else if (MacroAssembler::is_movptr2_at(insn_addr)) { // movptr2 1940 return get_target_of_movptr2(insn_addr); 1941 } else if (MacroAssembler::is_li32_at(insn_addr)) { // li32 1942 return get_target_of_li32(insn_addr); 1943 } else { 1944 ShouldNotReachHere(); 1945 } 1946 return address(((uintptr_t)insn_addr + offset)); 1947 } 1948 1949 int MacroAssembler::patch_oop(address insn_addr, address o) { 1950 // OOPs are either narrow (32 bits) or wide (48 bits). We encode 1951 // narrow OOPs by setting the upper 16 bits in the first 1952 // instruction. 1953 if (MacroAssembler::is_li32_at(insn_addr)) { 1954 // Move narrow OOP 1955 uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); 1956 return patch_imm_in_li32(insn_addr, (int32_t)n); 1957 } else if (MacroAssembler::is_movptr1_at(insn_addr)) { 1958 // Move wide OOP 1959 return patch_addr_in_movptr1(insn_addr, o); 1960 } else if (MacroAssembler::is_movptr2_at(insn_addr)) { 1961 // Move wide OOP 1962 return patch_addr_in_movptr2(insn_addr, o); 1963 } 1964 ShouldNotReachHere(); 1965 return -1; 1966 } 1967 1968 void MacroAssembler::reinit_heapbase() { 1969 if (UseCompressedOops) { 1970 if (Universe::is_fully_initialized()) { 1971 mv(xheapbase, CompressedOops::base()); 1972 } else { 1973 ExternalAddress target(CompressedOops::base_addr()); 1974 relocate(target.rspec(), [&] { 1975 int32_t offset; 1976 la(xheapbase, target.target(), offset); 1977 ld(xheapbase, Address(xheapbase, offset)); 1978 }); 1979 } 1980 } 1981 } 1982 1983 void MacroAssembler::movptr(Register Rd, address addr, Register temp) { 1984 int offset = 0; 1985 movptr(Rd, addr, offset, temp); 1986 addi(Rd, Rd, offset); 1987 } 1988 1989 void MacroAssembler::movptr(Register Rd, address addr, int32_t &offset, Register temp) { 1990 uint64_t uimm64 = (uint64_t)addr; 1991 #ifndef PRODUCT 1992 { 1993 char buffer[64]; 1994 snprintf(buffer, sizeof(buffer), "0x%" PRIx64, uimm64); 1995 block_comment(buffer); 1996 } 1997 #endif 1998 assert(uimm64 < (1ull << 48), "48-bit overflow in address constant"); 1999 2000 if (temp == noreg) { 2001 movptr1(Rd, uimm64, offset); 2002 } else { 2003 movptr2(Rd, uimm64, offset, temp); 2004 } 2005 } 2006 2007 void MacroAssembler::movptr1(Register Rd, uint64_t imm64, int32_t &offset) { 2008 // Load upper 31 bits 2009 // 2010 // In case of 11th bit of `lower` is 0, it's straightforward to understand. 2011 // In case of 11th bit of `lower` is 1, it's a bit tricky, to help understand, 2012 // imagine divide both `upper` and `lower` into 2 parts respectively, i.e. 2013 // [upper_20, upper_12], [lower_20, lower_12], they are the same just before 2014 // `lower = (lower << 52) >> 52;`. 
2015 // After `upper -= lower;`, 2016 // upper_20' = upper_20 - (-1) == upper_20 + 1 2017 // upper_12 = 0x000 2018 // After `lui(Rd, upper);`, `Rd` = upper_20' << 12 2019 // Also divide `Rd` into 2 parts [Rd_20, Rd_12], 2020 // Rd_20 == upper_20' 2021 // Rd_12 == 0x000 2022 // After `addi(Rd, Rd, lower);`, 2023 // Rd_20 = upper_20' + (-1) == upper_20 + 1 - 1 = upper_20 2024 // Rd_12 = lower_12 2025 // So, finally Rd == [upper_20, lower_12] 2026 int64_t imm = imm64 >> 17; 2027 int64_t upper = imm, lower = imm; 2028 lower = (lower << 52) >> 52; 2029 upper -= lower; 2030 upper = (int32_t)upper; 2031 lui(Rd, upper); 2032 addi(Rd, Rd, lower); 2033 2034 // Load the rest 17 bits. 2035 slli(Rd, Rd, 11); 2036 addi(Rd, Rd, (imm64 >> 6) & 0x7ff); 2037 slli(Rd, Rd, 6); 2038 2039 // This offset will be used by following jalr/ld. 2040 offset = imm64 & 0x3f; 2041 } 2042 2043 void MacroAssembler::movptr2(Register Rd, uint64_t addr, int32_t &offset, Register tmp) { 2044 assert_different_registers(Rd, tmp, noreg); 2045 2046 // addr: [upper18, lower30[mid18, lower12]] 2047 2048 int64_t upper18 = addr >> 18; 2049 lui(tmp, upper18); 2050 2051 int64_t lower30 = addr & 0x3fffffff; 2052 int64_t mid18 = lower30, lower12 = lower30; 2053 lower12 = (lower12 << 52) >> 52; 2054 // For this tricky part (`mid18 -= lower12;` + `offset = lower12;`), 2055 // please refer to movptr1 above. 2056 mid18 -= (int32_t)lower12; 2057 lui(Rd, mid18); 2058 2059 slli(tmp, tmp, 18); 2060 add(Rd, Rd, tmp); 2061 2062 offset = lower12; 2063 } 2064 2065 void MacroAssembler::add(Register Rd, Register Rn, int64_t increment, Register temp) { 2066 if (is_simm12(increment)) { 2067 addi(Rd, Rn, increment); 2068 } else { 2069 assert_different_registers(Rn, temp); 2070 li(temp, increment); 2071 add(Rd, Rn, temp); 2072 } 2073 } 2074 2075 void MacroAssembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) { 2076 if (is_simm12(increment)) { 2077 addiw(Rd, Rn, increment); 2078 } else { 2079 assert_different_registers(Rn, temp); 2080 li(temp, increment); 2081 addw(Rd, Rn, temp); 2082 } 2083 } 2084 2085 void MacroAssembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) { 2086 add(Rd, Rn, -decrement, temp); 2087 } 2088 2089 void MacroAssembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) { 2090 addw(Rd, Rn, -decrement, temp); 2091 } 2092 2093 void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { 2094 andr(Rd, Rs1, Rs2); 2095 sign_extend(Rd, Rd, 32); 2096 } 2097 2098 void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { 2099 orr(Rd, Rs1, Rs2); 2100 sign_extend(Rd, Rd, 32); 2101 } 2102 2103 void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { 2104 xorr(Rd, Rs1, Rs2); 2105 sign_extend(Rd, Rd, 32); 2106 } 2107 2108 // Rd = Rs1 & (~Rd2) 2109 void MacroAssembler::andn(Register Rd, Register Rs1, Register Rs2) { 2110 if (UseZbb) { 2111 Assembler::andn(Rd, Rs1, Rs2); 2112 return; 2113 } 2114 2115 notr(Rd, Rs2); 2116 andr(Rd, Rs1, Rd); 2117 } 2118 2119 // Rd = Rs1 | (~Rd2) 2120 void MacroAssembler::orn(Register Rd, Register Rs1, Register Rs2) { 2121 if (UseZbb) { 2122 Assembler::orn(Rd, Rs1, Rs2); 2123 return; 2124 } 2125 2126 notr(Rd, Rs2); 2127 orr(Rd, Rs1, Rd); 2128 } 2129 2130 // Note: load_unsigned_short used to be called load_unsigned_word. 
2131 int MacroAssembler::load_unsigned_short(Register dst, Address src) { 2132 int off = offset(); 2133 lhu(dst, src); 2134 return off; 2135 } 2136 2137 int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 2138 int off = offset(); 2139 lbu(dst, src); 2140 return off; 2141 } 2142 2143 int MacroAssembler::load_signed_short(Register dst, Address src) { 2144 int off = offset(); 2145 lh(dst, src); 2146 return off; 2147 } 2148 2149 int MacroAssembler::load_signed_byte(Register dst, Address src) { 2150 int off = offset(); 2151 lb(dst, src); 2152 return off; 2153 } 2154 2155 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 2156 switch (size_in_bytes) { 2157 case 8: ld(dst, src); break; 2158 case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; 2159 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 2160 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 2161 default: ShouldNotReachHere(); 2162 } 2163 } 2164 2165 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes) { 2166 switch (size_in_bytes) { 2167 case 8: sd(src, dst); break; 2168 case 4: sw(src, dst); break; 2169 case 2: sh(src, dst); break; 2170 case 1: sb(src, dst); break; 2171 default: ShouldNotReachHere(); 2172 } 2173 } 2174 2175 // granularity is 1 OR 2 bytes per load. dst and src.base() allowed to be the same register 2176 void MacroAssembler::load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) { 2177 if (granularity != 1 && granularity != 2) { 2178 ShouldNotReachHere(); 2179 } 2180 if (AvoidUnalignedAccesses && (granularity != 2)) { 2181 assert_different_registers(dst, tmp); 2182 assert_different_registers(tmp, src.base()); 2183 is_signed ? lb(tmp, Address(src.base(), src.offset() + 1)) : lbu(tmp, Address(src.base(), src.offset() + 1)); 2184 slli(tmp, tmp, 8); 2185 lbu(dst, src); 2186 add(dst, dst, tmp); 2187 } else { 2188 is_signed ? lh(dst, src) : lhu(dst, src); 2189 } 2190 } 2191 2192 // granularity is 1, 2 OR 4 bytes per load, if granularity 2 or 4 then dst and src.base() allowed to be the same register 2193 void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) { 2194 if (AvoidUnalignedAccesses && (granularity != 4)) { 2195 switch(granularity) { 2196 case 1: 2197 assert_different_registers(dst, tmp, src.base()); 2198 lbu(dst, src); 2199 lbu(tmp, Address(src.base(), src.offset() + 1)); 2200 slli(tmp, tmp, 8); 2201 add(dst, dst, tmp); 2202 lbu(tmp, Address(src.base(), src.offset() + 2)); 2203 slli(tmp, tmp, 16); 2204 add(dst, dst, tmp); 2205 is_signed ? lb(tmp, Address(src.base(), src.offset() + 3)) : lbu(tmp, Address(src.base(), src.offset() + 3)); 2206 slli(tmp, tmp, 24); 2207 add(dst, dst, tmp); 2208 break; 2209 case 2: 2210 assert_different_registers(dst, tmp); 2211 assert_different_registers(tmp, src.base()); 2212 is_signed ? lh(tmp, Address(src.base(), src.offset() + 2)) : lhu(tmp, Address(src.base(), src.offset() + 2)); 2213 slli(tmp, tmp, 16); 2214 lhu(dst, src); 2215 add(dst, dst, tmp); 2216 break; 2217 default: 2218 ShouldNotReachHere(); 2219 } 2220 } else { 2221 is_signed ? 
lw(dst, src) : lwu(dst, src); 2222 } 2223 } 2224 2225 // granularity is 1, 2, 4 or 8 bytes per load, if granularity 4 or 8 then dst and src.base() allowed to be same register 2226 void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tmp, int granularity) { 2227 if (AvoidUnalignedAccesses && (granularity != 8)) { 2228 switch(granularity){ 2229 case 1: 2230 assert_different_registers(dst, tmp, src.base()); 2231 lbu(dst, src); 2232 lbu(tmp, Address(src.base(), src.offset() + 1)); 2233 slli(tmp, tmp, 8); 2234 add(dst, dst, tmp); 2235 lbu(tmp, Address(src.base(), src.offset() + 2)); 2236 slli(tmp, tmp, 16); 2237 add(dst, dst, tmp); 2238 lbu(tmp, Address(src.base(), src.offset() + 3)); 2239 slli(tmp, tmp, 24); 2240 add(dst, dst, tmp); 2241 lbu(tmp, Address(src.base(), src.offset() + 4)); 2242 slli(tmp, tmp, 32); 2243 add(dst, dst, tmp); 2244 lbu(tmp, Address(src.base(), src.offset() + 5)); 2245 slli(tmp, tmp, 40); 2246 add(dst, dst, tmp); 2247 lbu(tmp, Address(src.base(), src.offset() + 6)); 2248 slli(tmp, tmp, 48); 2249 add(dst, dst, tmp); 2250 lbu(tmp, Address(src.base(), src.offset() + 7)); 2251 slli(tmp, tmp, 56); 2252 add(dst, dst, tmp); 2253 break; 2254 case 2: 2255 assert_different_registers(dst, tmp, src.base()); 2256 lhu(dst, src); 2257 lhu(tmp, Address(src.base(), src.offset() + 2)); 2258 slli(tmp, tmp, 16); 2259 add(dst, dst, tmp); 2260 lhu(tmp, Address(src.base(), src.offset() + 4)); 2261 slli(tmp, tmp, 32); 2262 add(dst, dst, tmp); 2263 lhu(tmp, Address(src.base(), src.offset() + 6)); 2264 slli(tmp, tmp, 48); 2265 add(dst, dst, tmp); 2266 break; 2267 case 4: 2268 assert_different_registers(dst, tmp); 2269 assert_different_registers(tmp, src.base()); 2270 lwu(tmp, Address(src.base(), src.offset() + 4)); 2271 slli(tmp, tmp, 32); 2272 lwu(dst, src); 2273 add(dst, dst, tmp); 2274 break; 2275 default: 2276 ShouldNotReachHere(); 2277 } 2278 } else { 2279 ld(dst, src); 2280 } 2281 } 2282 2283 2284 // reverse bytes in halfword in lower 16 bits and sign-extend 2285 // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) 2286 void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { 2287 if (UseZbb) { 2288 rev8(Rd, Rs); 2289 srai(Rd, Rd, 48); 2290 return; 2291 } 2292 assert_different_registers(Rs, tmp); 2293 assert_different_registers(Rd, tmp); 2294 srli(tmp, Rs, 8); 2295 andi(tmp, tmp, 0xFF); 2296 slli(Rd, Rs, 56); 2297 srai(Rd, Rd, 48); // sign-extend 2298 orr(Rd, Rd, tmp); 2299 } 2300 2301 // reverse bytes in lower word and sign-extend 2302 // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) 2303 void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2304 if (UseZbb) { 2305 rev8(Rd, Rs); 2306 srai(Rd, Rd, 32); 2307 return; 2308 } 2309 assert_different_registers(Rs, tmp1, tmp2); 2310 assert_different_registers(Rd, tmp1, tmp2); 2311 revb_h_w_u(Rd, Rs, tmp1, tmp2); 2312 slli(tmp2, Rd, 48); 2313 srai(tmp2, tmp2, 32); // sign-extend 2314 srli(Rd, Rd, 16); 2315 orr(Rd, Rd, tmp2); 2316 } 2317 2318 // reverse bytes in halfword in lower 16 bits and zero-extend 2319 // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 2320 void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { 2321 if (UseZbb) { 2322 rev8(Rd, Rs); 2323 srli(Rd, Rd, 48); 2324 return; 2325 } 2326 assert_different_registers(Rs, tmp); 2327 assert_different_registers(Rd, tmp); 2328 srli(tmp, Rs, 8); 2329 andi(tmp, tmp, 0xFF); 2330 andi(Rd, Rs, 0xFF); 2331 slli(Rd, Rd, 8); 2332 orr(Rd, Rd, tmp); 2333 } 2334 2335 // reverse 
bytes in halfwords in lower 32 bits and zero-extend 2336 // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 2337 void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2338 if (UseZbb) { 2339 rev8(Rd, Rs); 2340 rori(Rd, Rd, 32); 2341 roriw(Rd, Rd, 16); 2342 zero_extend(Rd, Rd, 32); 2343 return; 2344 } 2345 assert_different_registers(Rs, tmp1, tmp2); 2346 assert_different_registers(Rd, tmp1, tmp2); 2347 srli(tmp2, Rs, 16); 2348 revb_h_h_u(tmp2, tmp2, tmp1); 2349 revb_h_h_u(Rd, Rs, tmp1); 2350 slli(tmp2, tmp2, 16); 2351 orr(Rd, Rd, tmp2); 2352 } 2353 2354 // This method is only used for revb_h 2355 // Rd = Rs[47:0] Rs[55:48] Rs[63:56] 2356 void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2357 assert_different_registers(Rs, tmp1, tmp2); 2358 assert_different_registers(Rd, tmp1); 2359 srli(tmp1, Rs, 48); 2360 andi(tmp2, tmp1, 0xFF); 2361 slli(tmp2, tmp2, 8); 2362 srli(tmp1, tmp1, 8); 2363 orr(tmp1, tmp1, tmp2); 2364 slli(Rd, Rs, 16); 2365 orr(Rd, Rd, tmp1); 2366 } 2367 2368 // reverse bytes in each halfword 2369 // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] 2370 void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2371 if (UseZbb) { 2372 assert_different_registers(Rs, tmp1); 2373 assert_different_registers(Rd, tmp1); 2374 rev8(Rd, Rs); 2375 zero_extend(tmp1, Rd, 32); 2376 roriw(tmp1, tmp1, 16); 2377 slli(tmp1, tmp1, 32); 2378 srli(Rd, Rd, 32); 2379 roriw(Rd, Rd, 16); 2380 zero_extend(Rd, Rd, 32); 2381 orr(Rd, Rd, tmp1); 2382 return; 2383 } 2384 assert_different_registers(Rs, tmp1, tmp2); 2385 assert_different_registers(Rd, tmp1, tmp2); 2386 revb_h_helper(Rd, Rs, tmp1, tmp2); 2387 for (int i = 0; i < 3; ++i) { 2388 revb_h_helper(Rd, Rd, tmp1, tmp2); 2389 } 2390 } 2391 2392 // reverse bytes in each word 2393 // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] 2394 void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2395 if (UseZbb) { 2396 rev8(Rd, Rs); 2397 rori(Rd, Rd, 32); 2398 return; 2399 } 2400 assert_different_registers(Rs, tmp1, tmp2); 2401 assert_different_registers(Rd, tmp1, tmp2); 2402 revb(Rd, Rs, tmp1, tmp2); 2403 ror_imm(Rd, Rd, 32); 2404 } 2405 2406 // reverse bytes in doubleword 2407 // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] 2408 void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2409 if (UseZbb) { 2410 rev8(Rd, Rs); 2411 return; 2412 } 2413 assert_different_registers(Rs, tmp1, tmp2); 2414 assert_different_registers(Rd, tmp1, tmp2); 2415 andi(tmp1, Rs, 0xFF); 2416 slli(tmp1, tmp1, 8); 2417 for (int step = 8; step < 56; step += 8) { 2418 srli(tmp2, Rs, step); 2419 andi(tmp2, tmp2, 0xFF); 2420 orr(tmp1, tmp1, tmp2); 2421 slli(tmp1, tmp1, 8); 2422 } 2423 srli(Rd, Rs, 56); 2424 andi(Rd, Rd, 0xFF); 2425 orr(Rd, tmp1, Rd); 2426 } 2427 2428 // rotate right with shift bits 2429 void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) 2430 { 2431 if (UseZbb) { 2432 rori(dst, src, shift); 2433 return; 2434 } 2435 2436 assert_different_registers(dst, tmp); 2437 assert_different_registers(src, tmp); 2438 assert(shift < 64, "shift amount must be < 64"); 2439 slli(tmp, src, 64 - shift); 2440 srli(dst, src, shift); 2441 orr(dst, dst, tmp); 2442 } 2443 2444 // rotate left with shift bits, 32-bit version 2445 void 
MacroAssembler::rolw_imm(Register dst, Register src, uint32_t shift, Register tmp) { 2446 if (UseZbb) { 2447 // no roliw available 2448 roriw(dst, src, 32 - shift); 2449 return; 2450 } 2451 2452 assert_different_registers(dst, tmp); 2453 assert_different_registers(src, tmp); 2454 assert(shift < 32, "shift amount must be < 32"); 2455 srliw(tmp, src, 32 - shift); 2456 slliw(dst, src, shift); 2457 orr(dst, dst, tmp); 2458 } 2459 2460 void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { 2461 if (is_simm12(imm)) { 2462 and_imm12(Rd, Rn, imm); 2463 } else { 2464 assert_different_registers(Rn, tmp); 2465 mv(tmp, imm); 2466 andr(Rd, Rn, tmp); 2467 } 2468 } 2469 2470 void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { 2471 ld(tmp1, adr); 2472 if (src.is_register()) { 2473 orr(tmp1, tmp1, src.as_register()); 2474 } else { 2475 if (is_simm12(src.as_constant())) { 2476 ori(tmp1, tmp1, src.as_constant()); 2477 } else { 2478 assert_different_registers(tmp1, tmp2); 2479 mv(tmp2, src.as_constant()); 2480 orr(tmp1, tmp1, tmp2); 2481 } 2482 } 2483 sd(tmp1, adr); 2484 } 2485 2486 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L) { 2487 assert_different_registers(oop, trial_klass, tmp1, tmp2); 2488 if (UseCompressedClassPointers) { 2489 lwu(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 2490 if (CompressedKlassPointers::base() == nullptr) { 2491 slli(tmp1, tmp1, CompressedKlassPointers::shift()); 2492 beq(trial_klass, tmp1, L); 2493 return; 2494 } 2495 decode_klass_not_null(tmp1, tmp2); 2496 } else { 2497 ld(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 2498 } 2499 beq(trial_klass, tmp1, L); 2500 } 2501 2502 // Move an oop into a register. 2503 void MacroAssembler::movoop(Register dst, jobject obj) { 2504 int oop_index; 2505 if (obj == nullptr) { 2506 oop_index = oop_recorder()->allocate_oop_index(obj); 2507 } else { 2508 #ifdef ASSERT 2509 { 2510 ThreadInVMfromUnknown tiv; 2511 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 2512 } 2513 #endif 2514 oop_index = oop_recorder()->find_index(obj); 2515 } 2516 RelocationHolder rspec = oop_Relocation::spec(oop_index); 2517 2518 if (BarrierSet::barrier_set()->barrier_set_assembler()->supports_instruction_patching()) { 2519 la(dst, Address((address)obj, rspec)); 2520 } else { 2521 address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address 2522 ld_constant(dst, Address(dummy, rspec)); 2523 } 2524 } 2525 2526 // Move a metadata address into a register. 2527 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { 2528 assert((uintptr_t)obj < (1ull << 48), "48-bit overflow in metadata"); 2529 int oop_index; 2530 if (obj == nullptr) { 2531 oop_index = oop_recorder()->allocate_metadata_index(obj); 2532 } else { 2533 oop_index = oop_recorder()->find_index(obj); 2534 } 2535 RelocationHolder rspec = metadata_Relocation::spec(oop_index); 2536 la(dst, Address((address)obj, rspec)); 2537 } 2538 2539 // Writes to stack successive pages until offset reached to check for 2540 // stack overflow + shadow pages. This clobbers tmp. 2541 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 2542 assert_different_registers(tmp, size, t0); 2543 // Bang stack for total size given plus shadow page size. 2544 // Bang one page at a time because large size can bang beyond yellow and 2545 // red zones. 
2546 mv(t0, (int)os::vm_page_size()); 2547 Label loop; 2548 bind(loop); 2549 sub(tmp, sp, t0); 2550 subw(size, size, t0); 2551 sd(size, Address(tmp)); 2552 bgtz(size, loop); 2553 2554 // Bang down shadow pages too. 2555 // At this point, (tmp-0) is the last address touched, so don't 2556 // touch it again. (It was touched as (tmp-pagesize) but then tmp 2557 // was post-decremented.) Skip this address by starting at i=1, and 2558 // touch a few more pages below. N.B. It is important to touch all 2559 // the way down to and including i=StackShadowPages. 2560 for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / (int)os::vm_page_size()) - 1; i++) { 2561 // this could be any sized move but this is can be a debugging crumb 2562 // so the bigger the better. 2563 sub(tmp, tmp, (int)os::vm_page_size()); 2564 sd(size, Address(tmp, 0)); 2565 } 2566 } 2567 2568 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { 2569 int32_t offset = 0; 2570 _masm = masm; 2571 ExternalAddress target((address)flag_addr); 2572 _masm->relocate(target.rspec(), [&] { 2573 int32_t offset; 2574 _masm->la(t0, target.target(), offset); 2575 _masm->lbu(t0, Address(t0, offset)); 2576 }); 2577 if (value) { 2578 _masm->bnez(t0, _label); 2579 } else { 2580 _masm->beqz(t0, _label); 2581 } 2582 } 2583 2584 SkipIfEqual::~SkipIfEqual() { 2585 _masm->bind(_label); 2586 _masm = nullptr; 2587 } 2588 2589 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, Register tmp2) { 2590 const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 2591 ld(dst, Address(xmethod, Method::const_offset())); 2592 ld(dst, Address(dst, ConstMethod::constants_offset())); 2593 ld(dst, Address(dst, ConstantPool::pool_holder_offset())); 2594 ld(dst, Address(dst, mirror_offset)); 2595 resolve_oop_handle(dst, tmp1, tmp2); 2596 } 2597 2598 void MacroAssembler::resolve_oop_handle(Register result, Register tmp1, Register tmp2) { 2599 // OopHandle::resolve is an indirection. 2600 assert_different_registers(result, tmp1, tmp2); 2601 access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp1, tmp2); 2602 } 2603 2604 // ((WeakHandle)result).resolve() 2605 void MacroAssembler::resolve_weak_handle(Register result, Register tmp1, Register tmp2) { 2606 assert_different_registers(result, tmp1, tmp2); 2607 Label resolved; 2608 2609 // A null weak handle resolves to null. 2610 beqz(result, resolved); 2611 2612 // Only 64 bit platforms support GCs that require a tmp register 2613 // Only IN_HEAP loads require a thread_tmp register 2614 // WeakHandle::resolve is an indirection like jweak. 
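// The access below goes through the barrier set with ON_PHANTOM_OOP_REF: it does not keep the referent strongly alive, so a referent already cleared by the GC is simply read back as null.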
2615 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, 2616 result, Address(result), tmp1, tmp2); 2617 bind(resolved); 2618 } 2619 2620 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, 2621 Register dst, Address src, 2622 Register tmp1, Register tmp2) { 2623 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2624 decorators = AccessInternal::decorator_fixup(decorators, type); 2625 bool as_raw = (decorators & AS_RAW) != 0; 2626 if (as_raw) { 2627 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2); 2628 } else { 2629 bs->load_at(this, decorators, type, dst, src, tmp1, tmp2); 2630 } 2631 } 2632 2633 void MacroAssembler::null_check(Register reg, int offset) { 2634 if (needs_explicit_null_check(offset)) { 2635 // provoke OS null exception if reg is null by 2636 // accessing M[reg] w/o changing any registers 2637 // NOTE: this is plenty to provoke a segv 2638 ld(zr, Address(reg, 0)); 2639 } else { 2640 // nothing to do, (later) access of M[reg + offset] 2641 // will provoke OS null exception if reg is null 2642 } 2643 } 2644 2645 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, 2646 Address dst, Register val, 2647 Register tmp1, Register tmp2, Register tmp3) { 2648 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2649 decorators = AccessInternal::decorator_fixup(decorators, type); 2650 bool as_raw = (decorators & AS_RAW) != 0; 2651 if (as_raw) { 2652 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3); 2653 } else { 2654 bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3); 2655 } 2656 } 2657 2658 // Algorithm must match CompressedOops::encode. 2659 void MacroAssembler::encode_heap_oop(Register d, Register s) { 2660 verify_oop_msg(s, "broken oop in encode_heap_oop"); 2661 if (CompressedOops::base() == nullptr) { 2662 if (CompressedOops::shift() != 0) { 2663 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2664 srli(d, s, LogMinObjAlignmentInBytes); 2665 } else { 2666 mv(d, s); 2667 } 2668 } else { 2669 Label notNull; 2670 sub(d, s, xheapbase); 2671 bgez(d, notNull); 2672 mv(d, zr); 2673 bind(notNull); 2674 if (CompressedOops::shift() != 0) { 2675 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2676 srli(d, d, CompressedOops::shift()); 2677 } 2678 } 2679 } 2680 2681 void MacroAssembler::encode_heap_oop_not_null(Register r) { 2682 #ifdef ASSERT 2683 if (CheckCompressedOops) { 2684 Label ok; 2685 bnez(r, ok); 2686 stop("null oop passed to encode_heap_oop_not_null"); 2687 bind(ok); 2688 } 2689 #endif 2690 verify_oop_msg(r, "broken oop in encode_heap_oop_not_null"); 2691 if (CompressedOops::base() != nullptr) { 2692 sub(r, r, xheapbase); 2693 } 2694 if (CompressedOops::shift() != 0) { 2695 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2696 srli(r, r, LogMinObjAlignmentInBytes); 2697 } 2698 } 2699 2700 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 2701 #ifdef ASSERT 2702 if (CheckCompressedOops) { 2703 Label ok; 2704 bnez(src, ok); 2705 stop("null oop passed to encode_heap_oop_not_null2"); 2706 bind(ok); 2707 } 2708 #endif 2709 verify_oop_msg(src, "broken oop in encode_heap_oop_not_null2"); 2710 2711 Register data = src; 2712 if (CompressedOops::base() != nullptr) { 2713 sub(dst, src, xheapbase); 2714 data = dst; 2715 } 2716 if (CompressedOops::shift() != 0) { 2717 
assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2718 srli(dst, data, LogMinObjAlignmentInBytes); 2719 data = dst; 2720 } 2721 if (data == src) { 2722 mv(dst, src); 2723 } 2724 } 2725 2726 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { 2727 assert_different_registers(dst, tmp); 2728 assert_different_registers(src, tmp); 2729 if (UseCompressedClassPointers) { 2730 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); 2731 decode_klass_not_null(dst, tmp); 2732 } else { 2733 ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); 2734 } 2735 } 2736 2737 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { 2738 // FIXME: Should this be a store release? concurrent gcs assumes 2739 // klass length is valid if klass field is not null. 2740 if (UseCompressedClassPointers) { 2741 encode_klass_not_null(src, tmp); 2742 sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2743 } else { 2744 sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2745 } 2746 } 2747 2748 void MacroAssembler::store_klass_gap(Register dst, Register src) { 2749 if (UseCompressedClassPointers) { 2750 // Store to klass gap in destination 2751 sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); 2752 } 2753 } 2754 2755 void MacroAssembler::decode_klass_not_null(Register r, Register tmp) { 2756 assert_different_registers(r, tmp); 2757 decode_klass_not_null(r, r, tmp); 2758 } 2759 2760 void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { 2761 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2762 2763 if (CompressedKlassPointers::base() == nullptr) { 2764 if (CompressedKlassPointers::shift() != 0) { 2765 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2766 slli(dst, src, LogKlassAlignmentInBytes); 2767 } else { 2768 mv(dst, src); 2769 } 2770 return; 2771 } 2772 2773 Register xbase = dst; 2774 if (dst == src) { 2775 xbase = tmp; 2776 } 2777 2778 assert_different_registers(src, xbase); 2779 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2780 2781 if (CompressedKlassPointers::shift() != 0) { 2782 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2783 assert_different_registers(t0, xbase); 2784 shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); 2785 } else { 2786 add(dst, xbase, src); 2787 } 2788 } 2789 2790 void MacroAssembler::encode_klass_not_null(Register r, Register tmp) { 2791 assert_different_registers(r, tmp); 2792 encode_klass_not_null(r, r, tmp); 2793 } 2794 2795 void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { 2796 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2797 2798 if (CompressedKlassPointers::base() == nullptr) { 2799 if (CompressedKlassPointers::shift() != 0) { 2800 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2801 srli(dst, src, LogKlassAlignmentInBytes); 2802 } else { 2803 mv(dst, src); 2804 } 2805 return; 2806 } 2807 2808 if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && 2809 CompressedKlassPointers::shift() == 0) { 2810 zero_extend(dst, src, 32); 2811 return; 2812 } 2813 2814 Register xbase = dst; 2815 if (dst == src) { 2816 xbase = tmp; 2817 } 2818 2819 assert_different_registers(src, xbase); 2820 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2821 sub(dst, src, xbase); 2822 if 
(CompressedKlassPointers::shift() != 0) { 2823 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2824 srli(dst, dst, LogKlassAlignmentInBytes); 2825 } 2826 } 2827 2828 void MacroAssembler::decode_heap_oop_not_null(Register r) { 2829 decode_heap_oop_not_null(r, r); 2830 } 2831 2832 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 2833 assert(UseCompressedOops, "should only be used for compressed headers"); 2834 assert(Universe::heap() != nullptr, "java heap should be initialized"); 2835 // Cannot assert, unverified entry point counts instructions (see .ad file) 2836 // vtableStubs also counts instructions in pd_code_size_limit. 2837 // Also do not verify_oop as this is called by verify_oop. 2838 if (CompressedOops::shift() != 0) { 2839 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2840 slli(dst, src, LogMinObjAlignmentInBytes); 2841 if (CompressedOops::base() != nullptr) { 2842 add(dst, xheapbase, dst); 2843 } 2844 } else { 2845 assert(CompressedOops::base() == nullptr, "sanity"); 2846 mv(dst, src); 2847 } 2848 } 2849 2850 void MacroAssembler::decode_heap_oop(Register d, Register s) { 2851 if (CompressedOops::base() == nullptr) { 2852 if (CompressedOops::shift() != 0 || d != s) { 2853 slli(d, s, CompressedOops::shift()); 2854 } 2855 } else { 2856 Label done; 2857 mv(d, s); 2858 beqz(s, done); 2859 shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); 2860 bind(done); 2861 } 2862 verify_oop_msg(d, "broken oop in decode_heap_oop"); 2863 } 2864 2865 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1, 2866 Register tmp2, Register tmp3, DecoratorSet decorators) { 2867 access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3); 2868 } 2869 2870 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, 2871 Register tmp2, DecoratorSet decorators) { 2872 access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); 2873 } 2874 2875 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, 2876 Register tmp2, DecoratorSet decorators) { 2877 access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, tmp2); 2878 } 2879 2880 // Used for storing nulls. 2881 void MacroAssembler::store_heap_oop_null(Address dst) { 2882 access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg); 2883 } 2884 2885 int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, 2886 bool want_remainder, bool is_signed) 2887 { 2888 // Full implementation of Java idiv and irem. The function 2889 // returns the (pc) offset of the div instruction - may be needed 2890 // for implicit exceptions. 2891 // 2892 // input : rs1: dividend 2893 // rs2: divisor 2894 // 2895 // result: either 2896 // quotient (= rs1 idiv rs2) 2897 // remainder (= rs1 irem rs2) 2898 2899 2900 int idivl_offset = offset(); 2901 if (!want_remainder) { 2902 if (is_signed) { 2903 divw(result, rs1, rs2); 2904 } else { 2905 divuw(result, rs1, rs2); 2906 } 2907 } else { 2908 // result = rs1 % rs2; 2909 if (is_signed) { 2910 remw(result, rs1, rs2); 2911 } else { 2912 remuw(result, rs1, rs2); 2913 } 2914 } 2915 return idivl_offset; 2916 } 2917 2918 int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, 2919 bool want_remainder, bool is_signed) 2920 { 2921 // Full implementation of Java ldiv and lrem. 
The function 2922 // returns the (pc) offset of the div instruction - may be needed 2923 // for implicit exceptions. 2924 // 2925 // input : rs1: dividend 2926 // rs2: divisor 2927 // 2928 // result: either 2929 // quotient (= rs1 idiv rs2) 2930 // remainder (= rs1 irem rs2) 2931 2932 int idivq_offset = offset(); 2933 if (!want_remainder) { 2934 if (is_signed) { 2935 div(result, rs1, rs2); 2936 } else { 2937 divu(result, rs1, rs2); 2938 } 2939 } else { 2940 // result = rs1 % rs2; 2941 if (is_signed) { 2942 rem(result, rs1, rs2); 2943 } else { 2944 remu(result, rs1, rs2); 2945 } 2946 } 2947 return idivq_offset; 2948 } 2949 2950 // Look up the method for a megamorphic invokeinterface call. 2951 // The target method is determined by <intf_klass, itable_index>. 2952 // The receiver klass is in recv_klass. 2953 // On success, the result will be in method_result, and execution falls through. 2954 // On failure, execution transfers to the given label. 2955 void MacroAssembler::lookup_interface_method(Register recv_klass, 2956 Register intf_klass, 2957 RegisterOrConstant itable_index, 2958 Register method_result, 2959 Register scan_tmp, 2960 Label& L_no_such_interface, 2961 bool return_method) { 2962 assert_different_registers(recv_klass, intf_klass, scan_tmp); 2963 assert_different_registers(method_result, intf_klass, scan_tmp); 2964 assert(recv_klass != method_result || !return_method, 2965 "recv_klass can be destroyed when method isn't needed"); 2966 assert(itable_index.is_constant() || itable_index.as_register() == method_result, 2967 "caller must be same register for non-constant itable index as for method"); 2968 2969 // Compute start of first itableOffsetEntry (which is at the end of the vtable). 2970 int vtable_base = in_bytes(Klass::vtable_start_offset()); 2971 int itentry_off = in_bytes(itableMethodEntry::method_offset()); 2972 int scan_step = itableOffsetEntry::size() * wordSize; 2973 int vte_size = vtableEntry::size_in_bytes(); 2974 assert(vte_size == wordSize, "else adjust times_vte_scale"); 2975 2976 lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset())); 2977 2978 // Could store the aligned, prescaled offset in the klass. 2979 shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3); 2980 add(scan_tmp, scan_tmp, vtable_base); 2981 2982 if (return_method) { 2983 // Adjust recv_klass by scaled itable_index, so we can free itable_index. 2984 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 2985 if (itable_index.is_register()) { 2986 slli(t0, itable_index.as_register(), 3); 2987 } else { 2988 mv(t0, itable_index.as_constant() << 3); 2989 } 2990 add(recv_klass, recv_klass, t0); 2991 if (itentry_off) { 2992 add(recv_klass, recv_klass, itentry_off); 2993 } 2994 } 2995 2996 Label search, found_method; 2997 2998 ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset())); 2999 beq(intf_klass, method_result, found_method); 3000 bind(search); 3001 // Check that the previous entry is non-null. A null entry means that 3002 // the receiver class doesn't implement the interface, and wasn't the 3003 // same as when the caller was compiled. 3004 beqz(method_result, L_no_such_interface, /* is_far */ true); 3005 addi(scan_tmp, scan_tmp, scan_step); 3006 ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset())); 3007 bne(intf_klass, method_result, search); 3008 3009 bind(found_method); 3010 3011 // Got a hit.
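// scan_tmp now points at the matching itableOffsetEntry; its offset field gives the start of this interface's method block, so adding it to the (already itable_index-adjusted) recv_klass and loading yields the Method* in method_result.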
3012 if (return_method) { 3013 lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset())); 3014 add(method_result, recv_klass, scan_tmp); 3015 ld(method_result, Address(method_result)); 3016 } 3017 } 3018 3019 // Look up the method for a megamorphic invokeinterface call in a single pass over itable: 3020 // - check recv_klass (actual object class) is a subtype of resolved_klass from CompiledICData 3021 // - find a holder_klass (class that implements the method) vtable offset and get the method from vtable by index 3022 // The target method is determined by <holder_klass, itable_index>. 3023 // The receiver klass is in recv_klass. 3024 // On success, the result will be in method_result, and execution falls through. 3025 // On failure, execution transfers to the given label. 3026 void MacroAssembler::lookup_interface_method_stub(Register recv_klass, 3027 Register holder_klass, 3028 Register resolved_klass, 3029 Register method_result, 3030 Register temp_itbl_klass, 3031 Register scan_temp, 3032 int itable_index, 3033 Label& L_no_such_interface) { 3034 // 'method_result' is only used as output register at the very end of this method. 3035 // Until then we can reuse it as 'holder_offset'. 3036 Register holder_offset = method_result; 3037 assert_different_registers(resolved_klass, recv_klass, holder_klass, temp_itbl_klass, scan_temp, holder_offset); 3038 3039 int vtable_start_offset_bytes = in_bytes(Klass::vtable_start_offset()); 3040 int scan_step = itableOffsetEntry::size() * wordSize; 3041 int ioffset_bytes = in_bytes(itableOffsetEntry::interface_offset()); 3042 int ooffset_bytes = in_bytes(itableOffsetEntry::offset_offset()); 3043 int itmentry_off_bytes = in_bytes(itableMethodEntry::method_offset()); 3044 const int vte_scale = exact_log2(vtableEntry::size_in_bytes()); 3045 3046 Label L_loop_search_resolved_entry, L_resolved_found, L_holder_found; 3047 3048 lwu(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); 3049 add(recv_klass, recv_klass, vtable_start_offset_bytes + ioffset_bytes); 3050 // itableOffsetEntry[] itable = recv_klass + Klass::vtable_start_offset() 3051 // + sizeof(vtableEntry) * (recv_klass->_vtable_len); 3052 // scan_temp = &(itable[0]._interface) 3053 // temp_itbl_klass = itable[0]._interface; 3054 shadd(scan_temp, scan_temp, recv_klass, scan_temp, vte_scale); 3055 ld(temp_itbl_klass, Address(scan_temp)); 3056 mv(holder_offset, zr); 3057 3058 // Initial checks: 3059 // - if (holder_klass != resolved_klass), go to "scan for resolved" 3060 // - if (itable[0] == holder_klass), shortcut to "holder found" 3061 // - if (itable[0] == 0), no such interface 3062 bne(resolved_klass, holder_klass, L_loop_search_resolved_entry); 3063 beq(holder_klass, temp_itbl_klass, L_holder_found); 3064 beqz(temp_itbl_klass, L_no_such_interface); 3065 3066 // Loop: Look for holder_klass record in itable 3067 // do { 3068 // temp_itbl_klass = *(scan_temp += scan_step); 3069 // if (temp_itbl_klass == holder_klass) { 3070 // goto L_holder_found; // Found! 3071 // } 3072 // } while (temp_itbl_klass != 0); 3073 // goto L_no_such_interface // Not found. 
3074 Label L_search_holder; 3075 bind(L_search_holder); 3076 add(scan_temp, scan_temp, scan_step); 3077 ld(temp_itbl_klass, Address(scan_temp)); 3078 beq(holder_klass, temp_itbl_klass, L_holder_found); 3079 bnez(temp_itbl_klass, L_search_holder); 3080 3081 j(L_no_such_interface); 3082 3083 // Loop: Look for resolved_class record in itable 3084 // while (true) { 3085 // temp_itbl_klass = *(scan_temp += scan_step); 3086 // if (temp_itbl_klass == 0) { 3087 // goto L_no_such_interface; 3088 // } 3089 // if (temp_itbl_klass == resolved_klass) { 3090 // goto L_resolved_found; // Found! 3091 // } 3092 // if (temp_itbl_klass == holder_klass) { 3093 // holder_offset = scan_temp; 3094 // } 3095 // } 3096 // 3097 Label L_loop_search_resolved; 3098 bind(L_loop_search_resolved); 3099 add(scan_temp, scan_temp, scan_step); 3100 ld(temp_itbl_klass, Address(scan_temp)); 3101 bind(L_loop_search_resolved_entry); 3102 beqz(temp_itbl_klass, L_no_such_interface); 3103 beq(resolved_klass, temp_itbl_klass, L_resolved_found); 3104 bne(holder_klass, temp_itbl_klass, L_loop_search_resolved); 3105 mv(holder_offset, scan_temp); 3106 j(L_loop_search_resolved); 3107 3108 // See if we already have a holder klass. If not, go and scan for it. 3109 bind(L_resolved_found); 3110 beqz(holder_offset, L_search_holder); 3111 mv(scan_temp, holder_offset); 3112 3113 // Finally, scan_temp contains holder_klass vtable offset 3114 bind(L_holder_found); 3115 lwu(method_result, Address(scan_temp, ooffset_bytes - ioffset_bytes)); 3116 add(recv_klass, recv_klass, itable_index * wordSize + itmentry_off_bytes 3117 - vtable_start_offset_bytes - ioffset_bytes); // subtract offsets to restore the original value of recv_klass 3118 add(method_result, recv_klass, method_result); 3119 ld(method_result, Address(method_result)); 3120 } 3121 3122 // virtual method calling 3123 void MacroAssembler::lookup_virtual_method(Register recv_klass, 3124 RegisterOrConstant vtable_index, 3125 Register method_result) { 3126 const ByteSize base = Klass::vtable_start_offset(); 3127 assert(vtableEntry::size() * wordSize == 8, 3128 "adjust the scaling in the code below"); 3129 int vtable_offset_in_bytes = in_bytes(base + vtableEntry::method_offset()); 3130 3131 if (vtable_index.is_register()) { 3132 shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); 3133 ld(method_result, Address(method_result, vtable_offset_in_bytes)); 3134 } else { 3135 vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; 3136 ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes)); 3137 } 3138 } 3139 3140 void MacroAssembler::membar(uint32_t order_constraint) { 3141 address prev = pc() - MacroAssembler::instruction_size; 3142 address last = code()->last_insn(); 3143 3144 if (last != nullptr && is_membar(last) && prev == last) { 3145 // We are merging two memory barrier instructions. On RISCV we 3146 // can do this simply by ORing them together. 3147 set_membar_kind(prev, get_membar_kind(prev) | order_constraint); 3148 BLOCK_COMMENT("merged membar"); 3149 } else { 3150 code()->set_last_insn(pc()); 3151 3152 uint32_t predecessor = 0; 3153 uint32_t successor = 0; 3154 3155 membar_mask_to_pred_succ(order_constraint, predecessor, successor); 3156 fence(predecessor, successor); 3157 } 3158 } 3159 3160 void MacroAssembler::cmodx_fence() { 3161 BLOCK_COMMENT("cmodx fence"); 3162 if (VM_Version::supports_fencei_barrier()) { 3163 Assembler::fencei(); 3164 } 3165 } 3166 3167 // Form an address from base + offset in Rd.
Rd may or may not 3168 // actually be used: you must use the Address that is returned. It 3169 // is up to you to ensure that the shift provided matches the size 3170 // of your data. 3171 Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_offset) { 3172 if (is_simm12(byte_offset)) { // 12: imm in range 2^12 3173 return Address(base, byte_offset); 3174 } 3175 3176 assert_different_registers(Rd, base, noreg); 3177 3178 // Do it the hard way 3179 mv(Rd, byte_offset); 3180 add(Rd, base, Rd); 3181 return Address(Rd); 3182 } 3183 3184 void MacroAssembler::check_klass_subtype(Register sub_klass, 3185 Register super_klass, 3186 Register tmp_reg, 3187 Label& L_success) { 3188 Label L_failure; 3189 check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, nullptr); 3190 check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, nullptr); 3191 bind(L_failure); 3192 } 3193 3194 void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { 3195 ld(t0, Address(xthread, JavaThread::polling_word_offset())); 3196 if (acquire) { 3197 membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); 3198 } 3199 if (at_return) { 3200 bgtu(in_nmethod ? sp : fp, t0, slow_path, /* is_far */ true); 3201 } else { 3202 test_bit(t0, t0, exact_log2(SafepointMechanism::poll_bit())); 3203 bnez(t0, slow_path, true /* is_far */); 3204 } 3205 } 3206 3207 void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, 3208 Label &succeed, Label *fail) { 3209 assert_different_registers(addr, tmp, t0); 3210 assert_different_registers(newv, tmp, t0); 3211 assert_different_registers(oldv, tmp, t0); 3212 3213 // oldv holds comparison value 3214 // newv holds value to write in exchange 3215 // addr identifies memory word to compare against/update 3216 if (UseZacas) { 3217 mv(tmp, oldv); 3218 atomic_cas(tmp, newv, addr, Assembler::int64, Assembler::aq, Assembler::rl); 3219 beq(tmp, oldv, succeed); 3220 } else { 3221 Label retry_load, nope; 3222 bind(retry_load); 3223 // Load reserved from the memory location 3224 load_reserved(tmp, addr, int64, Assembler::aqrl); 3225 // Fail and exit if it is not what we expect 3226 bne(tmp, oldv, nope); 3227 // If the store conditional succeeds, tmp will be zero 3228 store_conditional(tmp, newv, addr, int64, Assembler::rl); 3229 beqz(tmp, succeed); 3230 // Retry only when the store conditional failed 3231 j(retry_load); 3232 3233 bind(nope); 3234 } 3235 3236 // neither amocas nor lr/sc have an implied barrier in the failing case 3237 membar(AnyAny); 3238 3239 mv(oldv, tmp); 3240 if (fail != nullptr) { 3241 j(*fail); 3242 } 3243 } 3244 3245 void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, 3246 Label &succeed, Label *fail) { 3247 assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); 3248 cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); 3249 } 3250 3251 void MacroAssembler::load_reserved(Register dst, 3252 Register addr, 3253 enum operand_size size, 3254 Assembler::Aqrl acquire) { 3255 switch (size) { 3256 case int64: 3257 lr_d(dst, addr, acquire); 3258 break; 3259 case int32: 3260 lr_w(dst, addr, acquire); 3261 break; 3262 case uint32: 3263 lr_w(dst, addr, acquire); 3264 zero_extend(dst, dst, 32); 3265 break; 3266 default: 3267 ShouldNotReachHere(); 3268 } 3269 } 3270 3271 void MacroAssembler::store_conditional(Register dst, 3272 Register new_val, 3273 Register addr, 3274 enum operand_size size, 3275
Assembler::Aqrl release) { 3276 switch (size) { 3277 case int64: 3278 sc_d(dst, new_val, addr, release); 3279 break; 3280 case int32: 3281 case uint32: 3282 sc_w(dst, new_val, addr, release); 3283 break; 3284 default: 3285 ShouldNotReachHere(); 3286 } 3287 } 3288 3289 3290 void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, 3291 Register new_val, 3292 enum operand_size size, 3293 Register tmp1, Register tmp2, Register tmp3) { 3294 assert(size == int8 || size == int16, "unsupported operand size"); 3295 3296 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; 3297 3298 andi(shift, addr, 3); 3299 slli(shift, shift, 3); 3300 3301 andi(aligned_addr, addr, ~3); 3302 3303 if (size == int8) { 3304 mv(mask, 0xff); 3305 } else { 3306 // size == int16 case 3307 mv(mask, -1); 3308 zero_extend(mask, mask, 16); 3309 } 3310 sll(mask, mask, shift); 3311 3312 notr(not_mask, mask); 3313 3314 sll(expected, expected, shift); 3315 andr(expected, expected, mask); 3316 3317 sll(new_val, new_val, shift); 3318 andr(new_val, new_val, mask); 3319 } 3320 3321 // cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. 3322 // It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w or amocas.w, 3323 // which are forced to work with 4-byte aligned address. 3324 void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, 3325 Register new_val, 3326 enum operand_size size, 3327 Assembler::Aqrl acquire, Assembler::Aqrl release, 3328 Register result, bool result_as_bool, 3329 Register tmp1, Register tmp2, Register tmp3) { 3330 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; 3331 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); 3332 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); 3333 3334 Label retry, fail, done; 3335 3336 bind(retry); 3337 3338 if (UseZacas) { 3339 lw(old, aligned_addr); 3340 3341 // if old & mask != expected 3342 andr(tmp, old, mask); 3343 bne(tmp, expected, fail); 3344 3345 andr(tmp, old, not_mask); 3346 orr(tmp, tmp, new_val); 3347 3348 atomic_cas(old, tmp, aligned_addr, operand_size::int32, acquire, release); 3349 bne(tmp, old, retry); 3350 } else { 3351 lr_w(old, aligned_addr, acquire); 3352 andr(tmp, old, mask); 3353 bne(tmp, expected, fail); 3354 3355 andr(tmp, old, not_mask); 3356 orr(tmp, tmp, new_val); 3357 sc_w(tmp, tmp, aligned_addr, release); 3358 bnez(tmp, retry); 3359 } 3360 3361 if (result_as_bool) { 3362 mv(result, 1); 3363 j(done); 3364 3365 bind(fail); 3366 mv(result, zr); 3367 3368 bind(done); 3369 } else { 3370 andr(tmp, old, mask); 3371 3372 bind(fail); 3373 srl(result, tmp, shift); 3374 3375 if (size == int8) { 3376 sign_extend(result, result, 8); 3377 } else { 3378 // size == int16 case 3379 sign_extend(result, result, 16); 3380 } 3381 } 3382 } 3383 3384 // weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement 3385 // the weak CAS stuff. The major difference is that it just failed when store conditional 3386 // failed. 
3387 void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, 3388 Register new_val, 3389 enum operand_size size, 3390 Assembler::Aqrl acquire, Assembler::Aqrl release, 3391 Register result, 3392 Register tmp1, Register tmp2, Register tmp3) { 3393 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; 3394 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); 3395 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); 3396 3397 Label fail, done; 3398 3399 if (UseZacas) { 3400 lw(old, aligned_addr); 3401 3402 // if old & mask != expected 3403 andr(tmp, old, mask); 3404 bne(tmp, expected, fail); 3405 3406 andr(tmp, old, not_mask); 3407 orr(tmp, tmp, new_val); 3408 3409 atomic_cas(tmp, new_val, addr, operand_size::int32, acquire, release); 3410 bne(tmp, old, fail); 3411 } else { 3412 lr_w(old, aligned_addr, acquire); 3413 andr(tmp, old, mask); 3414 bne(tmp, expected, fail); 3415 3416 andr(tmp, old, not_mask); 3417 orr(tmp, tmp, new_val); 3418 sc_w(tmp, tmp, aligned_addr, release); 3419 bnez(tmp, fail); 3420 } 3421 3422 // Success 3423 mv(result, 1); 3424 j(done); 3425 3426 // Fail 3427 bind(fail); 3428 mv(result, zr); 3429 3430 bind(done); 3431 } 3432 3433 void MacroAssembler::cmpxchg(Register addr, Register expected, 3434 Register new_val, 3435 enum operand_size size, 3436 Assembler::Aqrl acquire, Assembler::Aqrl release, 3437 Register result, bool result_as_bool) { 3438 assert(size != int8 && size != int16, "unsupported operand size"); 3439 assert_different_registers(addr, t0); 3440 assert_different_registers(expected, t0); 3441 assert_different_registers(new_val, t0); 3442 3443 if (UseZacas) { 3444 if (result_as_bool) { 3445 mv(t0, expected); 3446 atomic_cas(t0, new_val, addr, size, acquire, release); 3447 xorr(t0, t0, expected); 3448 seqz(result, t0); 3449 } else { 3450 mv(result, expected); 3451 atomic_cas(result, new_val, addr, size, acquire, release); 3452 } 3453 return; 3454 } 3455 3456 Label retry_load, done, ne_done; 3457 bind(retry_load); 3458 load_reserved(t0, addr, size, acquire); 3459 bne(t0, expected, ne_done); 3460 store_conditional(t0, new_val, addr, size, release); 3461 bnez(t0, retry_load); 3462 3463 // equal, succeed 3464 if (result_as_bool) { 3465 mv(result, 1); 3466 } else { 3467 mv(result, expected); 3468 } 3469 j(done); 3470 3471 // not equal, failed 3472 bind(ne_done); 3473 if (result_as_bool) { 3474 mv(result, zr); 3475 } else { 3476 mv(result, t0); 3477 } 3478 3479 bind(done); 3480 } 3481 3482 void MacroAssembler::cmpxchg_weak(Register addr, Register expected, 3483 Register new_val, 3484 enum operand_size size, 3485 Assembler::Aqrl acquire, Assembler::Aqrl release, 3486 Register result) { 3487 if (UseZacas) { 3488 cmpxchg(addr, expected, new_val, size, acquire, release, result, true); 3489 return; 3490 } 3491 3492 assert_different_registers(addr, t0); 3493 assert_different_registers(expected, t0); 3494 assert_different_registers(new_val, t0); 3495 3496 Label fail, done; 3497 load_reserved(t0, addr, size, acquire); 3498 bne(t0, expected, fail); 3499 store_conditional(t0, new_val, addr, size, release); 3500 bnez(t0, fail); 3501 3502 // Success 3503 mv(result, 1); 3504 j(done); 3505 3506 // Fail 3507 bind(fail); 3508 mv(result, zr); 3509 3510 bind(done); 3511 } 3512 3513 #define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ 3514 void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ 3515 prev = prev->is_valid() ? 
prev : zr; \ 3516 if (incr.is_register()) { \ 3517 AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3518 } else { \ 3519 mv(t0, incr.as_constant()); \ 3520 AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3521 } \ 3522 return; \ 3523 } 3524 3525 ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) 3526 ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) 3527 ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) 3528 ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) 3529 3530 #undef ATOMIC_OP 3531 3532 #define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ 3533 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ 3534 prev = prev->is_valid() ? prev : zr; \ 3535 AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3536 return; \ 3537 } 3538 3539 ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) 3540 ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) 3541 ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) 3542 ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) 3543 3544 #undef ATOMIC_XCHG 3545 3546 #define ATOMIC_XCHGU(OP1, OP2) \ 3547 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ 3548 atomic_##OP2(prev, newv, addr); \ 3549 zero_extend(prev, prev, 32); \ 3550 return; \ 3551 } 3552 3553 ATOMIC_XCHGU(xchgwu, xchgw) 3554 ATOMIC_XCHGU(xchgalwu, xchgalw) 3555 3556 #undef ATOMIC_XCHGU 3557 3558 #define ATOMIC_CAS(OP, AOP, ACQUIRE, RELEASE) \ 3559 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ 3560 assert(UseZacas, "invariant"); \ 3561 prev = prev->is_valid() ? prev : zr; \ 3562 AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3563 return; \ 3564 } 3565 3566 ATOMIC_CAS(cas, amocas_d, Assembler::relaxed, Assembler::relaxed) 3567 ATOMIC_CAS(casw, amocas_w, Assembler::relaxed, Assembler::relaxed) 3568 ATOMIC_CAS(casl, amocas_d, Assembler::relaxed, Assembler::rl) 3569 ATOMIC_CAS(caslw, amocas_w, Assembler::relaxed, Assembler::rl) 3570 ATOMIC_CAS(casal, amocas_d, Assembler::aq, Assembler::rl) 3571 ATOMIC_CAS(casalw, amocas_w, Assembler::aq, Assembler::rl) 3572 3573 #undef ATOMIC_CAS 3574 3575 #define ATOMIC_CASU(OP1, OP2) \ 3576 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ 3577 atomic_##OP2(prev, newv, addr); \ 3578 zero_extend(prev, prev, 32); \ 3579 return; \ 3580 } 3581 3582 ATOMIC_CASU(caswu, casw) 3583 ATOMIC_CASU(caslwu, caslw) 3584 ATOMIC_CASU(casalwu, casalw) 3585 3586 #undef ATOMIC_CASU 3587 3588 void MacroAssembler::atomic_cas( 3589 Register prev, Register newv, Register addr, enum operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release) { 3590 switch (size) { 3591 case int64: 3592 switch ((Assembler::Aqrl)(acquire | release)) { 3593 case Assembler::relaxed: 3594 atomic_cas(prev, newv, addr); 3595 break; 3596 case Assembler::rl: 3597 atomic_casl(prev, newv, addr); 3598 break; 3599 case Assembler::aqrl: 3600 atomic_casal(prev, newv, addr); 3601 break; 3602 default: 3603 ShouldNotReachHere(); 3604 } 3605 break; 3606 case int32: 3607 switch ((Assembler::Aqrl)(acquire | release)) { 3608 case Assembler::relaxed: 3609 atomic_casw(prev, newv, addr); 3610 break; 3611 case Assembler::rl: 3612 atomic_caslw(prev, newv, addr); 3613 break; 3614 case Assembler::aqrl: 3615 atomic_casalw(prev, newv, addr); 3616 break; 3617 default: 3618 ShouldNotReachHere(); 3619 } 3620 break; 3621 case uint32: 3622 switch 
((Assembler::Aqrl)(acquire | release)) { 3623 case Assembler::relaxed: 3624 atomic_caswu(prev, newv, addr); 3625 break; 3626 case Assembler::rl: 3627 atomic_caslwu(prev, newv, addr); 3628 break; 3629 case Assembler::aqrl: 3630 atomic_casalwu(prev, newv, addr); 3631 break; 3632 default: 3633 ShouldNotReachHere(); 3634 } 3635 break; 3636 default: 3637 ShouldNotReachHere(); 3638 } 3639 } 3640 3641 void MacroAssembler::far_jump(const Address &entry, Register tmp) { 3642 assert(CodeCache::find_blob(entry.target()) != nullptr, 3643 "destination of far jump not found in code cache"); 3644 assert(entry.rspec().type() == relocInfo::external_word_type 3645 || entry.rspec().type() == relocInfo::runtime_call_type 3646 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type"); 3647 // Fixed length: see MacroAssembler::far_branch_size() 3648 // We can use auipc + jr here because we know that the total size of 3649 // the code cache cannot exceed 2Gb. 3650 relocate(entry.rspec(), [&] { 3651 int64_t distance = entry.target() - pc(); 3652 int32_t offset = ((int32_t)distance << 20) >> 20; 3653 assert(is_valid_32bit_offset(distance), "Far jump using wrong instructions."); 3654 auipc(tmp, (int32_t)distance + 0x800); 3655 jr(tmp, offset); 3656 }); 3657 } 3658 3659 void MacroAssembler::far_call(const Address &entry, Register tmp) { 3660 assert(CodeCache::find_blob(entry.target()) != nullptr, 3661 "destination of far call not found in code cache"); 3662 assert(entry.rspec().type() == relocInfo::external_word_type 3663 || entry.rspec().type() == relocInfo::runtime_call_type 3664 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type"); 3665 // Fixed length: see MacroAssembler::far_branch_size() 3666 // We can use auipc + jalr here because we know that the total size of 3667 // the code cache cannot exceed 2Gb. 3668 relocate(entry.rspec(), [&] { 3669 int64_t distance = entry.target() - pc(); 3670 int32_t offset = ((int32_t)distance << 20) >> 20; 3671 assert(is_valid_32bit_offset(distance), "Far call using wrong instructions."); 3672 auipc(tmp, (int32_t)distance + 0x800); 3673 jalr(tmp, offset); 3674 }); 3675 } 3676 3677 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 3678 Register super_klass, 3679 Register tmp_reg, 3680 Label* L_success, 3681 Label* L_failure, 3682 Label* L_slow_path, 3683 Register super_check_offset) { 3684 assert_different_registers(sub_klass, super_klass, tmp_reg); 3685 bool must_load_sco = (super_check_offset == noreg); 3686 if (must_load_sco) { 3687 assert(tmp_reg != noreg, "supply either a temp or a register offset"); 3688 } else { 3689 assert_different_registers(sub_klass, super_klass, super_check_offset); 3690 } 3691 3692 Label L_fallthrough; 3693 int label_nulls = 0; 3694 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 3695 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 3696 if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; } 3697 assert(label_nulls <= 1, "at most one null in batch"); 3698 3699 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3700 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 3701 Address super_check_offset_addr(super_klass, sco_offset); 3702 3703 // Hacked jmp, which may only be used just before L_fallthrough. 3704 #define final_jmp(label) \ 3705 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 3706 else j(label) /*omit semi*/ 3707 3708 // If the pointers are equal, we are done (e.g., String[] elements). 
3709 // This self-check enables sharing of secondary supertype arrays among 3710 // non-primary types such as array-of-interface. Otherwise, each such 3711 // type would need its own customized SSA. 3712 // We move this check to the front of the fast path because many 3713 // type checks are in fact trivially successful in this manner, 3714 // so we get a nicely predicted branch right at the start of the check. 3715 beq(sub_klass, super_klass, *L_success); 3716 3717 // Check the supertype display: 3718 if (must_load_sco) { 3719 lwu(tmp_reg, super_check_offset_addr); 3720 super_check_offset = tmp_reg; 3721 } 3722 add(t0, sub_klass, super_check_offset); 3723 Address super_check_addr(t0); 3724 ld(t0, super_check_addr); // load displayed supertype 3725 3726 // This check has worked decisively for primary supers. 3727 // Secondary supers are sought in the super_cache ('super_cache_addr'). 3728 // (Secondary supers are interfaces and very deeply nested subtypes.) 3729 // This works in the same check above because of a tricky aliasing 3730 // between the super_Cache and the primary super display elements. 3731 // (The 'super_check_addr' can address either, as the case requires.) 3732 // Note that the cache is updated below if it does not help us find 3733 // what we need immediately. 3734 // So if it was a primary super, we can just fail immediately. 3735 // Otherwise, it's the slow path for us (no success at this point). 3736 3737 beq(super_klass, t0, *L_success); 3738 mv(t1, sc_offset); 3739 if (L_failure == &L_fallthrough) { 3740 beq(super_check_offset, t1, *L_slow_path); 3741 } else { 3742 bne(super_check_offset, t1, *L_failure, /* is_far */ true); 3743 final_jmp(*L_slow_path); 3744 } 3745 3746 bind(L_fallthrough); 3747 3748 #undef final_jmp 3749 } 3750 3751 // Scans count pointer sized words at [addr] for occurrence of value, 3752 // generic 3753 void MacroAssembler::repne_scan(Register addr, Register value, Register count, 3754 Register tmp) { 3755 Label Lloop, Lexit; 3756 beqz(count, Lexit); 3757 bind(Lloop); 3758 ld(tmp, addr); 3759 beq(value, tmp, Lexit); 3760 add(addr, addr, wordSize); 3761 sub(count, count, 1); 3762 bnez(count, Lloop); 3763 bind(Lexit); 3764 } 3765 3766 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 3767 Register super_klass, 3768 Register tmp1_reg, 3769 Register tmp2_reg, 3770 Label* L_success, 3771 Label* L_failure) { 3772 assert_different_registers(sub_klass, super_klass, tmp1_reg); 3773 if (tmp2_reg != noreg) { 3774 assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); 3775 } 3776 #define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) 3777 3778 Label L_fallthrough; 3779 int label_nulls = 0; 3780 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 3781 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 3782 3783 assert(label_nulls <= 1, "at most one null in the batch"); 3784 3785 // A couple of useful fields in sub_klass: 3786 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 3787 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3788 Address secondary_supers_addr(sub_klass, ss_offset); 3789 Address super_cache_addr( sub_klass, sc_offset); 3790 3791 BLOCK_COMMENT("check_klass_subtype_slow_path"); 3792 3793 // Do a linear scan of the secondary super-klass chain. 3794 // This code is rarely used, so simplicity is a virtue here. 3795 // The repne_scan instruction uses fixed registers, which we must spill. 
// Don't worry too much about pre-existing connections with the input regs. 3797 3798 assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) 3799 assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) 3800 3801 RegSet pushed_registers; 3802 if (!IS_A_TEMP(x12)) { 3803 pushed_registers += x12; 3804 } 3805 if (!IS_A_TEMP(x15)) { 3806 pushed_registers += x15; 3807 } 3808 3809 if (super_klass != x10) { 3810 if (!IS_A_TEMP(x10)) { 3811 pushed_registers += x10; 3812 } 3813 } 3814 3815 push_reg(pushed_registers, sp); 3816 3817 // Get super_klass value into x10 (even if it was in x15 or x12) 3818 mv(x10, super_klass); 3819 3820 #ifndef PRODUCT 3821 incrementw(ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr)); 3822 #endif // PRODUCT 3823 3824 // We will consult the secondary-super array. 3825 ld(x15, secondary_supers_addr); 3826 // Load the array length. 3827 lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes())); 3828 // Skip to start of data. 3829 add(x15, x15, Array<Klass*>::base_offset_in_bytes()); 3830 3831 // Set t0 to an obvious invalid value, falling through by default 3832 mv(t0, -1); 3833 // Scan X12 words at [X15] for an occurrence of X10. 3834 repne_scan(x15, x10, x12, t0); 3835 3836 // pop will restore x10, so we should use a temp register to keep its value 3837 mv(t1, x10); 3838 3839 // Unspill the temp registers: 3840 pop_reg(pushed_registers, sp); 3841 3842 bne(t1, t0, *L_failure); 3843 3844 // Success. Cache the super we found and proceed in triumph. 3845 sd(super_klass, super_cache_addr); 3846 3847 if (L_success != &L_fallthrough) { 3848 j(*L_success); 3849 } 3850 3851 #undef IS_A_TEMP 3852 3853 bind(L_fallthrough); 3854 } 3855 3856 // population_count variant for running without the CPOP 3857 // instruction, which was introduced with the Zbb extension. 3858 void MacroAssembler::population_count(Register dst, Register src, 3859 Register tmp1, Register tmp2) { 3860 if (UsePopCountInstruction) { 3861 cpop(dst, src); 3862 } else { 3863 assert_different_registers(src, tmp1, tmp2); 3864 assert_different_registers(dst, tmp1, tmp2); 3865 Label loop, done; 3866 3867 mv(tmp1, src); 3868 // dst = 0; 3869 // while(tmp1 != 0) { 3870 // dst++; 3871 // tmp1 &= (tmp1 - 1); 3872 // } 3873 mv(dst, zr); 3874 beqz(tmp1, done); 3875 { 3876 bind(loop); 3877 addi(dst, dst, 1); 3878 addi(tmp2, tmp1, -1); 3879 andr(tmp1, tmp1, tmp2); 3880 bnez(tmp1, loop); 3881 } 3882 bind(done); 3883 } 3884 } 3885 3886 // Ensure that the inline code and the stub are using the same registers 3887 // as we need to call the stub from inline code when there is a collision 3888 // in the hashed lookup in the secondary supers array.
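//
// As a rough model of the hashed lookup implemented below (an illustrative
// assumption based on a 64-entry table; the generated code is authoritative):
//
//   // bit = super_klass_slot;                        // hash slot of super_klass, 0..63
//   // if (((bitmap >> bit) & 1) == 0) return 1;      // miss: certainly not a secondary super
//   // idx = population_count(bitmap << (63 - bit));  // rank of 'bit' among the set bits
//   // if (secondary_supers->at(idx - 1) == super_klass) return 0;  // hit (result == 0)
//   // // otherwise probe linearly via the slow-path stub.
//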
3889 #define LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS(r_super_klass, r_array_base, r_array_length, \ 3890 r_array_index, r_sub_klass, result, r_bitmap) \ 3891 do { \ 3892 assert(r_super_klass == x10 && \ 3893 r_array_base == x11 && \ 3894 r_array_length == x12 && \ 3895 (r_array_index == x13 || r_array_index == noreg) && \ 3896 (r_sub_klass == x14 || r_sub_klass == noreg) && \ 3897 (result == x15 || result == noreg) && \ 3898 (r_bitmap == x16 || r_bitmap == noreg), "registers must match riscv.ad"); \ 3899 } while(0) 3900 3901 // Return true: we succeeded in generating this code 3902 bool MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, 3903 Register r_super_klass, 3904 Register result, 3905 Register tmp1, 3906 Register tmp2, 3907 Register tmp3, 3908 Register tmp4, 3909 u1 super_klass_slot, 3910 bool stub_is_near) { 3911 assert_different_registers(r_sub_klass, r_super_klass, result, tmp1, tmp2, tmp3, tmp4, t0); 3912 3913 Label L_fallthrough; 3914 3915 BLOCK_COMMENT("lookup_secondary_supers_table {"); 3916 3917 const Register 3918 r_array_base = tmp1, // x11 3919 r_array_length = tmp2, // x12 3920 r_array_index = tmp3, // x13 3921 r_bitmap = tmp4; // x16 3922 3923 LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS(r_super_klass, r_array_base, r_array_length, 3924 r_array_index, r_sub_klass, result, r_bitmap); 3925 3926 u1 bit = super_klass_slot; 3927 3928 // Initialize result value to 1 which means mismatch. 3929 mv(result, 1); 3930 3931 ld(r_bitmap, Address(r_sub_klass, Klass::bitmap_offset())); 3932 3933 // First check the bitmap to see if super_klass might be present. If 3934 // the bit is zero, we are certain that super_klass is not one of 3935 // the secondary supers. 3936 test_bit(t0, r_bitmap, bit); 3937 beqz(t0, L_fallthrough); 3938 3939 // Get the first array index that can contain super_klass into r_array_index. 3940 if (bit != 0) { 3941 slli(r_array_index, r_bitmap, (Klass::SECONDARY_SUPERS_TABLE_MASK - bit)); 3942 population_count(r_array_index, r_array_index, tmp1, tmp2); 3943 } else { 3944 mv(r_array_index, (u1)1); 3945 } 3946 3947 // We will consult the secondary-super array. 3948 ld(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset()))); 3949 3950 // The value i in r_array_index is >= 1, so even though r_array_base 3951 // points to the length, we don't need to adjust it to point to the data. 3952 assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "Adjust this code"); 3953 assert(Array<Klass*>::length_offset_in_bytes() == 0, "Adjust this code"); 3954 3955 shadd(result, r_array_index, r_array_base, result, LogBytesPerWord); 3956 ld(result, Address(result)); 3957 xorr(result, result, r_super_klass); 3958 beqz(result, L_fallthrough); // Found a match 3959 3960 // Is there another entry to check? Consult the bitmap. 3961 test_bit(t0, r_bitmap, (bit + 1) & Klass::SECONDARY_SUPERS_TABLE_MASK); 3962 beqz(t0, L_fallthrough); 3963 3964 // Linear probe. 3965 if (bit != 0) { 3966 ror_imm(r_bitmap, r_bitmap, bit); 3967 } 3968 3969 // The slot we just inspected is at secondary_supers[r_array_index - 1]. 3970 // The next slot to be inspected, by the stub we're about to call, 3971 // is secondary_supers[r_array_index]. Bits 0 and 1 in the bitmap 3972 // have been checked. 
3973 rt_call(StubRoutines::lookup_secondary_supers_table_slow_path_stub()); 3974 3975 BLOCK_COMMENT("} lookup_secondary_supers_table"); 3976 3977 bind(L_fallthrough); 3978 3979 if (VerifySecondarySupers) { 3980 verify_secondary_supers_table(r_sub_klass, r_super_klass, // x14, x10 3981 result, tmp1, tmp2, tmp3); // x15, x11, x12, x13 3982 } 3983 return true; 3984 } 3985 3986 // Called by code generated by check_klass_subtype_slow_path 3987 // above. This is called when there is a collision in the hashed 3988 // lookup in the secondary supers array. 3989 void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_klass, 3990 Register r_array_base, 3991 Register r_array_index, 3992 Register r_bitmap, 3993 Register result, 3994 Register tmp1) { 3995 assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, tmp1, result, t0); 3996 3997 const Register 3998 r_array_length = tmp1, 3999 r_sub_klass = noreg; // unused 4000 4001 LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS(r_super_klass, r_array_base, r_array_length, 4002 r_array_index, r_sub_klass, result, r_bitmap); 4003 4004 Label L_matched, L_fallthrough, L_bitmap_full; 4005 4006 // Initialize result value to 1 which means mismatch. 4007 mv(result, 1); 4008 4009 // Load the array length. 4010 lwu(r_array_length, Address(r_array_base, Array<Klass*>::length_offset_in_bytes())); 4011 // And adjust the array base to point to the data. 4012 // NB! Effectively increments current slot index by 1. 4013 assert(Array<Klass*>::base_offset_in_bytes() == wordSize, ""); 4014 addi(r_array_base, r_array_base, Array<Klass*>::base_offset_in_bytes()); 4015 4016 // Check if bitmap is SECONDARY_SUPERS_BITMAP_FULL 4017 assert(Klass::SECONDARY_SUPERS_BITMAP_FULL == ~uintx(0), "Adjust this code"); 4018 subw(t0, r_array_length, Klass::SECONDARY_SUPERS_TABLE_SIZE - 2); 4019 bgtz(t0, L_bitmap_full); 4020 4021 // NB! Our caller has checked bits 0 and 1 in the bitmap. The 4022 // current slot (at secondary_supers[r_array_index]) has not yet 4023 // been inspected, and r_array_index may be out of bounds if we 4024 // wrapped around the end of the array. 4025 4026 { // This is conventional linear probing, but instead of terminating 4027 // when a null entry is found in the table, we maintain a bitmap 4028 // in which a 0 indicates missing entries. 4029 // The check above guarantees there are 0s in the bitmap, so the loop 4030 // eventually terminates. 4031 Label L_loop; 4032 bind(L_loop); 4033 4034 // Check for wraparound. 4035 Label skip; 4036 blt(r_array_index, r_array_length, skip); 4037 mv(r_array_index, zr); 4038 bind(skip); 4039 4040 shadd(t0, r_array_index, r_array_base, t0, LogBytesPerWord); 4041 ld(t0, Address(t0)); 4042 beq(t0, r_super_klass, L_matched); 4043 4044 test_bit(t0, r_bitmap, 2); // look-ahead check (Bit 2); result is non-zero 4045 beqz(t0, L_fallthrough); 4046 4047 ror_imm(r_bitmap, r_bitmap, 1); 4048 addi(r_array_index, r_array_index, 1); 4049 j(L_loop); 4050 } 4051 4052 { // Degenerate case: more than 64 secondary supers. 4053 // FIXME: We could do something smarter here, maybe a vectorized 4054 // comparison or a binary search, but is that worth any added 4055 // complexity? 4056 bind(L_bitmap_full); 4057 repne_scan(r_array_base, r_super_klass, r_array_length, t0); 4058 bne(r_super_klass, t0, L_fallthrough); 4059 } 4060 4061 bind(L_matched); 4062 mv(result, zr); 4063 4064 bind(L_fallthrough); 4065 } 4066 4067 // Make sure that the hashed lookup and a linear scan agree. 
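//
// (Illustration only:) the property being verified can be stated as
//
//   // bool hashed = (result == 0);                    // from lookup_secondary_supers_table
//   // bool linear = false;                            // what repne_scan() recomputes below
//   // for (int i = 0; i < secondary_supers->length(); i++) {
//   //   linear |= (secondary_supers->at(i) == r_super_klass);
//   // }
//   // guarantee(hashed == linear, "mismatch");
//
// with any disagreement routed to Klass::on_secondary_supers_verification_failure().
//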
4068 void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass, 4069 Register r_super_klass, 4070 Register result, 4071 Register tmp1, 4072 Register tmp2, 4073 Register tmp3) { 4074 assert_different_registers(r_sub_klass, r_super_klass, tmp1, tmp2, tmp3, result, t0); 4075 4076 const Register 4077 r_array_base = tmp1, // X11 4078 r_array_length = tmp2, // X12 4079 r_array_index = noreg, // unused 4080 r_bitmap = noreg; // unused 4081 4082 LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS(r_super_klass, r_array_base, r_array_length, 4083 r_array_index, r_sub_klass, result, r_bitmap); 4084 4085 BLOCK_COMMENT("verify_secondary_supers_table {"); 4086 4087 // We will consult the secondary-super array. 4088 ld(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset()))); 4089 4090 // Load the array length. 4091 lwu(r_array_length, Address(r_array_base, Array<Klass*>::length_offset_in_bytes())); 4092 // And adjust the array base to point to the data. 4093 addi(r_array_base, r_array_base, Array<Klass*>::base_offset_in_bytes()); 4094 4095 repne_scan(r_array_base, r_super_klass, r_array_length, t0); 4096 Label failed; 4097 mv(tmp3, 1); 4098 bne(r_super_klass, t0, failed); 4099 mv(tmp3, zr); 4100 bind(failed); 4101 4102 snez(result, result); // normalize result to 0/1 for comparison 4103 4104 Label passed; 4105 beq(tmp3, result, passed); 4106 { 4107 mv(x10, r_super_klass); 4108 mv(x11, r_sub_klass); 4109 mv(x12, tmp3); 4110 mv(x13, result); 4111 mv(x14, (address)("mismatch")); 4112 rt_call(CAST_FROM_FN_PTR(address, Klass::on_secondary_supers_verification_failure)); 4113 should_not_reach_here(); 4114 } 4115 bind(passed); 4116 4117 BLOCK_COMMENT("} verify_secondary_supers_table"); 4118 } 4119 4120 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 4121 void MacroAssembler::tlab_allocate(Register obj, 4122 Register var_size_in_bytes, 4123 int con_size_in_bytes, 4124 Register tmp1, 4125 Register tmp2, 4126 Label& slow_case, 4127 bool is_far) { 4128 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 4129 bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); 4130 } 4131 4132 // get_thread() can be called anywhere inside generated code so we 4133 // need to save whatever non-callee save context might get clobbered 4134 // by the call to Thread::current() or, indeed, the call setup code. 
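//
// (Sketch of the resulting shape, not the authoritative sequence:)
//
//   // push {ra, t0-t2, a0-a7, t3-t6} - {thread}   // all caller-saved integer regs
//   // call Thread::current()
//   // mv   thread, a0
//   // pop  the same register set
//
// so the only observable effect is that 'thread' ends up holding the current thread.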
4135 void MacroAssembler::get_thread(Register thread) { 4136 // save all call-clobbered regs except thread 4137 RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + 4138 RegSet::range(x28, x31) + ra - thread; 4139 push_reg(saved_regs, sp); 4140 4141 mv(ra, CAST_FROM_FN_PTR(address, Thread::current)); 4142 jalr(ra); 4143 if (thread != c_rarg0) { 4144 mv(thread, c_rarg0); 4145 } 4146 4147 // restore pushed registers 4148 pop_reg(saved_regs, sp); 4149 } 4150 4151 void MacroAssembler::load_byte_map_base(Register reg) { 4152 CardTable::CardValue* byte_map_base = 4153 ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); 4154 mv(reg, (uint64_t)byte_map_base); 4155 } 4156 4157 void MacroAssembler::build_frame(int framesize) { 4158 assert(framesize >= 2, "framesize must include space for FP/RA"); 4159 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 4160 sub(sp, sp, framesize); 4161 sd(fp, Address(sp, framesize - 2 * wordSize)); 4162 sd(ra, Address(sp, framesize - wordSize)); 4163 if (PreserveFramePointer) { add(fp, sp, framesize); } 4164 } 4165 4166 void MacroAssembler::remove_frame(int framesize) { 4167 assert(framesize >= 2, "framesize must include space for FP/RA"); 4168 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 4169 ld(fp, Address(sp, framesize - 2 * wordSize)); 4170 ld(ra, Address(sp, framesize - wordSize)); 4171 add(sp, sp, framesize); 4172 } 4173 4174 void MacroAssembler::reserved_stack_check() { 4175 // testing if reserved zone needs to be enabled 4176 Label no_reserved_zone_enabling; 4177 4178 ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); 4179 bltu(sp, t0, no_reserved_zone_enabling); 4180 4181 enter(); // RA and FP are live. 4182 mv(c_rarg0, xthread); 4183 rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)); 4184 leave(); 4185 4186 // We have already removed our own frame. 4187 // throw_delayed_StackOverflowError will think that it's been 4188 // called by our caller. 4189 la(t0, RuntimeAddress(SharedRuntime::throw_delayed_StackOverflowError_entry())); 4190 jr(t0); 4191 should_not_reach_here(); 4192 4193 bind(no_reserved_zone_enabling); 4194 } 4195 4196 // Move the address of the polling page into dest. 4197 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { 4198 ld(dest, Address(xthread, JavaThread::polling_page_offset())); 4199 } 4200 4201 // Read the polling page. The address of the polling page must 4202 // already be in r. 
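//
// (Illustration, assuming HotSpot's usual page-protection polling scheme:) a full
// poll emitted with these two helpers boils down to
//
//   // ld   t0, polling_page_offset(xthread)   // get_polling_page
//   // lwu  zr, 0(t0)                          // read_polling_page
//
// where the lwu is harmless while the page is readable and traps into the signal
// handler once the VM arms the safepoint by protecting the page.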
4203 void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { 4204 relocate(rtype, [&] { 4205 lwu(zr, Address(r, offset)); 4206 }); 4207 } 4208 4209 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 4210 #ifdef ASSERT 4211 { 4212 ThreadInVMfromUnknown tiv; 4213 assert (UseCompressedOops, "should only be used for compressed oops"); 4214 assert (Universe::heap() != nullptr, "java heap should be initialized"); 4215 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 4216 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 4217 } 4218 #endif 4219 int oop_index = oop_recorder()->find_index(obj); 4220 relocate(oop_Relocation::spec(oop_index), [&] { 4221 li32(dst, 0xDEADBEEF); 4222 }); 4223 zero_extend(dst, dst, 32); 4224 } 4225 4226 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 4227 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 4228 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 4229 int index = oop_recorder()->find_index(k); 4230 assert(!Universe::heap()->is_in(k), "should not be an oop"); 4231 4232 narrowKlass nk = CompressedKlassPointers::encode(k); 4233 relocate(metadata_Relocation::spec(index), [&] { 4234 li32(dst, nk); 4235 }); 4236 zero_extend(dst, dst, 32); 4237 } 4238 4239 // Maybe emit a call via a trampoline. If the code cache is small 4240 // trampolines won't be emitted. 4241 address MacroAssembler::trampoline_call(Address entry) { 4242 assert(entry.rspec().type() == relocInfo::runtime_call_type || 4243 entry.rspec().type() == relocInfo::opt_virtual_call_type || 4244 entry.rspec().type() == relocInfo::static_call_type || 4245 entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); 4246 4247 address target = entry.target(); 4248 4249 // We need a trampoline if branches are far. 
4250 if (!in_scratch_emit_size()) { 4251 if (entry.rspec().type() == relocInfo::runtime_call_type) { 4252 assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs"); 4253 code()->share_trampoline_for(entry.target(), offset()); 4254 } else { 4255 address stub = emit_trampoline_stub(offset(), target); 4256 if (stub == nullptr) { 4257 postcond(pc() == badAddress); 4258 return nullptr; // CodeCache is full 4259 } 4260 } 4261 } 4262 target = pc(); 4263 4264 address call_pc = pc(); 4265 #ifdef ASSERT 4266 if (entry.rspec().type() != relocInfo::runtime_call_type) { 4267 assert_alignment(call_pc); 4268 } 4269 #endif 4270 relocate(entry.rspec(), [&] { 4271 jump_link(target, t0); 4272 }); 4273 4274 postcond(pc() != badAddress); 4275 return call_pc; 4276 } 4277 4278 address MacroAssembler::load_and_call(Address entry) { 4279 assert(entry.rspec().type() == relocInfo::runtime_call_type || 4280 entry.rspec().type() == relocInfo::opt_virtual_call_type || 4281 entry.rspec().type() == relocInfo::static_call_type || 4282 entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); 4283 4284 address target = entry.target(); 4285 4286 if (!in_scratch_emit_size()) { 4287 address stub = emit_address_stub(offset(), target); 4288 if (stub == nullptr) { 4289 postcond(pc() == badAddress); 4290 return nullptr; // CodeCache is full 4291 } 4292 } 4293 4294 address call_pc = pc(); 4295 #ifdef ASSERT 4296 if (entry.rspec().type() != relocInfo::runtime_call_type) { 4297 assert_alignment(call_pc); 4298 } 4299 #endif 4300 relocate(entry.rspec(), [&] { 4301 load_link_jump(target); 4302 }); 4303 4304 postcond(pc() != badAddress); 4305 return call_pc; 4306 } 4307 4308 address MacroAssembler::ic_call(address entry, jint method_index) { 4309 RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); 4310 IncompressibleRegion ir(this); // relocations 4311 movptr(t1, (address)Universe::non_oop_word(), t0); 4312 assert_cond(entry != nullptr); 4313 return reloc_call(Address(entry, rh)); 4314 } 4315 4316 int MacroAssembler::ic_check_size() { 4317 // No compressed 4318 return (MacroAssembler::instruction_size * (2 /* 2 loads */ + 1 /* branch */)) + 4319 far_branch_size(); 4320 } 4321 4322 int MacroAssembler::ic_check(int end_alignment) { 4323 IncompressibleRegion ir(this); 4324 Register receiver = j_rarg0; 4325 Register data = t1; 4326 4327 Register tmp1 = t0; // t0 always scratch 4328 // t2 is saved on call, thus should have been saved before this check. 4329 // Hence we can clobber it. 4330 Register tmp2 = t2; 4331 4332 // The UEP of a code blob ensures that the VEP is padded. However, the padding of the UEP is placed 4333 // before the inline cache check, so we don't have to execute any nop instructions when dispatching 4334 // through the UEP, yet we can ensure that the VEP is aligned appropriately. That's why we align 4335 // before the inline cache check here, and not after 4336 align(end_alignment, ic_check_size()); 4337 int uep_offset = offset(); 4338 4339 if (UseCompressedClassPointers) { 4340 lwu(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes())); 4341 lwu(tmp2, Address(data, CompiledICData::speculated_klass_offset())); 4342 } else { 4343 ld(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes())); 4344 ld(tmp2, Address(data, CompiledICData::speculated_klass_offset())); 4345 } 4346 4347 Label ic_hit; 4348 beq(tmp1, tmp2, ic_hit); 4349 // Note, far_jump is not fixed size. 4350 // Is this ever generates a movptr alignment/size will be off. 
4351 far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 4352 bind(ic_hit); 4353 4354 assert((offset() % end_alignment) == 0, "Misaligned verified entry point."); 4355 return uep_offset; 4356 } 4357 4358 address MacroAssembler::emit_address_stub(int insts_call_instruction_offset, address dest) { 4359 address stub = start_a_stub(max_reloc_call_stub_size()); 4360 if (stub == nullptr) { 4361 return nullptr; // CodeBuffer::expand failed 4362 } 4363 4364 // We are always 4-byte aligned here. 4365 assert_alignment(pc()); 4366 4367 // Make sure the address of destination 8-byte aligned. 4368 align(wordSize, 0); 4369 4370 RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() + 4371 insts_call_instruction_offset); 4372 const int stub_start_offset = offset(); 4373 relocate(rh, [&] { 4374 assert(offset() - stub_start_offset == 0, 4375 "%ld - %ld == %ld : should be", (long)offset(), (long)stub_start_offset, (long)0); 4376 assert(offset() % wordSize == 0, "bad alignment"); 4377 emit_int64((int64_t)dest); 4378 }); 4379 4380 const address stub_start_addr = addr_at(stub_start_offset); 4381 end_a_stub(); 4382 4383 return stub_start_addr; 4384 } 4385 4386 // Emit a trampoline stub for a call to a target which is too far away. 4387 // 4388 // code sequences: 4389 // 4390 // call-site: 4391 // branch-and-link to <destination> or <trampoline stub> 4392 // 4393 // Related trampoline stub for this call site in the stub section: 4394 // load the call target from the constant pool 4395 // branch (RA still points to the call site above) 4396 4397 address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, 4398 address dest) { 4399 // Max stub size: alignment nop, TrampolineStub. 4400 address stub = start_a_stub(max_reloc_call_stub_size()); 4401 if (stub == nullptr) { 4402 return nullptr; // CodeBuffer::expand failed 4403 } 4404 4405 assert(UseTrampolines, "Must be using trampos."); 4406 4407 // We are always 4-byte aligned here. 4408 assert_alignment(pc()); 4409 4410 // Create a trampoline stub relocation which relates this trampoline stub 4411 // with the call instruction at insts_call_instruction_offset in the 4412 // instructions code-section. 4413 4414 // Make sure the address of destination 8-byte aligned after 3 instructions. 4415 align(wordSize, MacroAssembler::NativeShortCall::trampoline_data_offset); 4416 4417 RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() + 4418 insts_call_instruction_offset); 4419 const int stub_start_offset = offset(); 4420 relocate(rh, [&] { 4421 // Now, create the trampoline stub's code: 4422 // - load the call 4423 // - call 4424 Label target; 4425 ld(t0, target); // auipc + ld 4426 jr(t0); // jalr 4427 bind(target); 4428 assert(offset() - stub_start_offset == MacroAssembler::NativeShortCall::trampoline_data_offset, 4429 "should be"); 4430 assert(offset() % wordSize == 0, "bad alignment"); 4431 emit_int64((int64_t)dest); 4432 }); 4433 4434 const address stub_start_addr = addr_at(stub_start_offset); 4435 4436 end_a_stub(); 4437 4438 return stub_start_addr; 4439 } 4440 4441 int MacroAssembler::max_reloc_call_stub_size() { 4442 // Max stub size: alignment nop, TrampolineStub. 
4443 if (UseTrampolines) { 4444 return instruction_size + MacroAssembler::NativeShortCall::trampoline_size; 4445 } 4446 return instruction_size + wordSize; 4447 } 4448 4449 int MacroAssembler::static_call_stub_size() { 4450 // (lui, addi, slli, addi, slli, addi) + (lui + lui + slli + add) + jalr 4451 return 11 * MacroAssembler::instruction_size; 4452 } 4453 4454 Address MacroAssembler::add_memory_helper(const Address dst, Register tmp) { 4455 switch (dst.getMode()) { 4456 case Address::base_plus_offset: 4457 // This is the expected mode, although we allow all the other 4458 // forms below. 4459 return form_address(tmp, dst.base(), dst.offset()); 4460 default: 4461 la(tmp, dst); 4462 return Address(tmp); 4463 } 4464 } 4465 4466 void MacroAssembler::increment(const Address dst, int64_t value, Register tmp1, Register tmp2) { 4467 assert(((dst.getMode() == Address::base_plus_offset && 4468 is_simm12(dst.offset())) || is_simm12(value)), 4469 "invalid value and address mode combination"); 4470 Address adr = add_memory_helper(dst, tmp2); 4471 assert(!adr.uses(tmp1), "invalid dst for address increment"); 4472 ld(tmp1, adr); 4473 add(tmp1, tmp1, value, tmp2); 4474 sd(tmp1, adr); 4475 } 4476 4477 void MacroAssembler::incrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 4478 assert(((dst.getMode() == Address::base_plus_offset && 4479 is_simm12(dst.offset())) || is_simm12(value)), 4480 "invalid value and address mode combination"); 4481 Address adr = add_memory_helper(dst, tmp2); 4482 assert(!adr.uses(tmp1), "invalid dst for address increment"); 4483 lwu(tmp1, adr); 4484 addw(tmp1, tmp1, value, tmp2); 4485 sw(tmp1, adr); 4486 } 4487 4488 void MacroAssembler::decrement(const Address dst, int64_t value, Register tmp1, Register tmp2) { 4489 assert(((dst.getMode() == Address::base_plus_offset && 4490 is_simm12(dst.offset())) || is_simm12(value)), 4491 "invalid value and address mode combination"); 4492 Address adr = add_memory_helper(dst, tmp2); 4493 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 4494 ld(tmp1, adr); 4495 sub(tmp1, tmp1, value, tmp2); 4496 sd(tmp1, adr); 4497 } 4498 4499 void MacroAssembler::decrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 4500 assert(((dst.getMode() == Address::base_plus_offset && 4501 is_simm12(dst.offset())) || is_simm12(value)), 4502 "invalid value and address mode combination"); 4503 Address adr = add_memory_helper(dst, tmp2); 4504 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 4505 lwu(tmp1, adr); 4506 subw(tmp1, tmp1, value, tmp2); 4507 sw(tmp1, adr); 4508 } 4509 4510 void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { 4511 assert_different_registers(src1, t0); 4512 relocate(src2.rspec(), [&] { 4513 int32_t offset; 4514 la(t0, src2.target(), offset); 4515 ld(t0, Address(t0, offset)); 4516 }); 4517 beq(src1, t0, equal); 4518 } 4519 4520 void MacroAssembler::load_method_holder_cld(Register result, Register method) { 4521 load_method_holder(result, method); 4522 ld(result, Address(result, InstanceKlass::class_loader_data_offset())); 4523 } 4524 4525 void MacroAssembler::load_method_holder(Register holder, Register method) { 4526 ld(holder, Address(method, Method::const_offset())); // ConstMethod* 4527 ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* 4528 ld(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass* 4529 } 4530 4531 // string indexof 4532 // compute index by trailing zeros 4533 void 
MacroAssembler::compute_index(Register haystack, Register trailing_zeros, 4534 Register match_mask, Register result, 4535 Register ch2, Register tmp, 4536 bool haystack_isL) { 4537 int haystack_chr_shift = haystack_isL ? 0 : 1; 4538 srl(match_mask, match_mask, trailing_zeros); 4539 srli(match_mask, match_mask, 1); 4540 srli(tmp, trailing_zeros, LogBitsPerByte); 4541 if (!haystack_isL) andi(tmp, tmp, 0xE); 4542 add(haystack, haystack, tmp); 4543 ld(ch2, Address(haystack)); 4544 if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); 4545 add(result, result, tmp); 4546 } 4547 4548 // string indexof 4549 // Find pattern element in src, compute match mask, 4550 // only the first occurrence of 0x80/0x8000 at low bits is the valid match index 4551 // match mask patterns and corresponding indices would be like: 4552 // - 0x8080808080808080 (Latin1) 4553 // - 7 6 5 4 3 2 1 0 (match index) 4554 // - 0x8000800080008000 (UTF16) 4555 // - 3 2 1 0 (match index) 4556 void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, 4557 Register mask1, Register mask2) { 4558 xorr(src, pattern, src); 4559 sub(match_mask, src, mask1); 4560 orr(src, src, mask2); 4561 notr(src, src); 4562 andr(match_mask, match_mask, src); 4563 } 4564 4565 #ifdef COMPILER2 4566 // Code for BigInteger::mulAdd intrinsic 4567 // out = x10 4568 // in = x11 4569 // offset = x12 (already out.length-offset) 4570 // len = x13 4571 // k = x14 4572 // tmp = x28 4573 // 4574 // pseudo code from java implementation: 4575 // long kLong = k & LONG_MASK; 4576 // carry = 0; 4577 // offset = out.length-offset - 1; 4578 // for (int j = len - 1; j >= 0; j--) { 4579 // product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; 4580 // out[offset--] = (int)product; 4581 // carry = product >>> 32; 4582 // } 4583 // return (int)carry; 4584 void MacroAssembler::mul_add(Register out, Register in, Register offset, 4585 Register len, Register k, Register tmp) { 4586 Label L_tail_loop, L_unroll, L_end; 4587 mv(tmp, out); 4588 mv(out, zr); 4589 blez(len, L_end); 4590 zero_extend(k, k, 32); 4591 slliw(t0, offset, LogBytesPerInt); 4592 add(offset, tmp, t0); 4593 slliw(t0, len, LogBytesPerInt); 4594 add(in, in, t0); 4595 4596 const int unroll = 8; 4597 mv(tmp, unroll); 4598 blt(len, tmp, L_tail_loop); 4599 bind(L_unroll); 4600 for (int i = 0; i < unroll; i++) { 4601 sub(in, in, BytesPerInt); 4602 lwu(t0, Address(in, 0)); 4603 mul(t1, t0, k); 4604 add(t0, t1, out); 4605 sub(offset, offset, BytesPerInt); 4606 lwu(t1, Address(offset, 0)); 4607 add(t0, t0, t1); 4608 sw(t0, Address(offset, 0)); 4609 srli(out, t0, 32); 4610 } 4611 subw(len, len, tmp); 4612 bge(len, tmp, L_unroll); 4613 4614 bind(L_tail_loop); 4615 blez(len, L_end); 4616 sub(in, in, BytesPerInt); 4617 lwu(t0, Address(in, 0)); 4618 mul(t1, t0, k); 4619 add(t0, t1, out); 4620 sub(offset, offset, BytesPerInt); 4621 lwu(t1, Address(offset, 0)); 4622 add(t0, t0, t1); 4623 sw(t0, Address(offset, 0)); 4624 srli(out, t0, 32); 4625 subw(len, len, 1); 4626 j(L_tail_loop); 4627 4628 bind(L_end); 4629 } 4630 4631 // Multiply and multiply-accumulate unsigned 64-bit registers. 
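//
// (Sketch:) wide_mul forms the full 128-bit unsigned product with the RV64M pair
//
//   // mul   prod_lo, n, m   // low 64 bits of n * m
//   // mulhu prod_hi, n, m   // high 64 bits of n * m
//
// and wide_madd folds such a product into a 128-bit accumulator (sum_hi:sum_lo)
// using the cad/adc carry helpers defined further down.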
4632 void MacroAssembler::wide_mul(Register prod_lo, Register prod_hi, Register n, Register m) { 4633 assert_different_registers(prod_lo, prod_hi); 4634 4635 mul(prod_lo, n, m); 4636 mulhu(prod_hi, n, m); 4637 } 4638 4639 void MacroAssembler::wide_madd(Register sum_lo, Register sum_hi, Register n, 4640 Register m, Register tmp1, Register tmp2) { 4641 assert_different_registers(sum_lo, sum_hi); 4642 assert_different_registers(sum_hi, tmp2); 4643 4644 wide_mul(tmp1, tmp2, n, m); 4645 cad(sum_lo, sum_lo, tmp1, tmp1); // Add tmp1 to sum_lo with carry output to tmp1 4646 adc(sum_hi, sum_hi, tmp2, tmp1); // Add tmp2 with carry to sum_hi 4647 } 4648 4649 // add two unsigned input and output carry 4650 void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) 4651 { 4652 assert_different_registers(dst, carry); 4653 assert_different_registers(dst, src2); 4654 add(dst, src1, src2); 4655 sltu(carry, dst, src2); 4656 } 4657 4658 // add two input with carry 4659 void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) { 4660 assert_different_registers(dst, carry); 4661 add(dst, src1, src2); 4662 add(dst, dst, carry); 4663 } 4664 4665 // add two unsigned input with carry and output carry 4666 void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) { 4667 assert_different_registers(dst, src2); 4668 adc(dst, src1, src2, carry); 4669 sltu(carry, dst, src2); 4670 } 4671 4672 void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, 4673 Register src1, Register src2, Register carry) { 4674 cad(dest_lo, dest_lo, src1, carry); 4675 add(dest_hi, dest_hi, carry); 4676 cad(dest_lo, dest_lo, src2, carry); 4677 add(final_dest_hi, dest_hi, carry); 4678 } 4679 4680 /** 4681 * Multiply 32 bit by 32 bit first loop. 4682 */ 4683 void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, 4684 Register y, Register y_idx, Register z, 4685 Register carry, Register product, 4686 Register idx, Register kdx) { 4687 // jlong carry, x[], y[], z[]; 4688 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 4689 // long product = y[idx] * x[xstart] + carry; 4690 // z[kdx] = (int)product; 4691 // carry = product >>> 32; 4692 // } 4693 // z[xstart] = (int)carry; 4694 4695 Label L_first_loop, L_first_loop_exit; 4696 blez(idx, L_first_loop_exit); 4697 4698 shadd(t0, xstart, x, t0, LogBytesPerInt); 4699 lwu(x_xstart, Address(t0, 0)); 4700 4701 bind(L_first_loop); 4702 subw(idx, idx, 1); 4703 shadd(t0, idx, y, t0, LogBytesPerInt); 4704 lwu(y_idx, Address(t0, 0)); 4705 mul(product, x_xstart, y_idx); 4706 add(product, product, carry); 4707 srli(carry, product, 32); 4708 subw(kdx, kdx, 1); 4709 shadd(t0, kdx, z, t0, LogBytesPerInt); 4710 sw(product, Address(t0, 0)); 4711 bgtz(idx, L_first_loop); 4712 4713 bind(L_first_loop_exit); 4714 } 4715 4716 /** 4717 * Multiply 64 bit by 64 bit first loop. 
4718 */ 4719 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, 4720 Register y, Register y_idx, Register z, 4721 Register carry, Register product, 4722 Register idx, Register kdx) { 4723 // 4724 // jlong carry, x[], y[], z[]; 4725 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 4726 // huge_128 product = y[idx] * x[xstart] + carry; 4727 // z[kdx] = (jlong)product; 4728 // carry = (jlong)(product >>> 64); 4729 // } 4730 // z[xstart] = carry; 4731 // 4732 4733 Label L_first_loop, L_first_loop_exit; 4734 Label L_one_x, L_one_y, L_multiply; 4735 4736 subw(xstart, xstart, 1); 4737 bltz(xstart, L_one_x); 4738 4739 shadd(t0, xstart, x, t0, LogBytesPerInt); 4740 ld(x_xstart, Address(t0, 0)); 4741 ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian 4742 4743 bind(L_first_loop); 4744 subw(idx, idx, 1); 4745 bltz(idx, L_first_loop_exit); 4746 subw(idx, idx, 1); 4747 bltz(idx, L_one_y); 4748 4749 shadd(t0, idx, y, t0, LogBytesPerInt); 4750 ld(y_idx, Address(t0, 0)); 4751 ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian 4752 bind(L_multiply); 4753 4754 mulhu(t0, x_xstart, y_idx); 4755 mul(product, x_xstart, y_idx); 4756 cad(product, product, carry, t1); 4757 adc(carry, t0, zr, t1); 4758 4759 subw(kdx, kdx, 2); 4760 ror_imm(product, product, 32); // back to big-endian 4761 shadd(t0, kdx, z, t0, LogBytesPerInt); 4762 sd(product, Address(t0, 0)); 4763 4764 j(L_first_loop); 4765 4766 bind(L_one_y); 4767 lwu(y_idx, Address(y, 0)); 4768 j(L_multiply); 4769 4770 bind(L_one_x); 4771 lwu(x_xstart, Address(x, 0)); 4772 j(L_first_loop); 4773 4774 bind(L_first_loop_exit); 4775 } 4776 4777 /** 4778 * Multiply 128 bit by 128 bit. Unrolled inner loop. 4779 * 4780 */ 4781 void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, 4782 Register carry, Register carry2, 4783 Register idx, Register jdx, 4784 Register yz_idx1, Register yz_idx2, 4785 Register tmp, Register tmp3, Register tmp4, 4786 Register tmp6, Register product_hi) { 4787 // jlong carry, x[], y[], z[]; 4788 // int kdx = xstart+1; 4789 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop 4790 // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; 4791 // jlong carry2 = (jlong)(tmp3 >>> 64); 4792 // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; 4793 // carry = (jlong)(tmp4 >>> 64); 4794 // z[kdx+idx+1] = (jlong)tmp3; 4795 // z[kdx+idx] = (jlong)tmp4; 4796 // } 4797 // idx += 2; 4798 // if (idx > 0) { 4799 // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; 4800 // z[kdx+idx] = (jlong)yz_idx1; 4801 // carry = (jlong)(yz_idx1 >>> 64); 4802 // } 4803 // 4804 4805 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; 4806 4807 srliw(jdx, idx, 2); 4808 4809 bind(L_third_loop); 4810 4811 subw(jdx, jdx, 1); 4812 bltz(jdx, L_third_loop_exit); 4813 subw(idx, idx, 4); 4814 4815 shadd(t0, idx, y, t0, LogBytesPerInt); 4816 ld(yz_idx2, Address(t0, 0)); 4817 ld(yz_idx1, Address(t0, wordSize)); 4818 4819 shadd(tmp6, idx, z, t0, LogBytesPerInt); 4820 4821 ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian 4822 ror_imm(yz_idx2, yz_idx2, 32); 4823 4824 ld(t1, Address(tmp6, 0)); 4825 ld(t0, Address(tmp6, wordSize)); 4826 4827 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 4828 mulhu(tmp4, product_hi, yz_idx1); 4829 4830 ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian 4831 ror_imm(t1, t1, 32, tmp); 4832 4833 mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi 
-> carry2:tmp 4834 mulhu(carry2, product_hi, yz_idx2); 4835 4836 cad(tmp3, tmp3, carry, carry); 4837 adc(tmp4, tmp4, zr, carry); 4838 cad(tmp3, tmp3, t0, t0); 4839 cadc(tmp4, tmp4, tmp, t0); 4840 adc(carry, carry2, zr, t0); 4841 cad(tmp4, tmp4, t1, carry2); 4842 adc(carry, carry, zr, carry2); 4843 4844 ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian 4845 ror_imm(tmp4, tmp4, 32); 4846 sd(tmp4, Address(tmp6, 0)); 4847 sd(tmp3, Address(tmp6, wordSize)); 4848 4849 j(L_third_loop); 4850 4851 bind(L_third_loop_exit); 4852 4853 andi(idx, idx, 0x3); 4854 beqz(idx, L_post_third_loop_done); 4855 4856 Label L_check_1; 4857 subw(idx, idx, 2); 4858 bltz(idx, L_check_1); 4859 4860 shadd(t0, idx, y, t0, LogBytesPerInt); 4861 ld(yz_idx1, Address(t0, 0)); 4862 ror_imm(yz_idx1, yz_idx1, 32); 4863 4864 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 4865 mulhu(tmp4, product_hi, yz_idx1); 4866 4867 shadd(t0, idx, z, t0, LogBytesPerInt); 4868 ld(yz_idx2, Address(t0, 0)); 4869 ror_imm(yz_idx2, yz_idx2, 32, tmp); 4870 4871 add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); 4872 4873 ror_imm(tmp3, tmp3, 32, tmp); 4874 sd(tmp3, Address(t0, 0)); 4875 4876 bind(L_check_1); 4877 4878 andi(idx, idx, 0x1); 4879 subw(idx, idx, 1); 4880 bltz(idx, L_post_third_loop_done); 4881 shadd(t0, idx, y, t0, LogBytesPerInt); 4882 lwu(tmp4, Address(t0, 0)); 4883 mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 4884 mulhu(carry2, tmp4, product_hi); 4885 4886 shadd(t0, idx, z, t0, LogBytesPerInt); 4887 lwu(tmp4, Address(t0, 0)); 4888 4889 add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); 4890 4891 shadd(t0, idx, z, t0, LogBytesPerInt); 4892 sw(tmp3, Address(t0, 0)); 4893 4894 slli(t0, carry2, 32); 4895 srli(carry, tmp3, 32); 4896 orr(carry, carry, t0); 4897 4898 bind(L_post_third_loop_done); 4899 } 4900 4901 /** 4902 * Code for BigInteger::multiplyToLen() intrinsic. 4903 * 4904 * x10: x 4905 * x11: xlen 4906 * x12: y 4907 * x13: ylen 4908 * x14: z 4909 * x15: tmp0 4910 * x16: tmp1 4911 * x17: tmp2 4912 * x7: tmp3 4913 * x28: tmp4 4914 * x29: tmp5 4915 * x30: tmp6 4916 * x31: tmp7 4917 */ 4918 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, 4919 Register z, Register tmp0, 4920 Register tmp1, Register tmp2, Register tmp3, Register tmp4, 4921 Register tmp5, Register tmp6, Register product_hi) { 4922 assert_different_registers(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); 4923 4924 const Register idx = tmp1; 4925 const Register kdx = tmp2; 4926 const Register xstart = tmp3; 4927 4928 const Register y_idx = tmp4; 4929 const Register carry = tmp5; 4930 const Register product = xlen; 4931 const Register x_xstart = tmp0; 4932 4933 mv(idx, ylen); // idx = ylen; 4934 addw(kdx, xlen, ylen); // kdx = xlen+ylen; 4935 mv(carry, zr); // carry = 0; 4936 4937 Label L_multiply_64_x_64_loop, L_done; 4938 4939 subw(xstart, xlen, 1); 4940 bltz(xstart, L_done); 4941 4942 const Register jdx = tmp1; 4943 4944 if (AvoidUnalignedAccesses) { 4945 // Check if x and y are both 8-byte aligned. 
4946 orr(t0, xlen, ylen); 4947 test_bit(t0, t0, 0); 4948 beqz(t0, L_multiply_64_x_64_loop); 4949 4950 multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 4951 shadd(t0, xstart, z, t0, LogBytesPerInt); 4952 sw(carry, Address(t0, 0)); 4953 4954 Label L_second_loop_unaligned; 4955 bind(L_second_loop_unaligned); 4956 mv(carry, zr); 4957 mv(jdx, ylen); 4958 subw(xstart, xstart, 1); 4959 bltz(xstart, L_done); 4960 sub(sp, sp, 2 * wordSize); 4961 sd(z, Address(sp, 0)); 4962 sd(zr, Address(sp, wordSize)); 4963 shadd(t0, xstart, z, t0, LogBytesPerInt); 4964 addi(z, t0, 4); 4965 shadd(t0, xstart, x, t0, LogBytesPerInt); 4966 lwu(product, Address(t0, 0)); 4967 Label L_third_loop, L_third_loop_exit; 4968 4969 blez(jdx, L_third_loop_exit); 4970 4971 bind(L_third_loop); 4972 subw(jdx, jdx, 1); 4973 shadd(t0, jdx, y, t0, LogBytesPerInt); 4974 lwu(t0, Address(t0, 0)); 4975 mul(t1, t0, product); 4976 add(t0, t1, carry); 4977 shadd(tmp6, jdx, z, t1, LogBytesPerInt); 4978 lwu(t1, Address(tmp6, 0)); 4979 add(t0, t0, t1); 4980 sw(t0, Address(tmp6, 0)); 4981 srli(carry, t0, 32); 4982 bgtz(jdx, L_third_loop); 4983 4984 bind(L_third_loop_exit); 4985 ld(z, Address(sp, 0)); 4986 addi(sp, sp, 2 * wordSize); 4987 shadd(t0, xstart, z, t0, LogBytesPerInt); 4988 sw(carry, Address(t0, 0)); 4989 4990 j(L_second_loop_unaligned); 4991 } 4992 4993 bind(L_multiply_64_x_64_loop); 4994 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 4995 4996 Label L_second_loop_aligned; 4997 beqz(kdx, L_second_loop_aligned); 4998 4999 Label L_carry; 5000 subw(kdx, kdx, 1); 5001 beqz(kdx, L_carry); 5002 5003 shadd(t0, kdx, z, t0, LogBytesPerInt); 5004 sw(carry, Address(t0, 0)); 5005 srli(carry, carry, 32); 5006 subw(kdx, kdx, 1); 5007 5008 bind(L_carry); 5009 shadd(t0, kdx, z, t0, LogBytesPerInt); 5010 sw(carry, Address(t0, 0)); 5011 5012 // Second and third (nested) loops. 
5013 // 5014 // for (int i = xstart-1; i >= 0; i--) { // Second loop 5015 // carry = 0; 5016 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop 5017 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + 5018 // (z[k] & LONG_MASK) + carry; 5019 // z[k] = (int)product; 5020 // carry = product >>> 32; 5021 // } 5022 // z[i] = (int)carry; 5023 // } 5024 // 5025 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi 5026 5027 bind(L_second_loop_aligned); 5028 mv(carry, zr); // carry = 0; 5029 mv(jdx, ylen); // j = ystart+1 5030 5031 subw(xstart, xstart, 1); // i = xstart-1; 5032 bltz(xstart, L_done); 5033 5034 sub(sp, sp, 4 * wordSize); 5035 sd(z, Address(sp, 0)); 5036 5037 Label L_last_x; 5038 shadd(t0, xstart, z, t0, LogBytesPerInt); 5039 addi(z, t0, 4); 5040 subw(xstart, xstart, 1); // i = xstart-1; 5041 bltz(xstart, L_last_x); 5042 5043 shadd(t0, xstart, x, t0, LogBytesPerInt); 5044 ld(product_hi, Address(t0, 0)); 5045 ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian 5046 5047 Label L_third_loop_prologue; 5048 bind(L_third_loop_prologue); 5049 5050 sd(ylen, Address(sp, wordSize)); 5051 sd(x, Address(sp, 2 * wordSize)); 5052 sd(xstart, Address(sp, 3 * wordSize)); 5053 multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, 5054 tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); 5055 ld(z, Address(sp, 0)); 5056 ld(ylen, Address(sp, wordSize)); 5057 ld(x, Address(sp, 2 * wordSize)); 5058 ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen 5059 addi(sp, sp, 4 * wordSize); 5060 5061 addiw(tmp3, xlen, 1); 5062 shadd(t0, tmp3, z, t0, LogBytesPerInt); 5063 sw(carry, Address(t0, 0)); 5064 5065 subw(tmp3, tmp3, 1); 5066 bltz(tmp3, L_done); 5067 5068 srli(carry, carry, 32); 5069 shadd(t0, tmp3, z, t0, LogBytesPerInt); 5070 sw(carry, Address(t0, 0)); 5071 j(L_second_loop_aligned); 5072 5073 // Next infrequent code is moved outside loops. 5074 bind(L_last_x); 5075 lwu(product_hi, Address(x, 0)); 5076 j(L_third_loop_prologue); 5077 5078 bind(L_done); 5079 } 5080 #endif 5081 5082 // Count bits of trailing zero chars from lsb to msb until first non-zero element. 5083 // For LL case, one byte for one element, so shift 8 bits once, and for other case, 5084 // shift 16 bits once. 5085 void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) { 5086 if (UseZbb) { 5087 assert_different_registers(Rd, Rs, tmp1); 5088 int step = isLL ? 8 : 16; 5089 ctz(Rd, Rs); 5090 andi(tmp1, Rd, step - 1); 5091 sub(Rd, Rd, tmp1); 5092 return; 5093 } 5094 5095 assert_different_registers(Rd, Rs, tmp1, tmp2); 5096 Label Loop; 5097 int step = isLL ? 
  mv(Rd, -step);
  mv(tmp2, Rs);

  bind(Loop);
  addi(Rd, Rd, step);
  andi(tmp1, tmp2, ((1 << step) - 1));
  srli(tmp2, tmp2, step);
  beqz(tmp1, Loop);
}

// Reads the adjacent 4 bytes in the lower half of the source register and
// inflates them into the destination register, for example:
//   Rs: A7A6A5A4A3A2A1A0
//   Rd: 00A300A200A100A0
void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) {
  assert_different_registers(Rd, Rs, tmp1, tmp2);

  mv(tmp1, 0xFF000000); // first byte mask at lower word
  andr(Rd, Rs, tmp1);
  for (int i = 0; i < 2; i++) {
    slli(Rd, Rd, wordSize);
    srli(tmp1, tmp1, wordSize);
    andr(tmp2, Rs, tmp1);
    orr(Rd, Rd, tmp2);
  }
  slli(Rd, Rd, wordSize);
  andi(tmp2, Rs, 0xFF); // last byte mask at lower word
  orr(Rd, Rd, tmp2);
}

// Reads the adjacent 4 bytes in the upper half of the source register and
// inflates them into the destination register, for example:
//   Rs: A7A6A5A4A3A2A1A0
//   Rd: 00A700A600A500A4
void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) {
  assert_different_registers(Rd, Rs, tmp1, tmp2);
  srli(Rs, Rs, 32); // only upper 32 bits are needed
  inflate_lo32(Rd, Rs, tmp1, tmp2);
}

// The size of the blocks erased by the zero_blocks stub. We must
// handle anything smaller than this ourselves in zero_words().
const int MacroAssembler::zero_words_block_size = 8;

// zero_words() is used by C2 ClearArray patterns. It is as small as
// possible, handling small word counts locally and delegating
// anything larger to the zero_blocks stub. It is expanded many times
// in compiled code, so it is important to keep it short.

// ptr: Address of a buffer to be zeroed.
// cnt: Count in HeapWords.
//
// ptr, cnt, and t0 are clobbered.
address MacroAssembler::zero_words(Register ptr, Register cnt) {
  assert(is_power_of_2(zero_words_block_size), "adjust this");
  assert(ptr == x28 && cnt == x29, "mismatch in register usage");
  assert_different_registers(cnt, t0);

  BLOCK_COMMENT("zero_words {");

  mv(t0, zero_words_block_size);
  Label around, done, done16;
  bltu(cnt, t0, around);
  {
    RuntimeAddress zero_blocks(StubRoutines::riscv::zero_blocks());
    assert(zero_blocks.target() != nullptr, "zero_blocks stub has not been generated");
    if (StubRoutines::riscv::complete()) {
      address tpc = reloc_call(zero_blocks);
      if (tpc == nullptr) {
        DEBUG_ONLY(reset_labels(around));
        postcond(pc() == badAddress);
        return nullptr;
      }
    } else {
      rt_call(zero_blocks.target());
    }
  }
  bind(around);
  for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
    Label l;
    test_bit(t0, cnt, exact_log2(i));
    beqz(t0, l);
    for (int j = 0; j < i; j++) {
      sd(zr, Address(ptr, j * wordSize));
    }
    addi(ptr, ptr, i * wordSize);
    bind(l);
  }
  {
    Label l;
    test_bit(t0, cnt, 0);
    beqz(t0, l);
    sd(zr, Address(ptr, 0));
    bind(l);
  }

  BLOCK_COMMENT("} zero_words");
  postcond(pc() != badAddress);
  return pc();
}

#define SmallArraySize (18 * BytesPerLong)

// base: Address of a buffer to be zeroed, 8 bytes aligned.
// cnt:  Immediate count in HeapWords.
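//
// Small counts (up to SmallArraySize / BytesPerLong words) are fully unrolled;
// larger counts store the remainder first and then loop eight stores at a time.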
void MacroAssembler::zero_words(Register base, uint64_t cnt) {
  assert_different_registers(base, t0, t1);

  BLOCK_COMMENT("zero_words {");

  if (cnt <= SmallArraySize / BytesPerLong) {
    for (int i = 0; i < (int)cnt; i++) {
      sd(zr, Address(base, i * wordSize));
    }
  } else {
    const int unroll = 8; // Number of sd(zr, adr) instructions we'll unroll
    int remainder = cnt % unroll;
    for (int i = 0; i < remainder; i++) {
      sd(zr, Address(base, i * wordSize));
    }

    Label loop;
    Register cnt_reg = t0;
    Register loop_base = t1;
    cnt = cnt - remainder;
    mv(cnt_reg, cnt);
    add(loop_base, base, remainder * wordSize);
    bind(loop);
    sub(cnt_reg, cnt_reg, unroll);
    for (int i = 0; i < unroll; i++) {
      sd(zr, Address(loop_base, i * wordSize));
    }
    add(loop_base, loop_base, unroll * wordSize);
    bnez(cnt_reg, loop);
  }

  BLOCK_COMMENT("} zero_words");
}

// base:  Address of a buffer to be filled, 8 bytes aligned.
// cnt:   Count in 8-byte units.
// value: Value to be filled with.
// base will point to the end of the buffer after filling.
void MacroAssembler::fill_words(Register base, Register cnt, Register value) {
  // Algorithm:
  //
  //  t0 = cnt & 7
  //  cnt -= t0
  //  p += t0
  //  switch (t0):
  //    switch start:
  //      do while cnt
  //        cnt -= 8
  //          p[-8] = value
  //        case 7:
  //          p[-7] = value
  //        case 6:
  //          p[-6] = value
  //          // ...
  //        case 1:
  //          p[-1] = value
  //        case 0:
  //          p += 8
  //      do-while end
  //    switch end

  assert_different_registers(base, cnt, value, t0, t1);

  Label fini, skip, entry, loop;
  const int unroll = 8; // Number of sd instructions we'll unroll

  beqz(cnt, fini);

  andi(t0, cnt, unroll - 1);
  sub(cnt, cnt, t0);
  // Store the leading cnt % 8 words by jumping into the middle of the store
  // table below; the loop then stores 8 words per iteration.
  shadd(base, t0, base, t1, 3);
  la(t1, entry);
  slli(t0, t0, 2); // each sd instruction is 4 bytes, so back up (cnt % 8) * 4 bytes from 'entry'
  sub(t1, t1, t0);
  jr(t1);

  bind(loop);
  add(base, base, unroll * 8);
  for (int i = -unroll; i < 0; i++) {
    sd(value, Address(base, i * 8));
  }
  bind(entry);
  sub(cnt, cnt, unroll);
  bgez(cnt, loop);

  bind(fini);
}

// Zero blocks of memory by using CBO.ZERO.
//
// Aligns the base address first sufficiently for CBO.ZERO, then uses
// CBO.ZERO repeatedly for every full block. cnt is the size to be
// zeroed in HeapWords. Returns the count of words left to be zeroed
// in cnt.
//
// NOTE: This is intended to be used in the zero_blocks() stub. If
// you want to use it elsewhere, note that cnt must be >= CacheLineSize.
void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2) {
  Label initial_table_end, loop;

  // Align base with cache line size.
  neg(tmp1, base);
  andi(tmp1, tmp1, CacheLineSize - 1);

  // tmp1: the number of bytes to be filled to align the base with cache line size.
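  // The computed jump below lands in the middle of the sd table so that exactly
  // tmp1 / wordSize stores execute before the cache-line-aligned CBO.ZERO loop
  // (each 4-byte sd instruction zeroes one 8-byte word, hence the shift by 1).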
  add(base, base, tmp1);
  srai(tmp2, tmp1, 3);
  sub(cnt, cnt, tmp2);
  srli(tmp2, tmp1, 1);
  la(tmp1, initial_table_end);
  sub(tmp2, tmp1, tmp2);
  jr(tmp2);
  for (int i = -CacheLineSize + wordSize; i < 0; i += wordSize) {
    sd(zr, Address(base, i));
  }
  bind(initial_table_end);

  mv(tmp1, CacheLineSize / wordSize);
  bind(loop);
  cbo_zero(base);
  sub(cnt, cnt, tmp1);
  add(base, base, CacheLineSize);
  bge(cnt, tmp1, loop);
}

// java.lang.Math.round(float a)
// Returns the closest int to the argument, with ties rounding to positive infinity.
void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
  // This instruction sequence provides a performance improvement on all tested
  // devices; don't change it without re-verification.
  Label done;
  mv(t0, jint_cast(0.5f));
  fmv_w_x(ftmp, t0);

  // dst = 0 if NaN
  feq_s(t0, src, src); // replacing fclass with feq as performance optimization
  mv(dst, zr);
  beqz(t0, done);

  // dst = (src + 0.5f) rounded down towards negative infinity
  // Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place.
  // RDN is required for fadd_s, RNE gives incorrect results:
  //  --------------------------------------------------------------------
  //  fadd.s rne (src + 0.5f): src = 8388609.000000  ftmp = 8388610.000000
  //  fcvt.w.s rdn:            ftmp = 8388610.000000 dst = 8388610
  //  --------------------------------------------------------------------
  //  fadd.s rdn (src + 0.5f): src = 8388609.000000  ftmp = 8388609.000000
  //  fcvt.w.s rdn:            ftmp = 8388609.000000 dst = 8388609
  //  --------------------------------------------------------------------
  fadd_s(ftmp, src, ftmp, RoundingMode::rdn);
  fcvt_w_s(dst, ftmp, RoundingMode::rdn);

  bind(done);
}

// java.lang.Math.round(double a)
// Returns the closest long to the argument, with ties rounding to positive infinity.
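//
// Same approach as java_round_float above: filter out NaN (result 0), then add
// 0.5 and convert, both with RDN rounding, so that ties round toward positive infinity.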
void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) {
  // This instruction sequence provides a performance improvement on all tested
  // devices; don't change it without re-verification.
  Label done;
  mv(t0, julong_cast(0.5));
  fmv_d_x(ftmp, t0);

  // dst = 0 if NaN
  feq_d(t0, src, src); // replacing fclass with feq as performance optimization
  mv(dst, zr);
  beqz(t0, done);

  // dst = (src + 0.5) rounded down towards negative infinity
  fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here, otherwise some inputs produce incorrect results
  fcvt_l_d(dst, ftmp, RoundingMode::rdn);

  bind(done);
}

#define FCVT_SAFE(FLOATCVT, FLOATSIG)                                                  \
void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
  Label done;                                                                          \
  assert_different_registers(dst, tmp);                                                \
  fclass_##FLOATSIG(tmp, src);                                                         \
  mv(dst, zr);                                                                         \
  /* check if src is NaN */                                                            \
  andi(tmp, tmp, fclass_mask::nan);                                                    \
  bnez(tmp, done);                                                                     \
  FLOATCVT(dst, src);                                                                  \
  bind(done);                                                                          \
}

FCVT_SAFE(fcvt_w_s, s);
FCVT_SAFE(fcvt_l_s, s);
FCVT_SAFE(fcvt_w_d, d);
FCVT_SAFE(fcvt_l_d, d);

#undef FCVT_SAFE

#define FCMP(FLOATTYPE, FLOATSIG)                                                     \
void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1,         \
                                         FloatRegister Rs2, int unordered_result) {  \
  Label Ldone;                                                                        \
  if (unordered_result < 0) {                                                         \
    /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \
    /* installs 1 if gt else 0 */                                                     \
    flt_##FLOATSIG(result, Rs2, Rs1);                                                 \
    /* Rs1 > Rs2, install 1 */                                                        \
    bgtz(result, Ldone);                                                              \
    feq_##FLOATSIG(result, Rs1, Rs2);                                                 \
    addi(result, result, -1);                                                         \
    /* Rs1 = Rs2, install 0 */                                                        \
    /* NaN or Rs1 < Rs2, install -1 */                                                \
    bind(Ldone);                                                                      \
  } else {                                                                            \
    /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \
    /* installs 1 if lt else 0 (negated below) */                                     \
    flt_##FLOATSIG(result, Rs1, Rs2);                                                 \
    /* Rs1 < Rs2, install -1 */                                                       \
    bgtz(result, Ldone);                                                              \
    feq_##FLOATSIG(result, Rs1, Rs2);                                                 \
    addi(result, result, -1);                                                         \
    /* Rs1 = Rs2, install 0 */                                                        \
    /* NaN or Rs1 > Rs2, install 1 */                                                 \
    bind(Ldone);                                                                      \
    neg(result, result);                                                              \
  }                                                                                   \
}

FCMP(float, s);
FCMP(double, d);

#undef FCMP

// Zero words; len is in bytes
// Destroys all registers except addr
// len must be a nonzero multiple of wordSize
void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) {
  assert_different_registers(addr, len, tmp, t0, t1);

#ifdef ASSERT
  {
    Label L;
    andi(t0, len, BytesPerWord - 1);
    beqz(t0, L);
    stop("len is not a multiple of BytesPerWord");
    bind(L);
  }
#endif // ASSERT

#ifndef PRODUCT
  block_comment("zero memory");
#endif // PRODUCT

  Label loop;
  Label entry;

  // Algorithm:
  //
  //  t0 = cnt & 7
  //  cnt -= t0
  //  p += t0
  //  switch (t0) {
  //    do {
  //      cnt -= 8
  //        p[-8] = 0
  //      case 7:
  //        p[-7] = 0
  //      case 6:
  //        p[-6] = 0
  //        ...
  //      case 1:
  //        p[-1] = 0
  //      case 0:
  //        p += 8
  //    } while (cnt)
  //  }

  const int unroll = 8; // Number of sd(zr) instructions we'll unroll

  srli(len, len, LogBytesPerWord);
  andi(t0, len, unroll - 1);  // t0 = cnt % unroll
  sub(len, len, t0);          // cnt -= (cnt % unroll)
  // tmp always points to the end of the region we're about to zero
  shadd(tmp, t0, addr, t1, LogBytesPerWord);
  la(t1, entry);
  slli(t0, t0, 2);
  sub(t1, t1, t0);
  jr(t1);
  bind(loop);
  sub(len, len, unroll);
  for (int i = -unroll; i < 0; i++) {
    sd(zr, Address(tmp, i * wordSize));
  }
  bind(entry);
  add(tmp, tmp, unroll * wordSize);
  bnez(len, loop);
}

// shift left by shamt and add
// Rd = (Rs1 << shamt) + Rs2
void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) {
  if (UseZba) {
    if (shamt == 1) {
      sh1add(Rd, Rs1, Rs2);
      return;
    } else if (shamt == 2) {
      sh2add(Rd, Rs1, Rs2);
      return;
    } else if (shamt == 3) {
      sh3add(Rd, Rs1, Rs2);
      return;
    }
  }

  if (shamt != 0) {
    assert_different_registers(Rs2, tmp);
    slli(tmp, Rs1, shamt);
    add(Rd, Rs2, tmp);
  } else {
    add(Rd, Rs1, Rs2);
  }
}

void MacroAssembler::zero_extend(Register dst, Register src, int bits) {
  switch (bits) {
    case 32:
      if (UseZba) {
        zext_w(dst, src);
        return;
      }
      break;
    case 16:
      if (UseZbb) {
        zext_h(dst, src);
        return;
      }
      break;
    case 8:
      if (UseZbb) {
        zext_b(dst, src);
        return;
      }
      break;
    default:
      break;
  }
  slli(dst, src, XLEN - bits);
  srli(dst, dst, XLEN - bits);
}

void MacroAssembler::sign_extend(Register dst, Register src, int bits) {
  switch (bits) {
    case 32:
      sext_w(dst, src);
      return;
    case 16:
      if (UseZbb) {
        sext_h(dst, src);
        return;
      }
      break;
    case 8:
      if (UseZbb) {
        sext_b(dst, src);
        return;
      }
      break;
    default:
      break;
  }
  slli(dst, src, XLEN - bits);
  srai(dst, dst, XLEN - bits);
}

void MacroAssembler::cmp_x2i(Register dst, Register src1, Register src2,
                             Register tmp, bool is_signed) {
  if (src1 == src2) {
    mv(dst, zr);
    return;
  }
  Label done;
  Register left = src1;
  Register right = src2;
  if (dst == src1) {
    assert_different_registers(dst, src2, tmp);
    mv(tmp, src1);
    left = tmp;
  } else if (dst == src2) {
    assert_different_registers(dst, src1, tmp);
    mv(tmp, src2);
    right = tmp;
  }

  // installs 1 if gt else 0
  if (is_signed) {
    slt(dst, right, left);
  } else {
    sltu(dst, right, left);
  }
  bnez(dst, done);
  if (is_signed) {
    slt(dst, left, right);
  } else {
    sltu(dst, left, right);
  }
  // dst = -1 if lt; else if eq, dst = 0
  neg(dst, dst);
  bind(done);
}

void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) {
  cmp_x2i(dst, src1, src2, tmp);
}

void MacroAssembler::cmp_ul2i(Register dst, Register src1, Register src2, Register tmp) {
  cmp_x2i(dst, src1, src2, tmp, false);
}

void MacroAssembler::cmp_uw2i(Register dst, Register src1, Register src2, Register tmp) {
  cmp_x2i(dst, src1, src2, tmp, false);
}
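
// The three wrappers above return -1, 0 or 1: cmp_l2i compares as signed
// 64-bit values, while cmp_ul2i and cmp_uw2i compare as unsigned.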

// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
static int reg2offset_in(VMReg r) {
  // Account for saved fp and ra
  // This should really be in_preserve_stack_slots
  return r->reg2stack() * VMRegImpl::stack_slot_size;
}

static int reg2offset_out(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}

// The C ABI specifies:
// "integer scalars narrower than XLEN bits are widened according to the sign
// of their type up to 32 bits, then sign-extended to XLEN bits."
// This applies both to arguments passed in registers and to those passed on the stack.
//
// Java uses 32-bit stack slots; jint, jshort, jchar and jbyte each use one slot.
// Native code uses 64-bit stack slots for all integer scalar types.
//
// lw loads the Java stack slot and sign-extends it;
// sd stores the widened integer into a 64-bit native stack slot.
void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      lw(tmp, Address(fp, reg2offset_in(src.first())));
      sd(tmp, Address(sp, reg2offset_out(dst.first())));
    } else {
      // stack to reg
      lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
  } else {
    if (dst.first() != src.first()) {
      sign_extend(dst.first()->as_Register(), src.first()->as_Register(), 32);
    }
  }
}

// An oop arg. Must pass a handle, not the oop itself.
void MacroAssembler::object_move(OopMap* map,
                                 int oop_handle_offset,
                                 int framesize_in_slots,
                                 VMRegPair src,
                                 VMRegPair dst,
                                 bool is_receiver,
                                 int* receiver_offset) {
  assert_cond(map != nullptr && receiver_offset != nullptr);

  // must pass a handle. First figure out the location we use as a handle
  Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register();
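
  // A handle is the address of a slot holding the oop (a stack slot of the
  // incoming frame or a reserved slot in this frame); a null oop is passed
  // as a null handle.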

  // See if oop is null; if so, we need no handle.

  if (src.first()->is_stack()) {
    // Oop is already on the stack as an argument
    int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
    if (is_receiver) {
      *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
    }

    ld(t0, Address(fp, reg2offset_in(src.first())));
    la(rHandle, Address(fp, reg2offset_in(src.first())));
    // conditionally move a null
    Label notZero1;
    bnez(t0, notZero1);
    mv(rHandle, zr);
    bind(notZero1);
  } else {

    // Oop is in a register; we must store it to the space we reserve
    // on the stack for oop_handles and pass a handle if the oop is non-null.

    const Register rOop = src.first()->as_Register();
    int oop_slot = -1;
    if (rOop == j_rarg0) {
      oop_slot = 0;
    } else if (rOop == j_rarg1) {
      oop_slot = 1;
    } else if (rOop == j_rarg2) {
      oop_slot = 2;
    } else if (rOop == j_rarg3) {
      oop_slot = 3;
    } else if (rOop == j_rarg4) {
      oop_slot = 4;
    } else if (rOop == j_rarg5) {
      oop_slot = 5;
    } else if (rOop == j_rarg6) {
      oop_slot = 6;
    } else {
      assert(rOop == j_rarg7, "wrong register");
      oop_slot = 7;
    }

    oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
    int offset = oop_slot * VMRegImpl::stack_slot_size;

    map->set_oop(VMRegImpl::stack2reg(oop_slot));
    // Store oop in handle area; it may be null.
    sd(rOop, Address(sp, offset));
    if (is_receiver) {
      *receiver_offset = offset;
    }

    // rOop may be the same register as rHandle.
    if (rOop == rHandle) {
      Label isZero;
      beqz(rOop, isZero);
      la(rHandle, Address(sp, offset));
      bind(isZero);
    } else {
      Label notZero2;
      la(rHandle, Address(sp, offset));
      bnez(rOop, notZero2);
      mv(rHandle, zr);
      bind(notZero2);
    }
  }

  // If the arg goes on the stack, store the handle there; otherwise it is already in the correct register.
  if (dst.first()->is_stack()) {
    sd(rHandle, Address(sp, reg2offset_out(dst.first())));
  }
}

// A float arg may have to do a float-reg to int-reg conversion
void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) {
  assert((src.first()->is_stack() && dst.first()->is_stack()) ||
         (src.first()->is_reg() && dst.first()->is_reg()) ||
         (src.first()->is_stack() && dst.first()->is_reg()), "Unexpected error");
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      lwu(tmp, Address(fp, reg2offset_in(src.first())));
      sw(tmp, Address(sp, reg2offset_out(dst.first())));
    } else if (dst.first()->is_Register()) {
      lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
    } else {
      ShouldNotReachHere();
    }
  } else if (src.first() != dst.first()) {
    if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
      fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
    } else {
      ShouldNotReachHere();
    }
  }
}

// A long move
void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      ld(tmp, Address(fp, reg2offset_in(src.first())));
      sd(tmp, Address(sp, reg2offset_out(dst.first())));
    } else {
      // stack to reg
      ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
  } else {
    if (dst.first() != src.first()) {
      mv(dst.first()->as_Register(), src.first()->as_Register());
    }
  }
}

// A double move
void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) {
  assert((src.first()->is_stack() && dst.first()->is_stack()) ||
         (src.first()->is_reg() && dst.first()->is_reg()) ||
         (src.first()->is_stack() && dst.first()->is_reg()), "Unexpected error");
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      ld(tmp, Address(fp, reg2offset_in(src.first())));
      sd(tmp, Address(sp, reg2offset_out(dst.first())));
    } else if (dst.first()->is_Register()) {
      ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
    } else {
      ShouldNotReachHere();
    }
  } else if (src.first() != dst.first()) {
    if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
      fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
    } else {
      ShouldNotReachHere();
    }
  }
}

void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) {
  assert(bit_pos < 64, "invalid bit range");
  if (UseZbs) {
    bexti(Rd, Rs, bit_pos);
    return;
  }
  int64_t imm = (int64_t)(1UL << bit_pos);
  if (is_simm12(imm)) {
    and_imm12(Rd, Rs, imm);
  } else {
    srli(Rd, Rs, bit_pos);
    and_imm12(Rd, Rd, 1);
  }
}

// Implements lightweight-locking.
//
//  - obj: the object to be locked
//  - tmp1, tmp2, tmp3: temporary registers, will be destroyed
//  - slow: branched to if locking fails
void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
  assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
  assert_different_registers(basic_lock, obj, tmp1, tmp2, tmp3, t0);

  Label push;
  const Register top = tmp1;
  const Register mark = tmp2;
  const Register t = tmp3;

  // Preload the markWord. It is important that this is the first
  // instruction emitted as it is part of C1's null check semantics.
  ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));

  if (UseObjectMonitorTable) {
    // Clear cache in case fast locking succeeds.
    sd(zr, Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize((BasicLock::object_monitor_cache_offset_in_bytes()))));
  }

  // Check if the lock-stack is full.
  lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
  mv(t, (unsigned)LockStack::end_offset());
  bge(top, t, slow, /* is_far */ true);

  // Check for recursion.
  add(t, xthread, top);
  ld(t, Address(t, -oopSize));
  beq(obj, t, push);

  // Check header for monitor (0b10).
  test_bit(t, mark, exact_log2(markWord::monitor_value));
  bnez(t, slow, /* is_far */ true);

  // Try to lock. Transition lock-bits 0b01 => 0b00
  assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a la");
  ori(mark, mark, markWord::unlocked_value);
  xori(t, mark, markWord::unlocked_value);
  cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64,
          /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t);
  bne(mark, t, slow, /* is_far */ true);

  bind(push);
  // After successful lock, push object on lock-stack.
  add(t, xthread, top);
  sd(obj, Address(t));
  addw(top, top, oopSize);
  sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
}

// Implements lightweight-unlocking.
//
//  - obj: the object to be unlocked
//  - tmp1, tmp2, tmp3: temporary registers
//  - slow: branched to if unlocking fails
void MacroAssembler::lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
  assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
  assert_different_registers(obj, tmp1, tmp2, tmp3, t0);

#ifdef ASSERT
  {
    // Check for lock-stack underflow.
    Label stack_ok;
    lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
    mv(tmp2, (unsigned)LockStack::start_offset());
    bge(tmp1, tmp2, stack_ok);
    STOP("Lock-stack underflow");
    bind(stack_ok);
  }
#endif

  Label unlocked, push_and_slow;
  const Register top = tmp1;
  const Register mark = tmp2;
  const Register t = tmp3;

  // Check if obj is the top of the lock-stack.
  lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
  subw(top, top, oopSize);
  add(t, xthread, top);
  ld(t, Address(t));
  bne(obj, t, slow, /* is_far */ true);

  // Pop lock-stack.
  DEBUG_ONLY(add(t, xthread, top);)
  DEBUG_ONLY(sd(zr, Address(t));)
  sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));

  // Check if recursive.
  add(t, xthread, top);
  ld(t, Address(t, -oopSize));
  beq(obj, t, unlocked);

  // Not recursive. Check header for monitor (0b10).
  ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
  test_bit(t, mark, exact_log2(markWord::monitor_value));
  bnez(t, push_and_slow);

#ifdef ASSERT
  // Check header not unlocked (0b01).
  Label not_unlocked;
  test_bit(t, mark, exact_log2(markWord::unlocked_value));
  beqz(t, not_unlocked);
  stop("lightweight_unlock already unlocked");
  bind(not_unlocked);
#endif

  // Try to unlock. Transition lock bits 0b00 => 0b01
  assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
  ori(t, mark, markWord::unlocked_value);
  cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64,
          /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, /*result*/ t);
  beq(mark, t, unlocked);

  bind(push_and_slow);
  // Restore lock-stack and handle the unlock in runtime.
  DEBUG_ONLY(add(t, xthread, top);)
  DEBUG_ONLY(sd(obj, Address(t));)
  addw(top, top, oopSize);
  sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
  j(slow);

  bind(unlocked);
}