1 /* 2 * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. 4 * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. 5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6 * 7 * This code is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 only, as 9 * published by the Free Software Foundation. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 * 25 */ 26 27 #include "precompiled.hpp" 28 #include "asm/assembler.hpp" 29 #include "asm/assembler.inline.hpp" 30 #include "compiler/disassembler.hpp" 31 #include "gc/shared/barrierSet.hpp" 32 #include "gc/shared/barrierSetAssembler.hpp" 33 #include "gc/shared/cardTable.hpp" 34 #include "gc/shared/cardTableBarrierSet.hpp" 35 #include "gc/shared/collectedHeap.hpp" 36 #include "interpreter/bytecodeHistogram.hpp" 37 #include "interpreter/interpreter.hpp" 38 #include "memory/resourceArea.hpp" 39 #include "memory/universe.hpp" 40 #include "nativeInst_riscv.hpp" 41 #include "oops/accessDecorators.hpp" 42 #include "oops/compressedOops.inline.hpp" 43 #include "oops/klass.inline.hpp" 44 #include "oops/oop.hpp" 45 #include "runtime/interfaceSupport.inline.hpp" 46 #include "runtime/javaThread.hpp" 47 #include "runtime/jniHandles.inline.hpp" 48 #include "runtime/sharedRuntime.hpp" 49 #include "runtime/stubRoutines.hpp" 50 #include "utilities/powerOfTwo.hpp" 51 #ifdef COMPILER2 52 #include "opto/compile.hpp" 53 #include "opto/node.hpp" 54 #include "opto/output.hpp" 55 #endif 56 57 #ifdef PRODUCT 58 #define BLOCK_COMMENT(str) /* nothing */ 59 #else 60 #define BLOCK_COMMENT(str) block_comment(str) 61 #endif 62 #define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") 63 64 static void pass_arg0(MacroAssembler* masm, Register arg) { 65 if (c_rarg0 != arg) { 66 masm->mv(c_rarg0, arg); 67 } 68 } 69 70 static void pass_arg1(MacroAssembler* masm, Register arg) { 71 if (c_rarg1 != arg) { 72 masm->mv(c_rarg1, arg); 73 } 74 } 75 76 static void pass_arg2(MacroAssembler* masm, Register arg) { 77 if (c_rarg2 != arg) { 78 masm->mv(c_rarg2, arg); 79 } 80 } 81 82 static void pass_arg3(MacroAssembler* masm, Register arg) { 83 if (c_rarg3 != arg) { 84 masm->mv(c_rarg3, arg); 85 } 86 } 87 88 void MacroAssembler::push_cont_fastpath(Register java_thread) { 89 if (!Continuations::enabled()) return; 90 Label done; 91 ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset())); 92 bleu(sp, t0, done); 93 sd(sp, Address(java_thread, JavaThread::cont_fastpath_offset())); 94 bind(done); 95 } 96 97 void MacroAssembler::pop_cont_fastpath(Register java_thread) { 98 if (!Continuations::enabled()) return; 99 Label done; 100 ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset())); 101 bltu(sp, t0, done); 
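// Fall through only when sp is at or above the watermark loaded into t0 above;
// the store that follows then resets cont_fastpath to zero.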
102 sd(zr, Address(java_thread, JavaThread::cont_fastpath_offset())); 103 bind(done); 104 } 105 106 int MacroAssembler::align(int modulus, int extra_offset) { 107 CompressibleRegion cr(this); 108 intptr_t before = offset(); 109 while ((offset() + extra_offset) % modulus != 0) { nop(); } 110 return (int)(offset() - before); 111 } 112 113 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 114 call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); 115 } 116 117 // Implementation of call_VM versions 118 119 void MacroAssembler::call_VM(Register oop_result, 120 address entry_point, 121 bool check_exceptions) { 122 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 123 } 124 125 void MacroAssembler::call_VM(Register oop_result, 126 address entry_point, 127 Register arg_1, 128 bool check_exceptions) { 129 pass_arg1(this, arg_1); 130 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 131 } 132 133 void MacroAssembler::call_VM(Register oop_result, 134 address entry_point, 135 Register arg_1, 136 Register arg_2, 137 bool check_exceptions) { 138 assert(arg_1 != c_rarg2, "smashed arg"); 139 pass_arg2(this, arg_2); 140 pass_arg1(this, arg_1); 141 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 142 } 143 144 void MacroAssembler::call_VM(Register oop_result, 145 address entry_point, 146 Register arg_1, 147 Register arg_2, 148 Register arg_3, 149 bool check_exceptions) { 150 assert(arg_1 != c_rarg3, "smashed arg"); 151 assert(arg_2 != c_rarg3, "smashed arg"); 152 pass_arg3(this, arg_3); 153 154 assert(arg_1 != c_rarg2, "smashed arg"); 155 pass_arg2(this, arg_2); 156 157 pass_arg1(this, arg_1); 158 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 159 } 160 161 void MacroAssembler::call_VM(Register oop_result, 162 Register last_java_sp, 163 address entry_point, 164 int number_of_arguments, 165 bool check_exceptions) { 166 call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 167 } 168 169 void MacroAssembler::call_VM(Register oop_result, 170 Register last_java_sp, 171 address entry_point, 172 Register arg_1, 173 bool check_exceptions) { 174 pass_arg1(this, arg_1); 175 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 176 } 177 178 void MacroAssembler::call_VM(Register oop_result, 179 Register last_java_sp, 180 address entry_point, 181 Register arg_1, 182 Register arg_2, 183 bool check_exceptions) { 184 185 assert(arg_1 != c_rarg2, "smashed arg"); 186 pass_arg2(this, arg_2); 187 pass_arg1(this, arg_1); 188 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 189 } 190 191 void MacroAssembler::call_VM(Register oop_result, 192 Register last_java_sp, 193 address entry_point, 194 Register arg_1, 195 Register arg_2, 196 Register arg_3, 197 bool check_exceptions) { 198 assert(arg_1 != c_rarg3, "smashed arg"); 199 assert(arg_2 != c_rarg3, "smashed arg"); 200 pass_arg3(this, arg_3); 201 assert(arg_1 != c_rarg2, "smashed arg"); 202 pass_arg2(this, arg_2); 203 pass_arg1(this, arg_1); 204 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 205 } 206 207 void MacroAssembler::post_call_nop() { 208 if (!Continuations::enabled()) { 209 return; 210 } 211 relocate(post_call_nop_Relocation::spec(), [&] { 212 InlineSkippedInstructionsCounter skipCounter(this); 213 nop(); 214 li32(zr, 0); 215 }); 216 } 217 218 // these are no-ops overridden by InterpreterMacroAssembler 
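// (in the interpreter these check for pending JVMTI popframe / early-return
// requests and divert execution accordingly)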
219 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} 220 void MacroAssembler::check_and_handle_popframe(Register java_thread) {} 221 222 // Calls to C land 223 // 224 // When entering C land, the fp, & esp of the last Java frame have to be recorded 225 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp 226 // has to be reset to 0. This is required to allow proper stack traversal. 227 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 228 Register last_java_fp, 229 Register last_java_pc, 230 Register tmp) { 231 232 if (last_java_pc->is_valid()) { 233 sd(last_java_pc, Address(xthread, 234 JavaThread::frame_anchor_offset() + 235 JavaFrameAnchor::last_Java_pc_offset())); 236 } 237 238 // determine last_java_sp register 239 if (last_java_sp == sp) { 240 mv(tmp, sp); 241 last_java_sp = tmp; 242 } else if (!last_java_sp->is_valid()) { 243 last_java_sp = esp; 244 } 245 246 sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); 247 248 // last_java_fp is optional 249 if (last_java_fp->is_valid()) { 250 sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); 251 } 252 } 253 254 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 255 Register last_java_fp, 256 address last_java_pc, 257 Register tmp) { 258 assert(last_java_pc != NULL, "must provide a valid PC"); 259 260 la(tmp, last_java_pc); 261 sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); 262 263 set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); 264 } 265 266 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 267 Register last_java_fp, 268 Label &L, 269 Register tmp) { 270 if (L.is_bound()) { 271 set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); 272 } else { 273 L.add_patch_at(code(), locator()); 274 IncompressibleRegion ir(this); // the label address will be patched back. 
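// The pc() placeholder stored below is rewritten with the label's real address
// once it is bound; the IncompressibleRegion above keeps the emitted sequence at
// a fixed length so that patching cannot change the instruction layout.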
275 set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); 276 } 277 } 278 279 void MacroAssembler::reset_last_Java_frame(bool clear_fp) { 280 // we must set sp to zero to clear frame 281 sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); 282 283 // must clear fp, so that compiled frames are not confused; it is 284 // possible that we need it only for debugging 285 if (clear_fp) { 286 sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); 287 } 288 289 // Always clear the pc because it could have been set by make_walkable() 290 sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); 291 } 292 293 void MacroAssembler::call_VM_base(Register oop_result, 294 Register java_thread, 295 Register last_java_sp, 296 address entry_point, 297 int number_of_arguments, 298 bool check_exceptions) { 299 // determine java_thread register 300 if (!java_thread->is_valid()) { 301 java_thread = xthread; 302 } 303 // determine last_java_sp register 304 if (!last_java_sp->is_valid()) { 305 last_java_sp = esp; 306 } 307 308 // debugging support 309 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 310 assert(java_thread == xthread, "unexpected register"); 311 312 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 313 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 314 315 // push java thread (becomes first argument of C function) 316 mv(c_rarg0, java_thread); 317 318 // set last Java frame before call 319 assert(last_java_sp != fp, "can't use fp"); 320 321 Label l; 322 set_last_Java_frame(last_java_sp, fp, l, t0); 323 324 // do the call, remove parameters 325 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); 326 327 // reset last Java frame 328 // Only interpreter should have to clear fp 329 reset_last_Java_frame(true); 330 331 // C++ interp handles this in the interpreter 332 check_and_handle_popframe(java_thread); 333 check_and_handle_earlyret(java_thread); 334 335 if (check_exceptions) { 336 // check for pending exceptions (java_thread is set upon return) 337 ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); 338 Label ok; 339 beqz(t0, ok); 340 RuntimeAddress target(StubRoutines::forward_exception_entry()); 341 relocate(target.rspec(), [&] { 342 int32_t offset; 343 la_patchable(t0, target, offset); 344 jalr(x0, t0, offset); 345 }); 346 bind(ok); 347 } 348 349 // get oop result if there is one and reset the value in the thread 350 if (oop_result->is_valid()) { 351 get_vm_result(oop_result, java_thread); 352 } 353 } 354 355 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { 356 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); 357 sd(zr, Address(java_thread, JavaThread::vm_result_offset())); 358 verify_oop_msg(oop_result, "broken oop in call_VM_base"); 359 } 360 361 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { 362 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); 363 sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); 364 } 365 366 void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { 367 assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); 368 assert_different_registers(klass, xthread, tmp); 369 370 Label L_fallthrough, L_tmp; 371 if (L_fast_path == NULL) { 372 L_fast_path = 
&L_fallthrough; 373 } else if (L_slow_path == NULL) { 374 L_slow_path = &L_fallthrough; 375 } 376 377 // Fast path check: class is fully initialized 378 lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); 379 sub(tmp, tmp, InstanceKlass::fully_initialized); 380 beqz(tmp, *L_fast_path); 381 382 // Fast path check: current thread is initializer thread 383 ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); 384 385 if (L_slow_path == &L_fallthrough) { 386 beq(xthread, tmp, *L_fast_path); 387 bind(*L_slow_path); 388 } else if (L_fast_path == &L_fallthrough) { 389 bne(xthread, tmp, *L_slow_path); 390 bind(*L_fast_path); 391 } else { 392 Unimplemented(); 393 } 394 } 395 396 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) { 397 if (!VerifyOops) { return; } 398 399 // Pass register number to verify_oop_subroutine 400 const char* b = NULL; 401 { 402 ResourceMark rm; 403 stringStream ss; 404 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line); 405 b = code_string(ss.as_string()); 406 } 407 BLOCK_COMMENT("verify_oop {"); 408 409 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 410 411 mv(c_rarg0, reg); // c_rarg0 : x10 412 { 413 // The length of the instruction sequence emitted should not depend 414 // on the address of the char buffer so that the size of mach nodes for 415 // scratch emit and normal emit matches. 416 IncompressibleRegion ir(this); // Fixed length 417 movptr(t0, (address) b); 418 } 419 420 // call indirectly to solve generation ordering problem 421 ExternalAddress target(StubRoutines::verify_oop_subroutine_entry_address()); 422 relocate(target.rspec(), [&] { 423 int32_t offset; 424 la_patchable(t1, target, offset); 425 ld(t1, Address(t1, offset)); 426 }); 427 jalr(t1); 428 429 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 430 431 BLOCK_COMMENT("} verify_oop"); 432 } 433 434 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { 435 if (!VerifyOops) { 436 return; 437 } 438 439 const char* b = NULL; 440 { 441 ResourceMark rm; 442 stringStream ss; 443 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line); 444 b = code_string(ss.as_string()); 445 } 446 BLOCK_COMMENT("verify_oop_addr {"); 447 448 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 449 450 if (addr.uses(sp)) { 451 la(x10, addr); 452 ld(x10, Address(x10, 4 * wordSize)); 453 } else { 454 ld(x10, addr); 455 } 456 457 { 458 // The length of the instruction sequence emitted should not depend 459 // on the address of the char buffer so that the size of mach nodes for 460 // scratch emit and normal emit matches. 461 IncompressibleRegion ir(this); // Fixed length 462 movptr(t0, (address) b); 463 } 464 465 // call indirectly to solve generation ordering problem 466 ExternalAddress target(StubRoutines::verify_oop_subroutine_entry_address()); 467 relocate(target.rspec(), [&] { 468 int32_t offset; 469 la_patchable(t1, target, offset); 470 ld(t1, Address(t1, offset)); 471 }); 472 jalr(t1); 473 474 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 475 476 BLOCK_COMMENT("} verify_oop_addr"); 477 } 478 479 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 480 int extra_slot_offset) { 481 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
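// The address computed below is esp + arg_slot * Interpreter::stackElementSize
// plus Interpreter::expr_offset_in_bytes(extra_slot_offset); for example, with an
// 8-byte stack element and no extra offset, slot 2 maps to esp + 16 (illustrative
// numbers only).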
482 int stackElementSize = Interpreter::stackElementSize; 483 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); 484 #ifdef ASSERT 485 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); 486 assert(offset1 - offset == stackElementSize, "correct arithmetic"); 487 #endif 488 if (arg_slot.is_constant()) { 489 return Address(esp, arg_slot.as_constant() * stackElementSize + offset); 490 } else { 491 assert_different_registers(t0, arg_slot.as_register()); 492 shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); 493 return Address(t0, offset); 494 } 495 } 496 497 #ifndef PRODUCT 498 extern "C" void findpc(intptr_t x); 499 #endif 500 501 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) 502 { 503 // In order to get locks to work, we need to fake an in_VM state 504 if (ShowMessageBoxOnError) { 505 JavaThread* thread = JavaThread::current(); 506 JavaThreadState saved_state = thread->thread_state(); 507 thread->set_thread_state(_thread_in_vm); 508 #ifndef PRODUCT 509 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 510 ttyLocker ttyl; 511 BytecodeCounter::print(); 512 } 513 #endif 514 if (os::message_box(msg, "Execution stopped, print registers?")) { 515 ttyLocker ttyl; 516 tty->print_cr(" pc = 0x%016lx", pc); 517 #ifndef PRODUCT 518 tty->cr(); 519 findpc(pc); 520 tty->cr(); 521 #endif 522 tty->print_cr(" x0 = 0x%016lx", regs[0]); 523 tty->print_cr(" x1 = 0x%016lx", regs[1]); 524 tty->print_cr(" x2 = 0x%016lx", regs[2]); 525 tty->print_cr(" x3 = 0x%016lx", regs[3]); 526 tty->print_cr(" x4 = 0x%016lx", regs[4]); 527 tty->print_cr(" x5 = 0x%016lx", regs[5]); 528 tty->print_cr(" x6 = 0x%016lx", regs[6]); 529 tty->print_cr(" x7 = 0x%016lx", regs[7]); 530 tty->print_cr(" x8 = 0x%016lx", regs[8]); 531 tty->print_cr(" x9 = 0x%016lx", regs[9]); 532 tty->print_cr("x10 = 0x%016lx", regs[10]); 533 tty->print_cr("x11 = 0x%016lx", regs[11]); 534 tty->print_cr("x12 = 0x%016lx", regs[12]); 535 tty->print_cr("x13 = 0x%016lx", regs[13]); 536 tty->print_cr("x14 = 0x%016lx", regs[14]); 537 tty->print_cr("x15 = 0x%016lx", regs[15]); 538 tty->print_cr("x16 = 0x%016lx", regs[16]); 539 tty->print_cr("x17 = 0x%016lx", regs[17]); 540 tty->print_cr("x18 = 0x%016lx", regs[18]); 541 tty->print_cr("x19 = 0x%016lx", regs[19]); 542 tty->print_cr("x20 = 0x%016lx", regs[20]); 543 tty->print_cr("x21 = 0x%016lx", regs[21]); 544 tty->print_cr("x22 = 0x%016lx", regs[22]); 545 tty->print_cr("x23 = 0x%016lx", regs[23]); 546 tty->print_cr("x24 = 0x%016lx", regs[24]); 547 tty->print_cr("x25 = 0x%016lx", regs[25]); 548 tty->print_cr("x26 = 0x%016lx", regs[26]); 549 tty->print_cr("x27 = 0x%016lx", regs[27]); 550 tty->print_cr("x28 = 0x%016lx", regs[28]); 551 tty->print_cr("x30 = 0x%016lx", regs[30]); 552 tty->print_cr("x31 = 0x%016lx", regs[31]); 553 BREAKPOINT; 554 } 555 } 556 fatal("DEBUG MESSAGE: %s", msg); 557 } 558 559 void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) { 560 assert_different_registers(value, tmp1, tmp2); 561 Label done, tagged, weak_tagged; 562 563 beqz(value, done); // Use NULL as-is. 564 // Test for tag. 565 andi(t0, value, JNIHandles::tag_mask); 566 bnez(t0, tagged); 567 568 // Resolve local handle 569 access_load_at(T_OBJECT, IN_NATIVE | AS_RAW, value, Address(value, 0), tmp1, tmp2); 570 verify_oop(value); 571 j(done); 572 573 bind(tagged); 574 // Test for jweak tag.
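// (JNI handles encode their kind in the low tag bits: an untagged value is a
// local handle and was dereferenced directly above, while global and weak-global
// handles carry a tag that is subtracted from the address before the load.)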
575 andi(t0, value, JNIHandles::TypeTag::weak_global); 576 bnez(t0, weak_tagged); 577 578 // Resolve global handle 579 access_load_at(T_OBJECT, IN_NATIVE, value, 580 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2); 581 j(done); 582 583 bind(weak_tagged); 584 // Resolve jweak. 585 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, 586 Address(value, -JNIHandles::TypeTag::weak_global), tmp1, tmp2); 587 verify_oop(value); 588 589 bind(done); 590 } 591 592 void MacroAssembler::resolve_global_jobject(Register value, Register tmp1, Register tmp2) { 593 assert_different_registers(value, tmp1, tmp2); 594 Label done; 595 596 beqz(value, done); // Use NULL as-is. 597 598 #ifdef ASSERT 599 { 600 Label valid_global_tag; 601 andi(t0, value, JNIHandles::TypeTag::global); // Test for global tag. 602 bnez(t0, valid_global_tag); 603 stop("non global jobject using resolve_global_jobject"); 604 bind(valid_global_tag); 605 } 606 #endif 607 608 // Resolve global handle 609 access_load_at(T_OBJECT, IN_NATIVE, value, 610 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2); 611 verify_oop(value); 612 613 bind(done); 614 } 615 616 void MacroAssembler::stop(const char* msg) { 617 BLOCK_COMMENT(msg); 618 illegal_instruction(Assembler::csr::time); 619 emit_int64((uintptr_t)msg); 620 } 621 622 void MacroAssembler::unimplemented(const char* what) { 623 const char* buf = NULL; 624 { 625 ResourceMark rm; 626 stringStream ss; 627 ss.print("unimplemented: %s", what); 628 buf = code_string(ss.as_string()); 629 } 630 stop(buf); 631 } 632 633 void MacroAssembler::emit_static_call_stub() { 634 IncompressibleRegion ir(this); // Fixed length: see CompiledStaticCall::to_interp_stub_size(). 635 // CompiledDirectStaticCall::set_to_interpreted knows the 636 // exact layout of this stub. 637 638 mov_metadata(xmethod, (Metadata*)NULL); 639 640 // Jump to the entry point of the c2i stub. 
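// movptr() materializes all but the low 6 bits of the target in t0 and returns
// those bits in 'offset'; together with the jalr below this gives the stub the
// fixed six-instruction layout that patch_addr_in_movptr() can later rewrite.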
641 int32_t offset = 0; 642 movptr(t0, 0, offset); 643 jalr(x0, t0, offset); 644 } 645 646 void MacroAssembler::call_VM_leaf_base(address entry_point, 647 int number_of_arguments, 648 Label *retaddr) { 649 push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp 650 call(entry_point); 651 if (retaddr != NULL) { 652 bind(*retaddr); 653 } 654 pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp 655 } 656 657 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { 658 call_VM_leaf_base(entry_point, number_of_arguments); 659 } 660 661 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { 662 pass_arg0(this, arg_0); 663 call_VM_leaf_base(entry_point, 1); 664 } 665 666 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 667 pass_arg0(this, arg_0); 668 pass_arg1(this, arg_1); 669 call_VM_leaf_base(entry_point, 2); 670 } 671 672 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, 673 Register arg_1, Register arg_2) { 674 pass_arg0(this, arg_0); 675 pass_arg1(this, arg_1); 676 pass_arg2(this, arg_2); 677 call_VM_leaf_base(entry_point, 3); 678 } 679 680 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { 681 pass_arg0(this, arg_0); 682 MacroAssembler::call_VM_leaf_base(entry_point, 1); 683 } 684 685 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 686 687 assert(arg_0 != c_rarg1, "smashed arg"); 688 pass_arg1(this, arg_1); 689 pass_arg0(this, arg_0); 690 MacroAssembler::call_VM_leaf_base(entry_point, 2); 691 } 692 693 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 694 assert(arg_0 != c_rarg2, "smashed arg"); 695 assert(arg_1 != c_rarg2, "smashed arg"); 696 pass_arg2(this, arg_2); 697 assert(arg_0 != c_rarg1, "smashed arg"); 698 pass_arg1(this, arg_1); 699 pass_arg0(this, arg_0); 700 MacroAssembler::call_VM_leaf_base(entry_point, 3); 701 } 702 703 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { 704 assert(arg_0 != c_rarg3, "smashed arg"); 705 assert(arg_1 != c_rarg3, "smashed arg"); 706 assert(arg_2 != c_rarg3, "smashed arg"); 707 pass_arg3(this, arg_3); 708 assert(arg_0 != c_rarg2, "smashed arg"); 709 assert(arg_1 != c_rarg2, "smashed arg"); 710 pass_arg2(this, arg_2); 711 assert(arg_0 != c_rarg1, "smashed arg"); 712 pass_arg1(this, arg_1); 713 pass_arg0(this, arg_0); 714 MacroAssembler::call_VM_leaf_base(entry_point, 4); 715 } 716 717 void MacroAssembler::la(Register Rd, const address dest) { 718 int64_t offset = dest - pc(); 719 if (is_offset_in_range(offset, 32)) { 720 auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit 721 addi(Rd, Rd, ((int64_t)offset << 52) >> 52); 722 } else { 723 movptr(Rd, dest); 724 } 725 } 726 727 void MacroAssembler::la(Register Rd, const Address &adr) { 728 switch (adr.getMode()) { 729 case Address::literal: { 730 relocInfo::relocType rtype = adr.rspec().reloc()->type(); 731 if (rtype == relocInfo::none) { 732 mv(Rd, (intptr_t)(adr.target())); 733 } else { 734 relocate(adr.rspec(), [&] { 735 movptr(Rd, adr.target()); 736 }); 737 } 738 break; 739 } 740 case Address::base_plus_offset: { 741 Address new_adr = legitimize_address(Rd, adr); 742 if (!(new_adr.base() == Rd && new_adr.offset() == 0)) { 743 addi(Rd, new_adr.base(), new_adr.offset()); 744 } 745 break; 746 } 747 default: 748 ShouldNotReachHere(); 749 } 
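// Note on the auipc/addi pair emitted by la(Register, address) above: addi
// sign-extends its 12-bit immediate, so the auipc part is taken from
// offset + 0x800.  A host-side sketch of the same split (illustrative only):
//   int32_t lo12 = ((int32_t)offset << 20) >> 20;    // sign-extended low 12 bits
//   int32_t hi20 = ((int32_t)offset + 0x800) >> 12;  // upper part, rounded
//   // ((int64_t)hi20 << 12) + lo12 == offset, e.g. 0x1fff -> hi20 0x2, lo12 -1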
750 } 751 752 void MacroAssembler::la(Register Rd, Label &label) { 753 IncompressibleRegion ir(this); // the label address may be patched back. 754 wrap_label(Rd, label, &MacroAssembler::la); 755 } 756 757 void MacroAssembler::li32(Register Rd, int32_t imm) { 758 // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit 759 int64_t upper = imm, lower = imm; 760 lower = (imm << 20) >> 20; 761 upper -= lower; 762 upper = (int32_t)upper; 763 // lui Rd, imm[31:12] + imm[11] 764 lui(Rd, upper); 765 // use addiw to distinguish li32 from li64 766 addiw(Rd, Rd, lower); 767 } 768 769 void MacroAssembler::li64(Register Rd, int64_t imm) { 770 // Load upper 32 bits. upper = imm[63:32], but if imm[31] == 1 or 771 // (imm[31:20] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1. 772 int64_t lower = imm & 0xffffffff; 773 lower -= ((lower << 44) >> 44); 774 int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; 775 int32_t upper = (tmp_imm - (int32_t)lower) >> 32; 776 777 // Load upper 32 bits 778 int64_t up = upper, lo = upper; 779 lo = (lo << 52) >> 52; 780 up -= lo; 781 up = (int32_t)up; 782 lui(Rd, up); 783 addi(Rd, Rd, lo); 784 785 // Load the remaining 32 bits. 786 slli(Rd, Rd, 12); 787 addi(Rd, Rd, (int32_t)lower >> 20); 788 slli(Rd, Rd, 12); 789 lower = ((int32_t)imm << 12) >> 20; 790 addi(Rd, Rd, lower); 791 slli(Rd, Rd, 8); 792 lower = imm & 0xff; 793 addi(Rd, Rd, lower); 794 } 795 796 void MacroAssembler::li(Register Rd, int64_t imm) { 797 // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff 798 // li -> c.li 799 if (do_compress() && (is_imm_in_range(imm, 6, 0) && Rd != x0)) { 800 c_li(Rd, imm); 801 return; 802 } 803 804 int shift = 12; 805 int64_t upper = imm, lower = imm; 806 // Split imm into a lower 12-bit sign-extended part and the remainder, 807 // because addi will sign-extend the lower imm. 808 lower = ((int32_t)imm << 20) >> 20; 809 upper -= lower; 810 811 // Test whether imm is a 32-bit integer.
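// (i.e. bits 63..31 of imm are either all zeros or all ones).  In the 32-bit
// case below the split mirrors li32: lower is the sign-extended low 12 bits and
// upper = imm - lower, so lui(upper) followed by addiw(lower) reconstructs imm;
// e.g. imm 0x12345fff gives lower -1 and upper 0x12346000 (illustrative only).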
812 if (!(((imm) & ~(int64_t)0x7fffffff) == 0 || 813 (((imm) & ~(int64_t)0x7fffffff) == ~(int64_t)0x7fffffff))) { 814 while (((upper >> shift) & 1) == 0) { shift++; } 815 upper >>= shift; 816 li(Rd, upper); 817 slli(Rd, Rd, shift); 818 if (lower != 0) { 819 addi(Rd, Rd, lower); 820 } 821 } else { 822 // 32-bit integer 823 Register hi_Rd = zr; 824 if (upper != 0) { 825 lui(Rd, (int32_t)upper); 826 hi_Rd = Rd; 827 } 828 if (lower != 0 || hi_Rd == zr) { 829 addiw(Rd, hi_Rd, lower); 830 } 831 } 832 } 833 834 #define INSN(NAME, REGISTER) \ 835 void MacroAssembler::NAME(const address dest, Register temp) { \ 836 assert_cond(dest != NULL); \ 837 int64_t distance = dest - pc(); \ 838 if (is_imm_in_range(distance, 20, 1)) { \ 839 Assembler::jal(REGISTER, distance); \ 840 } else { \ 841 assert(temp != noreg, "expecting a register"); \ 842 int32_t offset = 0; \ 843 movptr(temp, dest, offset); \ 844 Assembler::jalr(REGISTER, temp, offset); \ 845 } \ 846 } \ 847 848 INSN(j, x0); 849 INSN(jal, x1); 850 851 #undef INSN 852 853 #define INSN(NAME, REGISTER) \ 854 void MacroAssembler::NAME(const Address &adr, Register temp) { \ 855 switch (adr.getMode()) { \ 856 case Address::literal: { \ 857 relocate(adr.rspec(), [&] { \ 858 NAME(adr.target(), temp); \ 859 }); \ 860 break; \ 861 } \ 862 case Address::base_plus_offset: { \ 863 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \ 864 la(temp, Address(adr.base(), adr.offset() - offset)); \ 865 Assembler::jalr(REGISTER, temp, offset); \ 866 break; \ 867 } \ 868 default: \ 869 ShouldNotReachHere(); \ 870 } \ 871 } 872 873 INSN(j, x0); 874 INSN(jal, x1); 875 876 #undef INSN 877 878 #define INSN(NAME) \ 879 void MacroAssembler::NAME(Register Rd, const address dest, Register temp) { \ 880 assert_cond(dest != NULL); \ 881 int64_t distance = dest - pc(); \ 882 if (is_imm_in_range(distance, 20, 1)) { \ 883 Assembler::NAME(Rd, distance); \ 884 } else { \ 885 assert_different_registers(Rd, temp); \ 886 int32_t offset = 0; \ 887 movptr(temp, dest, offset); \ 888 jalr(Rd, temp, offset); \ 889 } \ 890 } \ 891 void MacroAssembler::NAME(Register Rd, Label &L, Register temp) { \ 892 assert_different_registers(Rd, temp); \ 893 wrap_label(Rd, L, temp, &MacroAssembler::NAME); \ 894 } 895 896 INSN(jal); 897 898 #undef INSN 899 900 #define INSN(NAME, REGISTER) \ 901 void MacroAssembler::NAME(Label &l, Register temp) { \ 902 jal(REGISTER, l, temp); \ 903 } \ 904 905 INSN(j, x0); 906 INSN(jal, x1); 907 908 #undef INSN 909 910 void MacroAssembler::wrap_label(Register Rt, Label &L, Register tmp, load_insn_by_temp insn) { 911 if (L.is_bound()) { 912 (this->*insn)(Rt, target(L), tmp); 913 } else { 914 L.add_patch_at(code(), locator()); 915 (this->*insn)(Rt, pc(), tmp); 916 } 917 } 918 919 void MacroAssembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) { 920 if (L.is_bound()) { 921 (this->*insn)(Rt, target(L)); 922 } else { 923 L.add_patch_at(code(), locator()); 924 (this->*insn)(Rt, pc()); 925 } 926 } 927 928 void MacroAssembler::wrap_label(Register r1, Register r2, Label &L, 929 compare_and_branch_insn insn, 930 compare_and_branch_label_insn neg_insn, bool is_far) { 931 if (is_far) { 932 Label done; 933 (this->*neg_insn)(r1, r2, done, /* is_far */ false); 934 j(L); 935 bind(done); 936 } else { 937 if (L.is_bound()) { 938 (this->*insn)(r1, r2, target(L)); 939 } else { 940 L.add_patch_at(code(), locator()); 941 (this->*insn)(r1, r2, pc()); 942 } 943 } 944 } 945 946 #define INSN(NAME, NEG_INSN) \ 947 void MacroAssembler::NAME(Register Rs1, Register Rs2, Label &L, bool 
is_far) { \ 948 wrap_label(Rs1, Rs2, L, &MacroAssembler::NAME, &MacroAssembler::NEG_INSN, is_far); \ 949 } 950 951 INSN(beq, bne); 952 INSN(bne, beq); 953 INSN(blt, bge); 954 INSN(bge, blt); 955 INSN(bltu, bgeu); 956 INSN(bgeu, bltu); 957 958 #undef INSN 959 960 #define INSN(NAME) \ 961 void MacroAssembler::NAME##z(Register Rs, const address dest) { \ 962 NAME(Rs, zr, dest); \ 963 } \ 964 void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ 965 NAME(Rs, zr, l, is_far); \ 966 } \ 967 968 INSN(beq); 969 INSN(bne); 970 INSN(blt); 971 INSN(ble); 972 INSN(bge); 973 INSN(bgt); 974 975 #undef INSN 976 977 #define INSN(NAME, NEG_INSN) \ 978 void MacroAssembler::NAME(Register Rs, Register Rt, const address dest) { \ 979 NEG_INSN(Rt, Rs, dest); \ 980 } \ 981 void MacroAssembler::NAME(Register Rs, Register Rt, Label &l, bool is_far) { \ 982 NEG_INSN(Rt, Rs, l, is_far); \ 983 } 984 985 INSN(bgt, blt); 986 INSN(ble, bge); 987 INSN(bgtu, bltu); 988 INSN(bleu, bgeu); 989 990 #undef INSN 991 992 // Float compare branch instructions 993 994 #define INSN(NAME, FLOATCMP, BRANCH) \ 995 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ 996 FLOATCMP##_s(t0, Rs1, Rs2); \ 997 BRANCH(t0, l, is_far); \ 998 } \ 999 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ 1000 FLOATCMP##_d(t0, Rs1, Rs2); \ 1001 BRANCH(t0, l, is_far); \ 1002 } 1003 1004 INSN(beq, feq, bnez); 1005 INSN(bne, feq, beqz); 1006 1007 #undef INSN 1008 1009 1010 #define INSN(NAME, FLOATCMP1, FLOATCMP2) \ 1011 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1012 bool is_far, bool is_unordered) { \ 1013 if (is_unordered) { \ 1014 /* jump if either source is NaN or condition is expected */ \ 1015 FLOATCMP2##_s(t0, Rs2, Rs1); \ 1016 beqz(t0, l, is_far); \ 1017 } else { \ 1018 /* jump if no NaN in source and condition is expected */ \ 1019 FLOATCMP1##_s(t0, Rs1, Rs2); \ 1020 bnez(t0, l, is_far); \ 1021 } \ 1022 } \ 1023 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1024 bool is_far, bool is_unordered) { \ 1025 if (is_unordered) { \ 1026 /* jump if either source is NaN or condition is expected */ \ 1027 FLOATCMP2##_d(t0, Rs2, Rs1); \ 1028 beqz(t0, l, is_far); \ 1029 } else { \ 1030 /* jump if no NaN in source and condition is expected */ \ 1031 FLOATCMP1##_d(t0, Rs1, Rs2); \ 1032 bnez(t0, l, is_far); \ 1033 } \ 1034 } 1035 1036 INSN(ble, fle, flt); 1037 INSN(blt, flt, fle); 1038 1039 #undef INSN 1040 1041 #define INSN(NAME, CMP) \ 1042 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1043 bool is_far, bool is_unordered) { \ 1044 float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ 1045 } \ 1046 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1047 bool is_far, bool is_unordered) { \ 1048 double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ 1049 } 1050 1051 INSN(bgt, blt); 1052 INSN(bge, ble); 1053 1054 #undef INSN 1055 1056 1057 #define INSN(NAME, CSR) \ 1058 void MacroAssembler::NAME(Register Rd) { \ 1059 csrr(Rd, CSR); \ 1060 } 1061 1062 INSN(rdinstret, CSR_INSTRET); 1063 INSN(rdcycle, CSR_CYCLE); 1064 INSN(rdtime, CSR_TIME); 1065 INSN(frcsr, CSR_FCSR); 1066 INSN(frrm, CSR_FRM); 1067 INSN(frflags, CSR_FFLAGS); 1068 1069 #undef INSN 1070 1071 void MacroAssembler::csrr(Register Rd, unsigned csr) { 1072 csrrs(Rd, csr, x0); 1073 } 1074 1075 #define INSN(NAME, OPFUN) \ 
1076 void MacroAssembler::NAME(unsigned csr, Register Rs) { \ 1077 OPFUN(x0, csr, Rs); \ 1078 } 1079 1080 INSN(csrw, csrrw); 1081 INSN(csrs, csrrs); 1082 INSN(csrc, csrrc); 1083 1084 #undef INSN 1085 1086 #define INSN(NAME, OPFUN) \ 1087 void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ 1088 OPFUN(x0, csr, imm); \ 1089 } 1090 1091 INSN(csrwi, csrrwi); 1092 INSN(csrsi, csrrsi); 1093 INSN(csrci, csrrci); 1094 1095 #undef INSN 1096 1097 #define INSN(NAME, CSR) \ 1098 void MacroAssembler::NAME(Register Rd, Register Rs) { \ 1099 csrrw(Rd, CSR, Rs); \ 1100 } 1101 1102 INSN(fscsr, CSR_FCSR); 1103 INSN(fsrm, CSR_FRM); 1104 INSN(fsflags, CSR_FFLAGS); 1105 1106 #undef INSN 1107 1108 #define INSN(NAME) \ 1109 void MacroAssembler::NAME(Register Rs) { \ 1110 NAME(x0, Rs); \ 1111 } 1112 1113 INSN(fscsr); 1114 INSN(fsrm); 1115 INSN(fsflags); 1116 1117 #undef INSN 1118 1119 void MacroAssembler::fsrmi(Register Rd, unsigned imm) { 1120 guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); 1121 csrrwi(Rd, CSR_FRM, imm); 1122 } 1123 1124 void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { 1125 csrrwi(Rd, CSR_FFLAGS, imm); 1126 } 1127 1128 #define INSN(NAME) \ 1129 void MacroAssembler::NAME(unsigned imm) { \ 1130 NAME(x0, imm); \ 1131 } 1132 1133 INSN(fsrmi); 1134 INSN(fsflagsi); 1135 1136 #undef INSN 1137 1138 void MacroAssembler::push_reg(Register Rs) 1139 { 1140 addi(esp, esp, 0 - wordSize); 1141 sd(Rs, Address(esp, 0)); 1142 } 1143 1144 void MacroAssembler::pop_reg(Register Rd) 1145 { 1146 ld(Rd, Address(esp, 0)); 1147 addi(esp, esp, wordSize); 1148 } 1149 1150 int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { 1151 int count = 0; 1152 // Scan bitset to accumulate register pairs 1153 for (int reg = 31; reg >= 0; reg--) { 1154 if ((1U << 31) & bitset) { 1155 regs[count++] = reg; 1156 } 1157 bitset <<= 1; 1158 } 1159 return count; 1160 } 1161 1162 // Push integer registers in the bitset supplied. Don't push sp. 1163 // Return the number of words pushed 1164 int MacroAssembler::push_reg(unsigned int bitset, Register stack) { 1165 DEBUG_ONLY(int words_pushed = 0;) 1166 unsigned char regs[32]; 1167 int count = bitset_to_regs(bitset, regs); 1168 // reserve one slot to align for odd count 1169 int offset = is_even(count) ? 0 : wordSize; 1170 1171 if (count) { 1172 addi(stack, stack, -count * wordSize - offset); 1173 } 1174 for (int i = count - 1; i >= 0; i--) { 1175 sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); 1176 DEBUG_ONLY(words_pushed++;) 1177 } 1178 1179 assert(words_pushed == count, "oops, pushed != count"); 1180 1181 return count; 1182 } 1183 1184 int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { 1185 DEBUG_ONLY(int words_popped = 0;) 1186 unsigned char regs[32]; 1187 int count = bitset_to_regs(bitset, regs); 1188 // reserve one slot to align for odd count 1189 int offset = is_even(count) ? 0 : wordSize; 1190 1191 for (int i = count - 1; i >= 0; i--) { 1192 ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); 1193 DEBUG_ONLY(words_popped++;) 1194 } 1195 1196 if (count) { 1197 addi(stack, stack, count * wordSize + offset); 1198 } 1199 assert(words_popped == count, "oops, popped != count"); 1200 1201 return count; 1202 } 1203 1204 // Push floating-point registers in the bitset supplied. 
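// (the slot count is rounded up to an even number below, presumably to keep sp
// 16-byte aligned across the push/pop pair)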
1205 // Return the number of words pushed 1206 int MacroAssembler::push_fp(unsigned int bitset, Register stack) { 1207 DEBUG_ONLY(int words_pushed = 0;) 1208 unsigned char regs[32]; 1209 int count = bitset_to_regs(bitset, regs); 1210 int push_slots = count + (count & 1); 1211 1212 if (count) { 1213 addi(stack, stack, -push_slots * wordSize); 1214 } 1215 1216 for (int i = count - 1; i >= 0; i--) { 1217 fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); 1218 DEBUG_ONLY(words_pushed++;) 1219 } 1220 1221 assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); 1222 1223 return count; 1224 } 1225 1226 int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { 1227 DEBUG_ONLY(int words_popped = 0;) 1228 unsigned char regs[32]; 1229 int count = bitset_to_regs(bitset, regs); 1230 int pop_slots = count + (count & 1); 1231 1232 for (int i = count - 1; i >= 0; i--) { 1233 fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); 1234 DEBUG_ONLY(words_popped++;) 1235 } 1236 1237 if (count) { 1238 addi(stack, stack, pop_slots * wordSize); 1239 } 1240 1241 assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); 1242 1243 return count; 1244 } 1245 1246 #ifdef COMPILER2 1247 // Push vector registers in the bitset supplied. 1248 // Return the number of words pushed 1249 int MacroAssembler::push_v(unsigned int bitset, Register stack) { 1250 int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); 1251 1252 // Scan bitset to accumulate register pairs 1253 unsigned char regs[32]; 1254 int count = bitset_to_regs(bitset, regs); 1255 1256 for (int i = 0; i < count; i++) { 1257 sub(stack, stack, vector_size_in_bytes); 1258 vs1r_v(as_VectorRegister(regs[i]), stack); 1259 } 1260 1261 return count * vector_size_in_bytes / wordSize; 1262 } 1263 1264 int MacroAssembler::pop_v(unsigned int bitset, Register stack) { 1265 int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); 1266 1267 // Scan bitset to accumulate register pairs 1268 unsigned char regs[32]; 1269 int count = bitset_to_regs(bitset, regs); 1270 1271 for (int i = count - 1; i >= 0; i--) { 1272 vl1re8_v(as_VectorRegister(regs[i]), stack); 1273 add(stack, stack, vector_size_in_bytes); 1274 } 1275 1276 return count * vector_size_in_bytes / wordSize; 1277 } 1278 #endif // COMPILER2 1279 1280 void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { 1281 // Push integer registers x7, x10-x17, x28-x31. 1282 push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); 1283 1284 // Push float registers f0-f7, f10-f17, f28-f31. 
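// (8 + 8 + 4 = 20 registers, hence the fixed 20-word stack adjustment below)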
1285 addi(sp, sp, - wordSize * 20); 1286 int offset = 0; 1287 for (int i = 0; i < 32; i++) { 1288 if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { 1289 fsd(as_FloatRegister(i), Address(sp, wordSize * (offset++))); 1290 } 1291 } 1292 } 1293 1294 void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { 1295 int offset = 0; 1296 for (int i = 0; i < 32; i++) { 1297 if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { 1298 fld(as_FloatRegister(i), Address(sp, wordSize * (offset++))); 1299 } 1300 } 1301 addi(sp, sp, wordSize * 20); 1302 1303 pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); 1304 } 1305 1306 void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { 1307 // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) 1308 push_reg(RegSet::range(x5, x31), sp); 1309 1310 // float registers 1311 addi(sp, sp, - 32 * wordSize); 1312 for (int i = 0; i < 32; i++) { 1313 fsd(as_FloatRegister(i), Address(sp, i * wordSize)); 1314 } 1315 1316 // vector registers 1317 if (save_vectors) { 1318 sub(sp, sp, vector_size_in_bytes * VectorRegister::number_of_registers); 1319 vsetvli(t0, x0, Assembler::e64, Assembler::m8); 1320 for (int i = 0; i < VectorRegister::number_of_registers; i += 8) { 1321 add(t0, sp, vector_size_in_bytes * i); 1322 vse64_v(as_VectorRegister(i), t0); 1323 } 1324 } 1325 } 1326 1327 void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { 1328 // vector registers 1329 if (restore_vectors) { 1330 vsetvli(t0, x0, Assembler::e64, Assembler::m8); 1331 for (int i = 0; i < VectorRegister::number_of_registers; i += 8) { 1332 vle64_v(as_VectorRegister(i), sp); 1333 add(sp, sp, vector_size_in_bytes * 8); 1334 } 1335 } 1336 1337 // float registers 1338 for (int i = 0; i < 32; i++) { 1339 fld(as_FloatRegister(i), Address(sp, i * wordSize)); 1340 } 1341 addi(sp, sp, 32 * wordSize); 1342 1343 // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) 1344 pop_reg(RegSet::range(x5, x31), sp); 1345 } 1346 1347 static int patch_offset_in_jal(address branch, int64_t offset) { 1348 assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal instruction!\n"); 1349 Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] 1350 Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] 1351 Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20] 1352 Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] 1353 return NativeInstruction::instruction_size; // only one instruction 1354 } 1355 1356 static int patch_offset_in_conditional_branch(address branch, int64_t offset) { 1357 assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n"); 1358 Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] 1359 Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] 1360 Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] 1361 Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] 1362 return NativeInstruction::instruction_size; // only one instruction 1363 } 1364 1365 static int patch_offset_in_pc_relative(address branch, int64_t offset) { 1366
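// Same hi20/lo12 convention as MacroAssembler::la(): the auipc word receives
// (offset + 0x800) >> 12 and the trailing addi/jalr/load keeps the sign-extended
// low 12 bits.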
const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load 1367 Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12] 1368 Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] 1369 return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size; 1370 } 1371 1372 static int patch_addr_in_movptr(address branch, address target) { 1373 const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load 1374 int32_t lower = ((intptr_t)target << 35) >> 35; 1375 int64_t upper = ((intptr_t)target - lower) >> 29; 1376 Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] 1377 Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] 1378 Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] 1379 Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. target[ 5: 0] ==> branch[31:20] 1380 return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; 1381 } 1382 1383 static int patch_imm_in_li64(address branch, address target) { 1384 const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi 1385 int64_t lower = (intptr_t)target & 0xffffffff; 1386 lower = lower - ((lower << 44) >> 44); 1387 int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower; 1388 int32_t upper = (tmp_imm - (int32_t)lower) >> 32; 1389 int64_t tmp_upper = upper, tmp_lower = upper; 1390 tmp_lower = (tmp_lower << 52) >> 52; 1391 tmp_upper -= tmp_lower; 1392 tmp_upper >>= 12; 1393 // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:20] == 0x7ff && target[19] == 1), 1394 // upper = target[63:32] + 1. 1395 Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui. 1396 Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi. 1397 // Load the rest 32 bits. 1398 Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. 1399 Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. 1400 Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi. 1401 return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; 1402 } 1403 1404 int MacroAssembler::patch_imm_in_li32(address branch, int32_t target) { 1405 const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw 1406 int64_t upper = (intptr_t)target; 1407 int32_t lower = (((int32_t)target) << 20) >> 20; 1408 upper -= lower; 1409 upper = (int32_t)upper; 1410 Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. 1411 Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. 
1412 return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; 1413 } 1414 1415 static long get_offset_of_jal(address insn_addr) { 1416 assert_cond(insn_addr != NULL); 1417 long offset = 0; 1418 unsigned insn = *(unsigned*)insn_addr; 1419 long val = (long)Assembler::sextract(insn, 31, 12); 1420 offset |= ((val >> 19) & 0x1) << 20; 1421 offset |= (val & 0xff) << 12; 1422 offset |= ((val >> 8) & 0x1) << 11; 1423 offset |= ((val >> 9) & 0x3ff) << 1; 1424 offset = (offset << 43) >> 43; 1425 return offset; 1426 } 1427 1428 static long get_offset_of_conditional_branch(address insn_addr) { 1429 long offset = 0; 1430 assert_cond(insn_addr != NULL); 1431 unsigned insn = *(unsigned*)insn_addr; 1432 offset = (long)Assembler::sextract(insn, 31, 31); 1433 offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); 1434 offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); 1435 offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); 1436 offset = (offset << 41) >> 41; 1437 return offset; 1438 } 1439 1440 static long get_offset_of_pc_relative(address insn_addr) { 1441 long offset = 0; 1442 assert_cond(insn_addr != NULL); 1443 offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12; // Auipc. 1444 offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addi/Jalr/Load. 1445 offset = (offset << 32) >> 32; 1446 return offset; 1447 } 1448 1449 static address get_target_of_movptr(address insn_addr) { 1450 assert_cond(insn_addr != NULL); 1451 intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29; // Lui. 1452 target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17; // Addi. 1453 target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6; // Addi. 1454 target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. 1455 return (address) target_address; 1456 } 1457 1458 static address get_target_of_li64(address insn_addr) { 1459 assert_cond(insn_addr != NULL); 1460 intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44; // Lui. 1461 target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32; // Addi. 1462 target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20; // Addi. 1463 target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8; // Addi. 1464 target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20)); // Addi. 1465 return (address)target_address; 1466 } 1467 1468 address MacroAssembler::get_target_of_li32(address insn_addr) { 1469 assert_cond(insn_addr != NULL); 1470 intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12; // Lui. 1471 target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addiw. 1472 return (address)target_address; 1473 } 1474 1475 // Patch any kind of instruction; there may be several instructions. 1476 // Return the total length (in bytes) of the instructions. 
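// With 4-byte instructions the possible return values are: 4 for jal and the
// conditional branches, 8 for the auipc-based pc-relative pair and for li32,
// 24 for movptr, and 32 for li64.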
1477 int MacroAssembler::pd_patch_instruction_size(address branch, address target) { 1478 assert_cond(branch != NULL); 1479 int64_t offset = target - branch; 1480 if (NativeInstruction::is_jal_at(branch)) { // jal 1481 return patch_offset_in_jal(branch, offset); 1482 } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne 1483 return patch_offset_in_conditional_branch(branch, offset); 1484 } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load 1485 return patch_offset_in_pc_relative(branch, offset); 1486 } else if (NativeInstruction::is_movptr_at(branch)) { // movptr 1487 return patch_addr_in_movptr(branch, target); 1488 } else if (NativeInstruction::is_li64_at(branch)) { // li64 1489 return patch_imm_in_li64(branch, target); 1490 } else if (NativeInstruction::is_li32_at(branch)) { // li32 1491 int64_t imm = (intptr_t)target; 1492 return patch_imm_in_li32(branch, (int32_t)imm); 1493 } else { 1494 #ifdef ASSERT 1495 tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", 1496 *(unsigned*)branch, p2i(branch)); 1497 Disassembler::decode(branch - 16, branch + 16); 1498 #endif 1499 ShouldNotReachHere(); 1500 return -1; 1501 } 1502 } 1503 1504 address MacroAssembler::target_addr_for_insn(address insn_addr) { 1505 long offset = 0; 1506 assert_cond(insn_addr != NULL); 1507 if (NativeInstruction::is_jal_at(insn_addr)) { // jal 1508 offset = get_offset_of_jal(insn_addr); 1509 } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne 1510 offset = get_offset_of_conditional_branch(insn_addr); 1511 } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load 1512 offset = get_offset_of_pc_relative(insn_addr); 1513 } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr 1514 return get_target_of_movptr(insn_addr); 1515 } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64 1516 return get_target_of_li64(insn_addr); 1517 } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 1518 return get_target_of_li32(insn_addr); 1519 } else { 1520 ShouldNotReachHere(); 1521 } 1522 return address(((uintptr_t)insn_addr + offset)); 1523 } 1524 1525 int MacroAssembler::patch_oop(address insn_addr, address o) { 1526 // OOPs are either narrow (32 bits) or wide (48 bits). We encode 1527 // narrow OOPs by setting the upper 16 bits in the first 1528 // instruction. 
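// A narrow oop is patched as a 32-bit immediate into the lui/addiw pair emitted
// by li32; a wide oop goes through the six-instruction movptr sequence.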
1529 if (NativeInstruction::is_li32_at(insn_addr)) { 1530 // Move narrow OOP 1531 uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); 1532 return patch_imm_in_li32(insn_addr, (int32_t)n); 1533 } else if (NativeInstruction::is_movptr_at(insn_addr)) { 1534 // Move wide OOP 1535 return patch_addr_in_movptr(insn_addr, o); 1536 } 1537 ShouldNotReachHere(); 1538 return -1; 1539 } 1540 1541 void MacroAssembler::reinit_heapbase() { 1542 if (UseCompressedOops) { 1543 if (Universe::is_fully_initialized()) { 1544 mv(xheapbase, CompressedOops::ptrs_base()); 1545 } else { 1546 ExternalAddress target(CompressedOops::ptrs_base_addr()); 1547 relocate(target.rspec(), [&] { 1548 int32_t offset; 1549 la_patchable(xheapbase, target, offset); 1550 ld(xheapbase, Address(xheapbase, offset)); 1551 }); 1552 } 1553 } 1554 } 1555 1556 void MacroAssembler::movptr(Register Rd, address addr, int32_t &offset) { 1557 int64_t imm64 = (int64_t)addr; 1558 #ifndef PRODUCT 1559 { 1560 char buffer[64]; 1561 snprintf(buffer, sizeof(buffer), "0x%" PRIx64, imm64); 1562 block_comment(buffer); 1563 } 1564 #endif 1565 assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (int64_t)-1), 1566 "bit 47 overflows in address constant"); 1567 // Load upper 31 bits 1568 int64_t imm = imm64 >> 17; 1569 int64_t upper = imm, lower = imm; 1570 lower = (lower << 52) >> 52; 1571 upper -= lower; 1572 upper = (int32_t)upper; 1573 lui(Rd, upper); 1574 addi(Rd, Rd, lower); 1575 1576 // Load the rest 17 bits. 1577 slli(Rd, Rd, 11); 1578 addi(Rd, Rd, (imm64 >> 6) & 0x7ff); 1579 slli(Rd, Rd, 6); 1580 1581 // This offset will be used by following jalr/ld. 1582 offset = imm64 & 0x3f; 1583 } 1584 1585 void MacroAssembler::add(Register Rd, Register Rn, int64_t increment, Register temp) { 1586 if (is_imm_in_range(increment, 12, 0)) { 1587 addi(Rd, Rn, increment); 1588 } else { 1589 assert_different_registers(Rn, temp); 1590 li(temp, increment); 1591 add(Rd, Rn, temp); 1592 } 1593 } 1594 1595 void MacroAssembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) { 1596 if (is_imm_in_range(increment, 12, 0)) { 1597 addiw(Rd, Rn, increment); 1598 } else { 1599 assert_different_registers(Rn, temp); 1600 li(temp, increment); 1601 addw(Rd, Rn, temp); 1602 } 1603 } 1604 1605 void MacroAssembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) { 1606 if (is_imm_in_range(-decrement, 12, 0)) { 1607 addi(Rd, Rn, -decrement); 1608 } else { 1609 assert_different_registers(Rn, temp); 1610 li(temp, decrement); 1611 sub(Rd, Rn, temp); 1612 } 1613 } 1614 1615 void MacroAssembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) { 1616 if (is_imm_in_range(-decrement, 12, 0)) { 1617 addiw(Rd, Rn, -decrement); 1618 } else { 1619 assert_different_registers(Rn, temp); 1620 li(temp, decrement); 1621 subw(Rd, Rn, temp); 1622 } 1623 } 1624 1625 void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { 1626 andr(Rd, Rs1, Rs2); 1627 // addw: The result is clipped to 32 bits, then the sign bit is extended, 1628 // and the result is stored in Rd 1629 addw(Rd, Rd, zr); 1630 } 1631 1632 void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { 1633 orr(Rd, Rs1, Rs2); 1634 // addw: The result is clipped to 32 bits, then the sign bit is extended, 1635 // and the result is stored in Rd 1636 addw(Rd, Rd, zr); 1637 } 1638 1639 void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { 1640 xorr(Rd, Rs1, Rs2); 1641 // addw: The result is clipped to 32 bits, then the sign bit is extended, 1642 // and 
the result is stored in Rd 1643 addw(Rd, Rd, zr); 1644 } 1645 1646 // Note: load_unsigned_short used to be called load_unsigned_word. 1647 int MacroAssembler::load_unsigned_short(Register dst, Address src) { 1648 int off = offset(); 1649 lhu(dst, src); 1650 return off; 1651 } 1652 1653 int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 1654 int off = offset(); 1655 lbu(dst, src); 1656 return off; 1657 } 1658 1659 int MacroAssembler::load_signed_short(Register dst, Address src) { 1660 int off = offset(); 1661 lh(dst, src); 1662 return off; 1663 } 1664 1665 int MacroAssembler::load_signed_byte(Register dst, Address src) { 1666 int off = offset(); 1667 lb(dst, src); 1668 return off; 1669 } 1670 1671 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 1672 switch (size_in_bytes) { 1673 case 8: ld(dst, src); break; 1674 case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; 1675 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 1676 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 1677 default: ShouldNotReachHere(); 1678 } 1679 } 1680 1681 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes) { 1682 switch (size_in_bytes) { 1683 case 8: sd(src, dst); break; 1684 case 4: sw(src, dst); break; 1685 case 2: sh(src, dst); break; 1686 case 1: sb(src, dst); break; 1687 default: ShouldNotReachHere(); 1688 } 1689 } 1690 1691 // reverse bytes in halfword in lower 16 bits and sign-extend 1692 // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) 1693 void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { 1694 if (UseZbb) { 1695 rev8(Rd, Rs); 1696 srai(Rd, Rd, 48); 1697 return; 1698 } 1699 assert_different_registers(Rs, tmp); 1700 assert_different_registers(Rd, tmp); 1701 srli(tmp, Rs, 8); 1702 andi(tmp, tmp, 0xFF); 1703 slli(Rd, Rs, 56); 1704 srai(Rd, Rd, 48); // sign-extend 1705 orr(Rd, Rd, tmp); 1706 } 1707 1708 // reverse bytes in lower word and sign-extend 1709 // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) 1710 void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1711 if (UseZbb) { 1712 rev8(Rd, Rs); 1713 srai(Rd, Rd, 32); 1714 return; 1715 } 1716 assert_different_registers(Rs, tmp1, tmp2); 1717 assert_different_registers(Rd, tmp1, tmp2); 1718 revb_h_w_u(Rd, Rs, tmp1, tmp2); 1719 slli(tmp2, Rd, 48); 1720 srai(tmp2, tmp2, 32); // sign-extend 1721 srli(Rd, Rd, 16); 1722 orr(Rd, Rd, tmp2); 1723 } 1724 1725 // reverse bytes in halfword in lower 16 bits and zero-extend 1726 // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 1727 void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { 1728 if (UseZbb) { 1729 rev8(Rd, Rs); 1730 srli(Rd, Rd, 48); 1731 return; 1732 } 1733 assert_different_registers(Rs, tmp); 1734 assert_different_registers(Rd, tmp); 1735 srli(tmp, Rs, 8); 1736 andi(tmp, tmp, 0xFF); 1737 andi(Rd, Rs, 0xFF); 1738 slli(Rd, Rd, 8); 1739 orr(Rd, Rd, tmp); 1740 } 1741 1742 // reverse bytes in halfwords in lower 32 bits and zero-extend 1743 // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 1744 void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1745 if (UseZbb) { 1746 rev8(Rd, Rs); 1747 rori(Rd, Rd, 32); 1748 roriw(Rd, Rd, 16); 1749 zero_extend(Rd, Rd, 32); 1750 return; 1751 } 1752 assert_different_registers(Rs, tmp1, tmp2); 1753 
assert_different_registers(Rd, tmp1, tmp2); 1754 srli(tmp2, Rs, 16); 1755 revb_h_h_u(tmp2, tmp2, tmp1); 1756 revb_h_h_u(Rd, Rs, tmp1); 1757 slli(tmp2, tmp2, 16); 1758 orr(Rd, Rd, tmp2); 1759 } 1760 1761 // This method is only used for revb_h 1762 // Rd = Rs[47:0] Rs[55:48] Rs[63:56] 1763 void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1764 assert_different_registers(Rs, tmp1, tmp2); 1765 assert_different_registers(Rd, tmp1); 1766 srli(tmp1, Rs, 48); 1767 andi(tmp2, tmp1, 0xFF); 1768 slli(tmp2, tmp2, 8); 1769 srli(tmp1, tmp1, 8); 1770 orr(tmp1, tmp1, tmp2); 1771 slli(Rd, Rs, 16); 1772 orr(Rd, Rd, tmp1); 1773 } 1774 1775 // reverse bytes in each halfword 1776 // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] 1777 void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1778 if (UseZbb) { 1779 assert_different_registers(Rs, tmp1); 1780 assert_different_registers(Rd, tmp1); 1781 rev8(Rd, Rs); 1782 zero_extend(tmp1, Rd, 32); 1783 roriw(tmp1, tmp1, 16); 1784 slli(tmp1, tmp1, 32); 1785 srli(Rd, Rd, 32); 1786 roriw(Rd, Rd, 16); 1787 zero_extend(Rd, Rd, 32); 1788 orr(Rd, Rd, tmp1); 1789 return; 1790 } 1791 assert_different_registers(Rs, tmp1, tmp2); 1792 assert_different_registers(Rd, tmp1, tmp2); 1793 revb_h_helper(Rd, Rs, tmp1, tmp2); 1794 for (int i = 0; i < 3; ++i) { 1795 revb_h_helper(Rd, Rd, tmp1, tmp2); 1796 } 1797 } 1798 1799 // reverse bytes in each word 1800 // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] 1801 void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1802 if (UseZbb) { 1803 rev8(Rd, Rs); 1804 rori(Rd, Rd, 32); 1805 return; 1806 } 1807 assert_different_registers(Rs, tmp1, tmp2); 1808 assert_different_registers(Rd, tmp1, tmp2); 1809 revb(Rd, Rs, tmp1, tmp2); 1810 ror_imm(Rd, Rd, 32); 1811 } 1812 1813 // reverse bytes in doubleword 1814 // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] 1815 void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1816 if (UseZbb) { 1817 rev8(Rd, Rs); 1818 return; 1819 } 1820 assert_different_registers(Rs, tmp1, tmp2); 1821 assert_different_registers(Rd, tmp1, tmp2); 1822 andi(tmp1, Rs, 0xFF); 1823 slli(tmp1, tmp1, 8); 1824 for (int step = 8; step < 56; step += 8) { 1825 srli(tmp2, Rs, step); 1826 andi(tmp2, tmp2, 0xFF); 1827 orr(tmp1, tmp1, tmp2); 1828 slli(tmp1, tmp1, 8); 1829 } 1830 srli(Rd, Rs, 56); 1831 andi(Rd, Rd, 0xFF); 1832 orr(Rd, tmp1, Rd); 1833 } 1834 1835 // rotate right with shift bits 1836 void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) 1837 { 1838 if (UseZbb) { 1839 rori(dst, src, shift); 1840 return; 1841 } 1842 1843 assert_different_registers(dst, tmp); 1844 assert_different_registers(src, tmp); 1845 assert(shift < 64, "shift amount must be < 64"); 1846 slli(tmp, src, 64 - shift); 1847 srli(dst, src, shift); 1848 orr(dst, dst, tmp); 1849 } 1850 1851 void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { 1852 if (is_imm_in_range(imm, 12, 0)) { 1853 and_imm12(Rd, Rn, imm); 1854 } else { 1855 assert_different_registers(Rn, tmp); 1856 mv(tmp, imm); 1857 andr(Rd, Rn, tmp); 1858 } 1859 } 1860 1861 void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { 1862 ld(tmp1, adr); 1863 if (src.is_register()) { 1864 orr(tmp1, tmp1, src.as_register()); 1865 } else { 1866 if 
(is_imm_in_range(src.as_constant(), 12, 0)) { 1867 ori(tmp1, tmp1, src.as_constant()); 1868 } else { 1869 assert_different_registers(tmp1, tmp2); 1870 mv(tmp2, src.as_constant()); 1871 orr(tmp1, tmp1, tmp2); 1872 } 1873 } 1874 sd(tmp1, adr); 1875 } 1876 1877 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L) { 1878 assert_different_registers(oop, trial_klass, tmp1, tmp2); 1879 if (UseCompressedClassPointers) { 1880 lwu(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 1881 if (CompressedKlassPointers::base() == NULL) { 1882 slli(tmp1, tmp1, CompressedKlassPointers::shift()); 1883 beq(trial_klass, tmp1, L); 1884 return; 1885 } 1886 decode_klass_not_null(tmp1, tmp2); 1887 } else { 1888 ld(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 1889 } 1890 beq(trial_klass, tmp1, L); 1891 } 1892 1893 // Move an oop into a register. 1894 void MacroAssembler::movoop(Register dst, jobject obj) { 1895 int oop_index; 1896 if (obj == NULL) { 1897 oop_index = oop_recorder()->allocate_oop_index(obj); 1898 } else { 1899 #ifdef ASSERT 1900 { 1901 ThreadInVMfromUnknown tiv; 1902 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 1903 } 1904 #endif 1905 oop_index = oop_recorder()->find_index(obj); 1906 } 1907 RelocationHolder rspec = oop_Relocation::spec(oop_index); 1908 1909 if (BarrierSet::barrier_set()->barrier_set_assembler()->supports_instruction_patching()) { 1910 mv(dst, Address((address)obj, rspec)); 1911 } else { 1912 address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address 1913 ld_constant(dst, Address(dummy, rspec)); 1914 } 1915 } 1916 1917 // Move a metadata address into a register. 1918 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { 1919 int oop_index; 1920 if (obj == NULL) { 1921 oop_index = oop_recorder()->allocate_metadata_index(obj); 1922 } else { 1923 oop_index = oop_recorder()->find_index(obj); 1924 } 1925 RelocationHolder rspec = metadata_Relocation::spec(oop_index); 1926 mv(dst, Address((address)obj, rspec)); 1927 } 1928 1929 // Writes to stack successive pages until offset reached to check for 1930 // stack overflow + shadow pages. This clobbers tmp. 1931 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 1932 assert_different_registers(tmp, size, t0); 1933 // Bang stack for total size given plus shadow page size. 1934 // Bang one page at a time because large size can bang beyond yellow and 1935 // red zones. 1936 mv(t0, (int)os::vm_page_size()); 1937 Label loop; 1938 bind(loop); 1939 sub(tmp, sp, t0); 1940 subw(size, size, t0); 1941 sd(size, Address(tmp)); 1942 bgtz(size, loop); 1943 1944 // Bang down shadow pages too. 1945 // At this point, (tmp-0) is the last address touched, so don't 1946 // touch it again. (It was touched as (tmp-pagesize) but then tmp 1947 // was post-decremented.) Skip this address by starting at i=1, and 1948 // touch a few more pages below. N.B. It is important to touch all 1949 // the way down to and including i=StackShadowPages. 1950 for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / (int)os::vm_page_size()) - 1; i++) { 1951 // this could be any sized move but this can be a debugging crumb 1952 // so the bigger the better.
1953 sub(tmp, tmp, (int)os::vm_page_size()); 1954 sd(size, Address(tmp, 0)); 1955 } 1956 } 1957 1958 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { 1959 int32_t offset = 0; 1960 _masm = masm; 1961 ExternalAddress target((address)flag_addr); 1962 _masm->relocate(target.rspec(), [&] { 1963 int32_t offset; 1964 _masm->la_patchable(t0, target, offset); 1965 _masm->lbu(t0, Address(t0, offset)); 1966 }); 1967 if (value) { 1968 _masm->bnez(t0, _label); 1969 } else { 1970 _masm->beqz(t0, _label); 1971 } 1972 } 1973 1974 SkipIfEqual::~SkipIfEqual() { 1975 _masm->bind(_label); 1976 _masm = NULL; 1977 } 1978 1979 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, Register tmp2) { 1980 const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 1981 ld(dst, Address(xmethod, Method::const_offset())); 1982 ld(dst, Address(dst, ConstMethod::constants_offset())); 1983 ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes())); 1984 ld(dst, Address(dst, mirror_offset)); 1985 resolve_oop_handle(dst, tmp1, tmp2); 1986 } 1987 1988 void MacroAssembler::resolve_oop_handle(Register result, Register tmp1, Register tmp2) { 1989 // OopHandle::resolve is an indirection. 1990 assert_different_registers(result, tmp1, tmp2); 1991 access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp1, tmp2); 1992 } 1993 1994 // ((WeakHandle)result).resolve() 1995 void MacroAssembler::resolve_weak_handle(Register result, Register tmp1, Register tmp2) { 1996 assert_different_registers(result, tmp1, tmp2); 1997 Label resolved; 1998 1999 // A null weak handle resolves to null. 2000 beqz(result, resolved); 2001 2002 // Only 64 bit platforms support GCs that require a tmp register 2003 // Only IN_HEAP loads require a thread_tmp register 2004 // WeakHandle::resolve is an indirection like jweak. 
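  // (The IN_NATIVE | ON_PHANTOM_OOP_REF decorators on the load below ask the
  // GC's barrier set to treat this as a phantom-strength native access; the
  // precise barrier emitted is GC-specific (descriptive note only).)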
2005 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, 2006 result, Address(result), tmp1, tmp2); 2007 bind(resolved); 2008 } 2009 2010 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, 2011 Register dst, Address src, 2012 Register tmp1, Register tmp2) { 2013 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2014 decorators = AccessInternal::decorator_fixup(decorators, type); 2015 bool as_raw = (decorators & AS_RAW) != 0; 2016 if (as_raw) { 2017 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2); 2018 } else { 2019 bs->load_at(this, decorators, type, dst, src, tmp1, tmp2); 2020 } 2021 } 2022 2023 void MacroAssembler::null_check(Register reg, int offset) { 2024 if (needs_explicit_null_check(offset)) { 2025 // provoke OS NULL exception if reg = NULL by 2026 // accessing M[reg] w/o changing any registers 2027 // NOTE: this is plenty to provoke a segv 2028 ld(zr, Address(reg, 0)); 2029 } else { 2030 // nothing to do, (later) access of M[reg + offset] 2031 // will provoke OS NULL exception if reg = NULL 2032 } 2033 } 2034 2035 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, 2036 Address dst, Register val, 2037 Register tmp1, Register tmp2, Register tmp3) { 2038 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2039 decorators = AccessInternal::decorator_fixup(decorators, type); 2040 bool as_raw = (decorators & AS_RAW) != 0; 2041 if (as_raw) { 2042 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3); 2043 } else { 2044 bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3); 2045 } 2046 } 2047 2048 // Algorithm must match CompressedOops::encode. 2049 void MacroAssembler::encode_heap_oop(Register d, Register s) { 2050 verify_oop_msg(s, "broken oop in encode_heap_oop"); 2051 if (CompressedOops::base() == NULL) { 2052 if (CompressedOops::shift() != 0) { 2053 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2054 srli(d, s, LogMinObjAlignmentInBytes); 2055 } else { 2056 mv(d, s); 2057 } 2058 } else { 2059 Label notNull; 2060 sub(d, s, xheapbase); 2061 bgez(d, notNull); 2062 mv(d, zr); 2063 bind(notNull); 2064 if (CompressedOops::shift() != 0) { 2065 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2066 srli(d, d, CompressedOops::shift()); 2067 } 2068 } 2069 } 2070 2071 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { 2072 assert_different_registers(dst, tmp); 2073 assert_different_registers(src, tmp); 2074 if (UseCompressedClassPointers) { 2075 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); 2076 decode_klass_not_null(dst, tmp); 2077 } else { 2078 ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); 2079 } 2080 } 2081 2082 void MacroAssembler::load_klass_check_null(Register dst, Register src, Register tmp) { 2083 null_check(src, oopDesc::klass_offset_in_bytes()); 2084 load_klass(dst, src, tmp); 2085 } 2086 2087 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { 2088 // FIXME: Should this be a store release? concurrent GCs assume 2089 // klass length is valid if klass field is not null.
2090 if (UseCompressedClassPointers) { 2091 encode_klass_not_null(src, tmp); 2092 sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2093 } else { 2094 sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2095 } 2096 } 2097 2098 void MacroAssembler::store_klass_gap(Register dst, Register src) { 2099 if (UseCompressedClassPointers) { 2100 // Store to klass gap in destination 2101 sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); 2102 } 2103 } 2104 2105 void MacroAssembler::decode_klass_not_null(Register r, Register tmp) { 2106 assert_different_registers(r, tmp); 2107 decode_klass_not_null(r, r, tmp); 2108 } 2109 2110 void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { 2111 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2112 2113 if (CompressedKlassPointers::base() == NULL) { 2114 if (CompressedKlassPointers::shift() != 0) { 2115 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2116 slli(dst, src, LogKlassAlignmentInBytes); 2117 } else { 2118 mv(dst, src); 2119 } 2120 return; 2121 } 2122 2123 Register xbase = dst; 2124 if (dst == src) { 2125 xbase = tmp; 2126 } 2127 2128 assert_different_registers(src, xbase); 2129 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2130 2131 if (CompressedKlassPointers::shift() != 0) { 2132 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2133 assert_different_registers(t0, xbase); 2134 shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); 2135 } else { 2136 add(dst, xbase, src); 2137 } 2138 } 2139 2140 void MacroAssembler::encode_klass_not_null(Register r, Register tmp) { 2141 assert_different_registers(r, tmp); 2142 encode_klass_not_null(r, r, tmp); 2143 } 2144 2145 void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { 2146 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2147 2148 if (CompressedKlassPointers::base() == NULL) { 2149 if (CompressedKlassPointers::shift() != 0) { 2150 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2151 srli(dst, src, LogKlassAlignmentInBytes); 2152 } else { 2153 mv(dst, src); 2154 } 2155 return; 2156 } 2157 2158 if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && 2159 CompressedKlassPointers::shift() == 0) { 2160 zero_extend(dst, src, 32); 2161 return; 2162 } 2163 2164 Register xbase = dst; 2165 if (dst == src) { 2166 xbase = tmp; 2167 } 2168 2169 assert_different_registers(src, xbase); 2170 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2171 sub(dst, src, xbase); 2172 if (CompressedKlassPointers::shift() != 0) { 2173 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2174 srli(dst, dst, LogKlassAlignmentInBytes); 2175 } 2176 } 2177 2178 void MacroAssembler::decode_heap_oop_not_null(Register r) { 2179 decode_heap_oop_not_null(r, r); 2180 } 2181 2182 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 2183 assert(UseCompressedOops, "should only be used for compressed headers"); 2184 assert(Universe::heap() != NULL, "java heap should be initialized"); 2185 // Cannot assert, unverified entry point counts instructions (see .ad file) 2186 // vtableStubs also counts instructions in pd_code_size_limit. 2187 // Also do not verify_oop as this is called by verify_oop. 
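  // Decode sketch (describing the code below): with a non-null heap base the
  // narrow oop n expands to xheapbase + (n << shift); with a null base only
  // the shift, or a plain register move, is needed.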
2188 if (CompressedOops::shift() != 0) { 2189 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2190 slli(dst, src, LogMinObjAlignmentInBytes); 2191 if (CompressedOops::base() != NULL) { 2192 add(dst, xheapbase, dst); 2193 } 2194 } else { 2195 assert(CompressedOops::base() == NULL, "sanity"); 2196 mv(dst, src); 2197 } 2198 } 2199 2200 void MacroAssembler::decode_heap_oop(Register d, Register s) { 2201 if (CompressedOops::base() == NULL) { 2202 if (CompressedOops::shift() != 0 || d != s) { 2203 slli(d, s, CompressedOops::shift()); 2204 } 2205 } else { 2206 Label done; 2207 mv(d, s); 2208 beqz(s, done); 2209 shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); 2210 bind(done); 2211 } 2212 verify_oop_msg(d, "broken oop in decode_heap_oop"); 2213 } 2214 2215 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1, 2216 Register tmp2, Register tmp3, DecoratorSet decorators) { 2217 access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3); 2218 } 2219 2220 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, 2221 Register tmp2, DecoratorSet decorators) { 2222 access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); 2223 } 2224 2225 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, 2226 Register tmp2, DecoratorSet decorators) { 2227 access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, tmp2); 2228 } 2229 2230 // Used for storing NULLs. 2231 void MacroAssembler::store_heap_oop_null(Address dst) { 2232 access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg); 2233 } 2234 2235 int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, 2236 bool want_remainder) 2237 { 2238 // Full implementation of Java idiv and irem. The function 2239 // returns the (pc) offset of the div instruction - may be needed 2240 // for implicit exceptions. 2241 // 2242 // input : rs1: dividend 2243 // rs2: divisor 2244 // 2245 // result: either 2246 // quotient (= rs1 idiv rs2) 2247 // remainder (= rs1 irem rs2) 2248 2249 2250 int idivl_offset = offset(); 2251 if (!want_remainder) { 2252 divw(result, rs1, rs2); 2253 } else { 2254 remw(result, rs1, rs2); // result = rs1 % rs2; 2255 } 2256 return idivl_offset; 2257 } 2258 2259 int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, 2260 bool want_remainder) 2261 { 2262 // Full implementation of Java ldiv and lrem. The function 2263 // returns the (pc) offset of the div instruction - may be needed 2264 // for implicit exceptions. 2265 // 2266 // input : rs1: dividend 2267 // rs2: divisor 2268 // 2269 // result: either 2270 // quotient (= rs1 idiv rs2) 2271 // remainder (= rs1 irem rs2) 2272 2273 int idivq_offset = offset(); 2274 if (!want_remainder) { 2275 div(result, rs1, rs2); 2276 } else { 2277 rem(result, rs1, rs2); // result = rs1 % rs2; 2278 } 2279 return idivq_offset; 2280 } 2281 2282 // Look up the method for a megamorphic invokeinterface call. 2283 // The target method is determined by <intf_klass, itable_index>. 2284 // The receiver klass is in recv_klass. 2285 // On success, the result will be in method_result, and execution falls through. 2286 // On failure, execution transfers to the given label.
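// Rough shape of the lookup (an illustrative sketch, not the exact generated code):
//   for (scan = itable_start; ; scan += itableOffsetEntry::size()) {
//     if (scan->interface() == NULL)       goto L_no_such_interface;
//     if (scan->interface() == intf_klass) break;
//   }
//   if (return_method)
//     method_result = *(recv_klass + scan->offset() + itable_index * wordSize + itentry_off);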
2287 void MacroAssembler::lookup_interface_method(Register recv_klass, 2288 Register intf_klass, 2289 RegisterOrConstant itable_index, 2290 Register method_result, 2291 Register scan_tmp, 2292 Label& L_no_such_interface, 2293 bool return_method) { 2294 assert_different_registers(recv_klass, intf_klass, scan_tmp); 2295 assert_different_registers(method_result, intf_klass, scan_tmp); 2296 assert(recv_klass != method_result || !return_method, 2297 "recv_klass can be destroyed when method isn't needed"); 2298 assert(itable_index.is_constant() || itable_index.as_register() == method_result, 2299 "caller must use same register for non-constant itable index as for method"); 2300 2301 // Compute start of first itableOffsetEntry (which is at the end of the vtable). 2302 int vtable_base = in_bytes(Klass::vtable_start_offset()); 2303 int itentry_off = itableMethodEntry::method_offset_in_bytes(); 2304 int scan_step = itableOffsetEntry::size() * wordSize; 2305 int vte_size = vtableEntry::size_in_bytes(); 2306 assert(vte_size == wordSize, "else adjust times_vte_scale"); 2307 2308 lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset())); 2309 2310 // %%% Could store the aligned, prescaled offset in the klassoop. 2311 shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3); 2312 add(scan_tmp, scan_tmp, vtable_base); 2313 2314 if (return_method) { 2315 // Adjust recv_klass by scaled itable_index, so we can free itable_index. 2316 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 2317 if (itable_index.is_register()) { 2318 slli(t0, itable_index.as_register(), 3); 2319 } else { 2320 mv(t0, itable_index.as_constant() << 3); 2321 } 2322 add(recv_klass, recv_klass, t0); 2323 if (itentry_off) { 2324 add(recv_klass, recv_klass, itentry_off); 2325 } 2326 } 2327 2328 Label search, found_method; 2329 2330 ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); 2331 beq(intf_klass, method_result, found_method); 2332 bind(search); 2333 // Check that the previous entry is non-null. A null entry means that 2334 // the receiver class doesn't implement the interface, and wasn't the 2335 // same as when the caller was compiled. 2336 beqz(method_result, L_no_such_interface, /* is_far */ true); 2337 addi(scan_tmp, scan_tmp, scan_step); 2338 ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); 2339 bne(intf_klass, method_result, search); 2340 2341 bind(found_method); 2342 2343 // Got a hit.
2344 if (return_method) { 2345 lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes())); 2346 add(method_result, recv_klass, scan_tmp); 2347 ld(method_result, Address(method_result)); 2348 } 2349 } 2350 2351 // virtual method calling 2352 void MacroAssembler::lookup_virtual_method(Register recv_klass, 2353 RegisterOrConstant vtable_index, 2354 Register method_result) { 2355 const int base = in_bytes(Klass::vtable_start_offset()); 2356 assert(vtableEntry::size() * wordSize == 8, 2357 "adjust the scaling in the code below"); 2358 int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes(); 2359 2360 if (vtable_index.is_register()) { 2361 shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); 2362 ld(method_result, Address(method_result, vtable_offset_in_bytes)); 2363 } else { 2364 vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; 2365 ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes)); 2366 } 2367 } 2368 2369 void MacroAssembler::membar(uint32_t order_constraint) { 2370 address prev = pc() - NativeMembar::instruction_size; 2371 address last = code()->last_insn(); 2372 2373 if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) { 2374 NativeMembar *bar = NativeMembar_at(prev); 2375 // We are merging two memory barrier instructions. On RISCV we 2376 // can do this simply by ORing them together. 2377 bar->set_kind(bar->get_kind() | order_constraint); 2378 BLOCK_COMMENT("merged membar"); 2379 } else { 2380 code()->set_last_insn(pc()); 2381 2382 uint32_t predecessor = 0; 2383 uint32_t successor = 0; 2384 2385 membar_mask_to_pred_succ(order_constraint, predecessor, successor); 2386 fence(predecessor, successor); 2387 } 2388 } 2389 2390 // Form an address from base + offset in Rd. Rd may or may not 2391 // actually be used: you must use the Address that is returned. It 2392 // is up to you to ensure that the shift provided matches the size 2393 // of your data. 2394 Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_offset) { 2395 if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12 2396 return Address(base, byte_offset); 2397 } 2398 2399 assert_different_registers(Rd, base, noreg); 2400 2401 // Do it the hard way 2402 mv(Rd, byte_offset); 2403 add(Rd, base, Rd); 2404 return Address(Rd); 2405 } 2406 2407 void MacroAssembler::check_klass_subtype(Register sub_klass, 2408 Register super_klass, 2409 Register tmp_reg, 2410 Label& L_success) { 2411 Label L_failure; 2412 check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL); 2413 check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL); 2414 bind(L_failure); 2415 } 2416 2417 void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { 2418 ld(t0, Address(xthread, JavaThread::polling_word_offset())); 2419 if (acquire) { 2420 membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); 2421 } 2422 if (at_return) { 2423 bgtu(in_nmethod ?
sp : fp, t0, slow_path, true /* is_far */); 2424 } else { 2425 andi(t0, t0, SafepointMechanism::poll_bit()); 2426 bnez(t0, slow_path, true /* is_far */); 2427 } 2428 } 2429 2430 void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, 2431 Label &succeed, Label *fail) { 2432 // oldv holds comparison value 2433 // newv holds value to write in exchange 2434 // addr identifies memory word to compare against/update 2435 Label retry_load, nope; 2436 bind(retry_load); 2437 // Load reserved from the memory location 2438 lr_d(tmp, addr, Assembler::aqrl); 2439 // Fail and exit if it is not what we expect 2440 bne(tmp, oldv, nope); 2441 // If the store conditional succeeds, tmp will be zero 2442 sc_d(tmp, newv, addr, Assembler::rl); 2443 beqz(tmp, succeed); 2444 // Retry only when the store conditional failed 2445 j(retry_load); 2446 2447 bind(nope); 2448 membar(AnyAny); 2449 mv(oldv, tmp); 2450 if (fail != NULL) { 2451 j(*fail); 2452 } 2453 } 2454 2455 void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, 2456 Label &succeed, Label *fail) { 2457 assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); 2458 cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); 2459 } 2460 2461 void MacroAssembler::load_reserved(Register addr, 2462 enum operand_size size, 2463 Assembler::Aqrl acquire) { 2464 switch (size) { 2465 case int64: 2466 lr_d(t0, addr, acquire); 2467 break; 2468 case int32: 2469 lr_w(t0, addr, acquire); 2470 break; 2471 case uint32: 2472 lr_w(t0, addr, acquire); 2473 zero_extend(t0, t0, 32); 2474 break; 2475 default: 2476 ShouldNotReachHere(); 2477 } 2478 } 2479 2480 void MacroAssembler::store_conditional(Register addr, 2481 Register new_val, 2482 enum operand_size size, 2483 Assembler::Aqrl release) { 2484 switch (size) { 2485 case int64: 2486 sc_d(t0, new_val, addr, release); 2487 break; 2488 case int32: 2489 case uint32: 2490 sc_w(t0, new_val, addr, release); 2491 break; 2492 default: 2493 ShouldNotReachHere(); 2494 } 2495 } 2496 2497 2498 void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, 2499 Register new_val, 2500 enum operand_size size, 2501 Register tmp1, Register tmp2, Register tmp3) { 2502 assert(size == int8 || size == int16, "unsupported operand size"); 2503 2504 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; 2505 2506 andi(shift, addr, 3); 2507 slli(shift, shift, 3); 2508 2509 andi(aligned_addr, addr, ~3); 2510 2511 if (size == int8) { 2512 mv(mask, 0xff); 2513 } else { 2514 // size == int16 case 2515 mv(mask, -1); 2516 zero_extend(mask, mask, 16); 2517 } 2518 sll(mask, mask, shift); 2519 2520 xori(not_mask, mask, -1); 2521 2522 sll(expected, expected, shift); 2523 andr(expected, expected, mask); 2524 2525 sll(new_val, new_val, shift); 2526 andr(new_val, new_val, mask); 2527 } 2528 2529 // cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. 2530 // It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w, 2531 // which are forced to work with 4-byte aligned address. 
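// Worked example (illustrative only): for a byte-sized CAS at an address A with
// (A & 3) == 2, cmpxchg_narrow_value_helper computes shift = 16, aligned_addr = A & ~3,
// mask = 0xff << 16 and not_mask = ~mask; expected and new_val are pre-shifted into
// bits [23:16], so the lr.w/sc.w loop below can splice the new byte into the
// containing word without disturbing its neighbours.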
2532 void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, 2533 Register new_val, 2534 enum operand_size size, 2535 Assembler::Aqrl acquire, Assembler::Aqrl release, 2536 Register result, bool result_as_bool, 2537 Register tmp1, Register tmp2, Register tmp3) { 2538 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; 2539 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); 2540 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); 2541 2542 Label retry, fail, done; 2543 2544 bind(retry); 2545 lr_w(old, aligned_addr, acquire); 2546 andr(tmp, old, mask); 2547 bne(tmp, expected, fail); 2548 2549 andr(tmp, old, not_mask); 2550 orr(tmp, tmp, new_val); 2551 sc_w(tmp, tmp, aligned_addr, release); 2552 bnez(tmp, retry); 2553 2554 if (result_as_bool) { 2555 mv(result, 1); 2556 j(done); 2557 2558 bind(fail); 2559 mv(result, zr); 2560 2561 bind(done); 2562 } else { 2563 andr(tmp, old, mask); 2564 2565 bind(fail); 2566 srl(result, tmp, shift); 2567 2568 if (size == int8) { 2569 sign_extend(result, result, 8); 2570 } else { 2571 // size == int16 case 2572 sign_extend(result, result, 16); 2573 } 2574 } 2575 } 2576 2577 // weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement 2578 // the weak CAS stuff. The major difference is that it simply fails when the store conditional 2579 // fails. 2580 void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, 2581 Register new_val, 2582 enum operand_size size, 2583 Assembler::Aqrl acquire, Assembler::Aqrl release, 2584 Register result, 2585 Register tmp1, Register tmp2, Register tmp3) { 2586 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; 2587 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); 2588 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); 2589 2590 Label fail, done; 2591 2592 lr_w(old, aligned_addr, acquire); 2593 andr(tmp, old, mask); 2594 bne(tmp, expected, fail); 2595 2596 andr(tmp, old, not_mask); 2597 orr(tmp, tmp, new_val); 2598 sc_w(tmp, tmp, aligned_addr, release); 2599 bnez(tmp, fail); 2600 2601 // Success 2602 mv(result, 1); 2603 j(done); 2604 2605 // Fail 2606 bind(fail); 2607 mv(result, zr); 2608 2609 bind(done); 2610 } 2611 2612 void MacroAssembler::cmpxchg(Register addr, Register expected, 2613 Register new_val, 2614 enum operand_size size, 2615 Assembler::Aqrl acquire, Assembler::Aqrl release, 2616 Register result, bool result_as_bool) { 2617 assert(size != int8 && size != int16, "unsupported operand size"); 2618 2619 Label retry_load, done, ne_done; 2620 bind(retry_load); 2621 load_reserved(addr, size, acquire); 2622 bne(t0, expected, ne_done); 2623 store_conditional(addr, new_val, size, release); 2624 bnez(t0, retry_load); 2625 2626 // equal, succeed 2627 if (result_as_bool) { 2628 mv(result, 1); 2629 } else { 2630 mv(result, expected); 2631 } 2632 j(done); 2633 2634 // not equal, failed 2635 bind(ne_done); 2636 if (result_as_bool) { 2637 mv(result, zr); 2638 } else { 2639 mv(result, t0); 2640 } 2641 2642 bind(done); 2643 } 2644 2645 void MacroAssembler::cmpxchg_weak(Register addr, Register expected, 2646 Register new_val, 2647 enum operand_size size, 2648 Assembler::Aqrl acquire, Assembler::Aqrl release, 2649 Register result) { 2650 Label fail, done; 2651 load_reserved(addr, size, acquire); 2652 bne(t0, expected, fail); 2653 store_conditional(addr, new_val,
size, release); 2654 bnez(t0, fail); 2655 2656 // Success 2657 mv(result, 1); 2658 j(done); 2659 2660 // Fail 2661 bind(fail); 2662 mv(result, zr); 2663 2664 bind(done); 2665 } 2666 2667 #define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ 2668 void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ 2669 prev = prev->is_valid() ? prev : zr; \ 2670 if (incr.is_register()) { \ 2671 AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 2672 } else { \ 2673 mv(t0, incr.as_constant()); \ 2674 AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 2675 } \ 2676 return; \ 2677 } 2678 2679 ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) 2680 ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) 2681 ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) 2682 ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) 2683 2684 #undef ATOMIC_OP 2685 2686 #define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ 2687 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ 2688 prev = prev->is_valid() ? prev : zr; \ 2689 AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 2690 return; \ 2691 } 2692 2693 ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) 2694 ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) 2695 ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) 2696 ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) 2697 2698 #undef ATOMIC_XCHG 2699 2700 #define ATOMIC_XCHGU(OP1, OP2) \ 2701 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ 2702 atomic_##OP2(prev, newv, addr); \ 2703 zero_extend(prev, prev, 32); \ 2704 return; \ 2705 } 2706 2707 ATOMIC_XCHGU(xchgwu, xchgw) 2708 ATOMIC_XCHGU(xchgalwu, xchgalw) 2709 2710 #undef ATOMIC_XCHGU 2711 2712 void MacroAssembler::far_jump(Address entry, Register tmp) { 2713 assert(ReservedCodeCacheSize < 4*G, "branch out of range"); 2714 assert(CodeCache::find_blob(entry.target()) != NULL, 2715 "destination of far call not found in code cache"); 2716 assert(entry.rspec().type() == relocInfo::external_word_type 2717 || entry.rspec().type() == relocInfo::runtime_call_type 2718 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type"); 2719 IncompressibleRegion ir(this); // Fixed length: see MacroAssembler::far_branch_size() 2720 if (far_branches()) { 2721 // We can use auipc + jalr here because we know that the total size of 2722 // the code cache cannot exceed 2Gb. 2723 relocate(entry.rspec(), [&] { 2724 int32_t offset; 2725 la_patchable(tmp, entry, offset); 2726 jalr(x0, tmp, offset); 2727 }); 2728 } else { 2729 j(entry); 2730 } 2731 } 2732 2733 void MacroAssembler::far_call(Address entry, Register tmp) { 2734 assert(ReservedCodeCacheSize < 4*G, "branch out of range"); 2735 assert(CodeCache::find_blob(entry.target()) != NULL, 2736 "destination of far call not found in code cache"); 2737 assert(entry.rspec().type() == relocInfo::external_word_type 2738 || entry.rspec().type() == relocInfo::runtime_call_type 2739 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type"); 2740 IncompressibleRegion ir(this); // Fixed length: see MacroAssembler::far_branch_size() 2741 if (far_branches()) { 2742 // We can use auipc + jalr here because we know that the total size of 2743 // the code cache cannot exceed 2Gb. 
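    // (Unlike far_jump, the jalr below uses x1/ra as its destination register,
    // so the return address is linked and the sequence behaves as a call.)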
2744 relocate(entry.rspec(), [&] { 2745 int32_t offset; 2746 la_patchable(tmp, entry, offset); 2747 jalr(x1, tmp, offset); // link 2748 }); 2749 } else { 2750 jal(entry); // link 2751 } 2752 } 2753 2754 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 2755 Register super_klass, 2756 Register tmp_reg, 2757 Label* L_success, 2758 Label* L_failure, 2759 Label* L_slow_path, 2760 Register super_check_offset) { 2761 assert_different_registers(sub_klass, super_klass, tmp_reg); 2762 bool must_load_sco = (super_check_offset == noreg); 2763 if (must_load_sco) { 2764 assert(tmp_reg != noreg, "supply either a temp or a register offset"); 2765 } else { 2766 assert_different_registers(sub_klass, super_klass, super_check_offset); 2767 } 2768 2769 Label L_fallthrough; 2770 int label_nulls = 0; 2771 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 2772 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 2773 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 2774 assert(label_nulls <= 1, "at most one NULL in batch"); 2775 2776 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 2777 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 2778 Address super_check_offset_addr(super_klass, sco_offset); 2779 2780 // Hacked jmp, which may only be used just before L_fallthrough. 2781 #define final_jmp(label) \ 2782 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 2783 else j(label) /*omit semi*/ 2784 2785 // If the pointers are equal, we are done (e.g., String[] elements). 2786 // This self-check enables sharing of secondary supertype arrays among 2787 // non-primary types such as array-of-interface. Otherwise, each such 2788 // type would need its own customized SSA. 2789 // We move this check to the front of the fast path because many 2790 // type checks are in fact trivially successful in this manner, 2791 // so we get a nicely predicted branch right at the start of the check. 2792 beq(sub_klass, super_klass, *L_success); 2793 2794 // Check the supertype display: 2795 if (must_load_sco) { 2796 lwu(tmp_reg, super_check_offset_addr); 2797 super_check_offset = tmp_reg; 2798 } 2799 add(t0, sub_klass, super_check_offset); 2800 Address super_check_addr(t0); 2801 ld(t0, super_check_addr); // load displayed supertype 2802 2803 // This check has worked decisively for primary supers. 2804 // Secondary supers are sought in the super_cache ('super_cache_addr'). 2805 // (Secondary supers are interfaces and very deeply nested subtypes.) 2806 // This works in the same check above because of a tricky aliasing 2807 // between the super_cache and the primary super display elements. 2808 // (The 'super_check_addr' can address either, as the case requires.) 2809 // Note that the cache is updated below if it does not help us find 2810 // what we need immediately. 2811 // So if it was a primary super, we can just fail immediately. 2812 // Otherwise, it's the slow path for us (no success at this point).
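  // In short, the three outcomes below are: display hit -> success; the probed
  // slot was the secondary_super_cache slot -> still unknown, take the slow path;
  // any other mismatch -> definite failure.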
2813 2814 beq(super_klass, t0, *L_success); 2815 mv(t1, sc_offset); 2816 if (L_failure == &L_fallthrough) { 2817 beq(super_check_offset, t1, *L_slow_path); 2818 } else { 2819 bne(super_check_offset, t1, *L_failure, /* is_far */ true); 2820 final_jmp(*L_slow_path); 2821 } 2822 2823 bind(L_fallthrough); 2824 2825 #undef final_jmp 2826 } 2827 2828 // Scans count pointer sized words at [addr] for occurrence of value, 2829 // generic 2830 void MacroAssembler::repne_scan(Register addr, Register value, Register count, 2831 Register tmp) { 2832 Label Lloop, Lexit; 2833 beqz(count, Lexit); 2834 bind(Lloop); 2835 ld(tmp, addr); 2836 beq(value, tmp, Lexit); 2837 add(addr, addr, wordSize); 2838 sub(count, count, 1); 2839 bnez(count, Lloop); 2840 bind(Lexit); 2841 } 2842 2843 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 2844 Register super_klass, 2845 Register tmp1_reg, 2846 Register tmp2_reg, 2847 Label* L_success, 2848 Label* L_failure) { 2849 assert_different_registers(sub_klass, super_klass, tmp1_reg); 2850 if (tmp2_reg != noreg) { 2851 assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); 2852 } 2853 #define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) 2854 2855 Label L_fallthrough; 2856 int label_nulls = 0; 2857 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 2858 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 2859 2860 assert(label_nulls <= 1, "at most one NULL in the batch"); 2861 2862 // A couple of useful fields in sub_klass: 2863 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 2864 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 2865 Address secondary_supers_addr(sub_klass, ss_offset); 2866 Address super_cache_addr( sub_klass, sc_offset); 2867 2868 BLOCK_COMMENT("check_klass_subtype_slow_path"); 2869 2870 // Do a linear scan of the secondary super-klass chain. 2871 // This code is rarely used, so simplicity is a virtue here. 2872 // The repne_scan instruction uses fixed registers, which we must spill. 2873 // Don't worry too much about pre-existing connections with the input regs. 2874 2875 assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) 2876 assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) 2877 2878 RegSet pushed_registers; 2879 if (!IS_A_TEMP(x12)) { 2880 pushed_registers += x12; 2881 } 2882 if (!IS_A_TEMP(x15)) { 2883 pushed_registers += x15; 2884 } 2885 2886 if (super_klass != x10) { 2887 if (!IS_A_TEMP(x10)) { 2888 pushed_registers += x10; 2889 } 2890 } 2891 2892 push_reg(pushed_registers, sp); 2893 2894 // Get super_klass value into x10 (even if it was in x15 or x12) 2895 mv(x10, super_klass); 2896 2897 #ifndef PRODUCT 2898 mv(t1, (address)&SharedRuntime::_partial_subtype_ctr); 2899 Address pst_counter_addr(t1); 2900 ld(t0, pst_counter_addr); 2901 add(t0, t0, 1); 2902 sd(t0, pst_counter_addr); 2903 #endif // PRODUCT 2904 2905 // We will consult the secondary-super array. 2906 ld(x15, secondary_supers_addr); 2907 // Load the array length. 2908 lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes())); 2909 // Skip to start of data. 2910 add(x15, x15, Array<Klass*>::base_offset_in_bytes()); 2911 2912 // Set t0 to an obvious invalid value, falling through by default 2913 mv(t0, -1); 2914 // Scan X12 words at [X15] for an occurrence of X10. 
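  // (The -1 sentinel above covers the empty-array case: after the scan t0 equals
  // the super_klass value only on a hit, so the bne following the register pops
  // routes every miss to L_failure.)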
2915 repne_scan(x15, x10, x12, t0); 2916 2917 // pop will restore x10, so we should use a temp register to keep its value 2918 mv(t1, x10); 2919 2920 // Unspill the temp registers: 2921 pop_reg(pushed_registers, sp); 2922 2923 bne(t1, t0, *L_failure); 2924 2925 // Success. Cache the super we found and proceed in triumph. 2926 sd(super_klass, super_cache_addr); 2927 2928 if (L_success != &L_fallthrough) { 2929 j(*L_success); 2930 } 2931 2932 #undef IS_A_TEMP 2933 2934 bind(L_fallthrough); 2935 } 2936 2937 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 2938 void MacroAssembler::tlab_allocate(Register obj, 2939 Register var_size_in_bytes, 2940 int con_size_in_bytes, 2941 Register tmp1, 2942 Register tmp2, 2943 Label& slow_case, 2944 bool is_far) { 2945 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2946 bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); 2947 } 2948 2949 // get_thread() can be called anywhere inside generated code so we 2950 // need to save whatever non-callee save context might get clobbered 2951 // by the call to Thread::current() or, indeed, the call setup code. 2952 void MacroAssembler::get_thread(Register thread) { 2953 // save all call-clobbered regs except thread 2954 RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + 2955 RegSet::range(x28, x31) + ra - thread; 2956 push_reg(saved_regs, sp); 2957 2958 mv(ra, CAST_FROM_FN_PTR(address, Thread::current)); 2959 jalr(ra); 2960 if (thread != c_rarg0) { 2961 mv(thread, c_rarg0); 2962 } 2963 2964 // restore pushed registers 2965 pop_reg(saved_regs, sp); 2966 } 2967 2968 void MacroAssembler::load_byte_map_base(Register reg) { 2969 CardTable::CardValue* byte_map_base = 2970 ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); 2971 mv(reg, (uint64_t)byte_map_base); 2972 } 2973 2974 void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { 2975 unsigned long low_address = (uintptr_t)CodeCache::low_bound(); 2976 unsigned long high_address = (uintptr_t)CodeCache::high_bound(); 2977 unsigned long dest_address = (uintptr_t)dest.target(); 2978 long offset_low = dest_address - low_address; 2979 long offset_high = dest_address - high_address; 2980 2981 assert(is_valid_riscv64_address(dest.target()), "bad address"); 2982 assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); 2983 2984 // RISC-V doesn't compute a page-aligned address, in order to partially 2985 // compensate for the use of *signed* offsets in its base+disp12 2986 // addressing mode (RISC-V's PC-relative reach remains asymmetric 2987 // [-(2G + 2K), 2G - 2K)).
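  // The 0x800 added below rounds the pc-relative distance so that its low 12 bits
  // can be re-applied as a *signed* lo offset. Illustrative example: a distance of
  // 0x12345fff splits into an auipc hi-part of 0x12346000 and a lo-part of -1,
  // which sum back to the original distance.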
2988 if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { 2989 int64_t distance = dest.target() - pc(); 2990 auipc(reg1, (int32_t)distance + 0x800); 2991 offset = ((int32_t)distance << 20) >> 20; 2992 } else { 2993 movptr(reg1, dest.target(), offset); 2994 } 2995 } 2996 2997 void MacroAssembler::build_frame(int framesize) { 2998 assert(framesize >= 2, "framesize must include space for FP/RA"); 2999 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 3000 sub(sp, sp, framesize); 3001 sd(fp, Address(sp, framesize - 2 * wordSize)); 3002 sd(ra, Address(sp, framesize - wordSize)); 3003 if (PreserveFramePointer) { add(fp, sp, framesize); } 3004 } 3005 3006 void MacroAssembler::remove_frame(int framesize) { 3007 assert(framesize >= 2, "framesize must include space for FP/RA"); 3008 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 3009 ld(fp, Address(sp, framesize - 2 * wordSize)); 3010 ld(ra, Address(sp, framesize - wordSize)); 3011 add(sp, sp, framesize); 3012 } 3013 3014 void MacroAssembler::reserved_stack_check() { 3015 // testing if reserved zone needs to be enabled 3016 Label no_reserved_zone_enabling; 3017 3018 ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); 3019 bltu(sp, t0, no_reserved_zone_enabling); 3020 3021 enter(); // RA and FP are live. 3022 mv(c_rarg0, xthread); 3023 RuntimeAddress target(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)); 3024 relocate(target.rspec(), [&] { 3025 int32_t offset; 3026 la_patchable(t0, target, offset); 3027 jalr(x1, t0, offset); 3028 }); 3029 leave(); 3030 3031 // We have already removed our own frame. 3032 // throw_delayed_StackOverflowError will think that it's been 3033 // called by our caller. 3034 target = RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()); 3035 relocate(target.rspec(), [&] { 3036 int32_t offset; 3037 la_patchable(t0, target, offset); 3038 jalr(x0, t0, offset); 3039 }); 3040 should_not_reach_here(); 3041 3042 bind(no_reserved_zone_enabling); 3043 } 3044 3045 // Move the address of the polling page into dest. 3046 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { 3047 ld(dest, Address(xthread, JavaThread::polling_page_offset())); 3048 } 3049 3050 // Read the polling page. The address of the polling page must 3051 // already be in r. 
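// (The lwu below targets zr, so the loaded value is discarded; the access exists
// only to fault when the VM has armed the polling page for a safepoint.)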
3052 void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { 3053 relocate(rtype, [&] { 3054 lwu(zr, Address(r, offset)); 3055 }); 3056 } 3057 3058 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 3059 #ifdef ASSERT 3060 { 3061 ThreadInVMfromUnknown tiv; 3062 assert (UseCompressedOops, "should only be used for compressed oops"); 3063 assert (Universe::heap() != NULL, "java heap should be initialized"); 3064 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 3065 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 3066 } 3067 #endif 3068 int oop_index = oop_recorder()->find_index(obj); 3069 relocate(oop_Relocation::spec(oop_index), [&] { 3070 li32(dst, 0xDEADBEEF); 3071 }); 3072 zero_extend(dst, dst, 32); 3073 } 3074 3075 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 3076 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 3077 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 3078 int index = oop_recorder()->find_index(k); 3079 assert(!Universe::heap()->is_in(k), "should not be an oop"); 3080 3081 narrowKlass nk = CompressedKlassPointers::encode(k); 3082 relocate(metadata_Relocation::spec(index), [&] { 3083 li32(dst, nk); 3084 }); 3085 zero_extend(dst, dst, 32); 3086 } 3087 3088 // Maybe emit a call via a trampoline. If the code cache is small 3089 // trampolines won't be emitted. 3090 address MacroAssembler::trampoline_call(Address entry) { 3091 assert(entry.rspec().type() == relocInfo::runtime_call_type || 3092 entry.rspec().type() == relocInfo::opt_virtual_call_type || 3093 entry.rspec().type() == relocInfo::static_call_type || 3094 entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); 3095 3096 address target = entry.target(); 3097 3098 // We need a trampoline if branches are far. 3099 if (far_branches()) { 3100 if (!in_scratch_emit_size()) { 3101 if (entry.rspec().type() == relocInfo::runtime_call_type) { 3102 assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs"); 3103 code()->share_trampoline_for(entry.target(), offset()); 3104 } else { 3105 address stub = emit_trampoline_stub(offset(), target); 3106 if (stub == NULL) { 3107 postcond(pc() == badAddress); 3108 return NULL; // CodeCache is full 3109 } 3110 } 3111 } 3112 target = pc(); 3113 } 3114 3115 address call_pc = pc(); 3116 #ifdef ASSERT 3117 if (entry.rspec().type() != relocInfo::runtime_call_type) { 3118 assert_alignment(call_pc); 3119 } 3120 #endif 3121 relocate(entry.rspec(), [&] { 3122 jal(target); 3123 }); 3124 3125 postcond(pc() != badAddress); 3126 return call_pc; 3127 } 3128 3129 address MacroAssembler::ic_call(address entry, jint method_index) { 3130 RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); 3131 IncompressibleRegion ir(this); // relocations 3132 movptr(t1, (address)Universe::non_oop_word()); 3133 assert_cond(entry != NULL); 3134 return trampoline_call(Address(entry, rh)); 3135 } 3136 3137 // Emit a trampoline stub for a call to a target which is too far away. 
3138 // 3139 // code sequences: 3140 // 3141 // call-site: 3142 // branch-and-link to <destination> or <trampoline stub> 3143 // 3144 // Related trampoline stub for this call site in the stub section: 3145 // load the call target from the constant pool 3146 // branch (RA still points to the call site above) 3147 3148 address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, 3149 address dest) { 3150 // Max stub size: alignment nop, TrampolineStub. 3151 address stub = start_a_stub(max_trampoline_stub_size()); 3152 if (stub == NULL) { 3153 return NULL; // CodeBuffer::expand failed 3154 } 3155 3156 // We are always 4-byte aligned here. 3157 assert_alignment(pc()); 3158 3159 // Create a trampoline stub relocation which relates this trampoline stub 3160 // with the call instruction at insts_call_instruction_offset in the 3161 // instructions code-section. 3162 3163 // Make sure the address of destination 8-byte aligned after 3 instructions. 3164 align(wordSize, NativeCallTrampolineStub::data_offset); 3165 3166 RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() + 3167 insts_call_instruction_offset); 3168 const int stub_start_offset = offset(); 3169 relocate(rh, [&] { 3170 // Now, create the trampoline stub's code: 3171 // - load the call 3172 // - call 3173 Label target; 3174 ld(t0, target); // auipc + ld 3175 jr(t0); // jalr 3176 bind(target); 3177 assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, 3178 "should be"); 3179 assert(offset() % wordSize == 0, "bad alignment"); 3180 emit_int64((int64_t)dest); 3181 }); 3182 3183 const address stub_start_addr = addr_at(stub_start_offset); 3184 3185 assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); 3186 3187 end_a_stub(); 3188 return stub_start_addr; 3189 } 3190 3191 int MacroAssembler::max_trampoline_stub_size() { 3192 // Max stub size: alignment nop, TrampolineStub. 3193 return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; 3194 } 3195 3196 int MacroAssembler::static_call_stub_size() { 3197 // (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr 3198 return 12 * NativeInstruction::instruction_size; 3199 } 3200 3201 Address MacroAssembler::add_memory_helper(const Address dst, Register tmp) { 3202 switch (dst.getMode()) { 3203 case Address::base_plus_offset: 3204 // This is the expected mode, although we allow all the other 3205 // forms below. 
3206 return form_address(tmp, dst.base(), dst.offset()); 3207 default: 3208 la(tmp, dst); 3209 return Address(tmp); 3210 } 3211 } 3212 3213 void MacroAssembler::increment(const Address dst, int64_t value, Register tmp1, Register tmp2) { 3214 assert(((dst.getMode() == Address::base_plus_offset && 3215 is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), 3216 "invalid value and address mode combination"); 3217 Address adr = add_memory_helper(dst, tmp2); 3218 assert(!adr.uses(tmp1), "invalid dst for address increment"); 3219 ld(tmp1, adr); 3220 add(tmp1, tmp1, value, tmp2); 3221 sd(tmp1, adr); 3222 } 3223 3224 void MacroAssembler::incrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 3225 assert(((dst.getMode() == Address::base_plus_offset && 3226 is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), 3227 "invalid value and address mode combination"); 3228 Address adr = add_memory_helper(dst, tmp2); 3229 assert(!adr.uses(tmp1), "invalid dst for address increment"); 3230 lwu(tmp1, adr); 3231 addw(tmp1, tmp1, value, tmp2); 3232 sw(tmp1, adr); 3233 } 3234 3235 void MacroAssembler::decrement(const Address dst, int64_t value, Register tmp1, Register tmp2) { 3236 assert(((dst.getMode() == Address::base_plus_offset && 3237 is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), 3238 "invalid value and address mode combination"); 3239 Address adr = add_memory_helper(dst, tmp2); 3240 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 3241 ld(tmp1, adr); 3242 sub(tmp1, tmp1, value, tmp2); 3243 sd(tmp1, adr); 3244 } 3245 3246 void MacroAssembler::decrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 3247 assert(((dst.getMode() == Address::base_plus_offset && 3248 is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), 3249 "invalid value and address mode combination"); 3250 Address adr = add_memory_helper(dst, tmp2); 3251 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 3252 lwu(tmp1, adr); 3253 subw(tmp1, tmp1, value, tmp2); 3254 sw(tmp1, adr); 3255 } 3256 3257 void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { 3258 assert_different_registers(src1, t0); 3259 relocate(src2.rspec(), [&] { 3260 int32_t offset; 3261 la_patchable(t0, src2, offset); 3262 ld(t0, Address(t0, offset)); 3263 }); 3264 beq(src1, t0, equal); 3265 } 3266 3267 void MacroAssembler::load_method_holder_cld(Register result, Register method) { 3268 load_method_holder(result, method); 3269 ld(result, Address(result, InstanceKlass::class_loader_data_offset())); 3270 } 3271 3272 void MacroAssembler::load_method_holder(Register holder, Register method) { 3273 ld(holder, Address(method, Method::const_offset())); // ConstMethod* 3274 ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* 3275 ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* 3276 } 3277 3278 // string indexof 3279 // compute index by trailing zeros 3280 void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, 3281 Register match_mask, Register result, 3282 Register ch2, Register tmp, 3283 bool haystack_isL) { 3284 int haystack_chr_shift = haystack_isL ? 
0 : 1; 3285 srl(match_mask, match_mask, trailing_zeros); 3286 srli(match_mask, match_mask, 1); 3287 srli(tmp, trailing_zeros, LogBitsPerByte); 3288 if (!haystack_isL) andi(tmp, tmp, 0xE); 3289 add(haystack, haystack, tmp); 3290 ld(ch2, Address(haystack)); 3291 if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); 3292 add(result, result, tmp); 3293 } 3294 3295 // string indexof 3296 // Find pattern element in src, compute match mask, 3297 // only the first occurrence of 0x80/0x8000 at low bits is the valid match index 3298 // match mask patterns and corresponding indices would be like: 3299 // - 0x8080808080808080 (Latin1) 3300 // - 7 6 5 4 3 2 1 0 (match index) 3301 // - 0x8000800080008000 (UTF16) 3302 // - 3 2 1 0 (match index) 3303 void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, 3304 Register mask1, Register mask2) { 3305 xorr(src, pattern, src); 3306 sub(match_mask, src, mask1); 3307 orr(src, src, mask2); 3308 notr(src, src); 3309 andr(match_mask, match_mask, src); 3310 } 3311 3312 #ifdef COMPILER2 3313 // Code for BigInteger::mulAdd intrinsic 3314 // out = x10 3315 // in = x11 3316 // offset = x12 (already out.length-offset) 3317 // len = x13 3318 // k = x14 3319 // tmp = x28 3320 // 3321 // pseudo code from java implementation: 3322 // long kLong = k & LONG_MASK; 3323 // carry = 0; 3324 // offset = out.length-offset - 1; 3325 // for (int j = len - 1; j >= 0; j--) { 3326 // product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; 3327 // out[offset--] = (int)product; 3328 // carry = product >>> 32; 3329 // } 3330 // return (int)carry; 3331 void MacroAssembler::mul_add(Register out, Register in, Register offset, 3332 Register len, Register k, Register tmp) { 3333 Label L_tail_loop, L_unroll, L_end; 3334 mv(tmp, out); 3335 mv(out, zr); 3336 blez(len, L_end); 3337 zero_extend(k, k, 32); 3338 slliw(t0, offset, LogBytesPerInt); 3339 add(offset, tmp, t0); 3340 slliw(t0, len, LogBytesPerInt); 3341 add(in, in, t0); 3342 3343 const int unroll = 8; 3344 mv(tmp, unroll); 3345 blt(len, tmp, L_tail_loop); 3346 bind(L_unroll); 3347 for (int i = 0; i < unroll; i++) { 3348 sub(in, in, BytesPerInt); 3349 lwu(t0, Address(in, 0)); 3350 mul(t1, t0, k); 3351 add(t0, t1, out); 3352 sub(offset, offset, BytesPerInt); 3353 lwu(t1, Address(offset, 0)); 3354 add(t0, t0, t1); 3355 sw(t0, Address(offset, 0)); 3356 srli(out, t0, 32); 3357 } 3358 subw(len, len, tmp); 3359 bge(len, tmp, L_unroll); 3360 3361 bind(L_tail_loop); 3362 blez(len, L_end); 3363 sub(in, in, BytesPerInt); 3364 lwu(t0, Address(in, 0)); 3365 mul(t1, t0, k); 3366 add(t0, t1, out); 3367 sub(offset, offset, BytesPerInt); 3368 lwu(t1, Address(offset, 0)); 3369 add(t0, t0, t1); 3370 sw(t0, Address(offset, 0)); 3371 srli(out, t0, 32); 3372 subw(len, len, 1); 3373 j(L_tail_loop); 3374 3375 bind(L_end); 3376 } 3377 3378 // add two unsigned input and output carry 3379 void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) 3380 { 3381 assert_different_registers(dst, carry); 3382 assert_different_registers(dst, src2); 3383 add(dst, src1, src2); 3384 sltu(carry, dst, src2); 3385 } 3386 3387 // add two input with carry 3388 void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) { 3389 assert_different_registers(dst, carry); 3390 add(dst, src1, src2); 3391 add(dst, dst, carry); 3392 } 3393 3394 // add two unsigned input with carry and output carry 3395 void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) 
{ 3396 assert_different_registers(dst, src2); 3397 adc(dst, src1, src2, carry); 3398 sltu(carry, dst, src2); 3399 } 3400 3401 void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, 3402 Register src1, Register src2, Register carry) { 3403 cad(dest_lo, dest_lo, src1, carry); 3404 add(dest_hi, dest_hi, carry); 3405 cad(dest_lo, dest_lo, src2, carry); 3406 add(final_dest_hi, dest_hi, carry); 3407 } 3408 3409 /** 3410 * Multiply 32 bit by 32 bit first loop. 3411 */ 3412 void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, 3413 Register y, Register y_idx, Register z, 3414 Register carry, Register product, 3415 Register idx, Register kdx) { 3416 // jlong carry, x[], y[], z[]; 3417 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 3418 // long product = y[idx] * x[xstart] + carry; 3419 // z[kdx] = (int)product; 3420 // carry = product >>> 32; 3421 // } 3422 // z[xstart] = (int)carry; 3423 3424 Label L_first_loop, L_first_loop_exit; 3425 blez(idx, L_first_loop_exit); 3426 3427 shadd(t0, xstart, x, t0, LogBytesPerInt); 3428 lwu(x_xstart, Address(t0, 0)); 3429 3430 bind(L_first_loop); 3431 subw(idx, idx, 1); 3432 shadd(t0, idx, y, t0, LogBytesPerInt); 3433 lwu(y_idx, Address(t0, 0)); 3434 mul(product, x_xstart, y_idx); 3435 add(product, product, carry); 3436 srli(carry, product, 32); 3437 subw(kdx, kdx, 1); 3438 shadd(t0, kdx, z, t0, LogBytesPerInt); 3439 sw(product, Address(t0, 0)); 3440 bgtz(idx, L_first_loop); 3441 3442 bind(L_first_loop_exit); 3443 } 3444 3445 /** 3446 * Multiply 64 bit by 64 bit first loop. 3447 */ 3448 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, 3449 Register y, Register y_idx, Register z, 3450 Register carry, Register product, 3451 Register idx, Register kdx) { 3452 // 3453 // jlong carry, x[], y[], z[]; 3454 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 3455 // huge_128 product = y[idx] * x[xstart] + carry; 3456 // z[kdx] = (jlong)product; 3457 // carry = (jlong)(product >>> 64); 3458 // } 3459 // z[xstart] = carry; 3460 // 3461 3462 Label L_first_loop, L_first_loop_exit; 3463 Label L_one_x, L_one_y, L_multiply; 3464 3465 subw(xstart, xstart, 1); 3466 bltz(xstart, L_one_x); 3467 3468 shadd(t0, xstart, x, t0, LogBytesPerInt); 3469 ld(x_xstart, Address(t0, 0)); 3470 ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian 3471 3472 bind(L_first_loop); 3473 subw(idx, idx, 1); 3474 bltz(idx, L_first_loop_exit); 3475 subw(idx, idx, 1); 3476 bltz(idx, L_one_y); 3477 3478 shadd(t0, idx, y, t0, LogBytesPerInt); 3479 ld(y_idx, Address(t0, 0)); 3480 ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian 3481 bind(L_multiply); 3482 3483 mulhu(t0, x_xstart, y_idx); 3484 mul(product, x_xstart, y_idx); 3485 cad(product, product, carry, t1); 3486 adc(carry, t0, zr, t1); 3487 3488 subw(kdx, kdx, 2); 3489 ror_imm(product, product, 32); // back to big-endian 3490 shadd(t0, kdx, z, t0, LogBytesPerInt); 3491 sd(product, Address(t0, 0)); 3492 3493 j(L_first_loop); 3494 3495 bind(L_one_y); 3496 lwu(y_idx, Address(y, 0)); 3497 j(L_multiply); 3498 3499 bind(L_one_x); 3500 lwu(x_xstart, Address(x, 0)); 3501 j(L_first_loop); 3502 3503 bind(L_first_loop_exit); 3504 } 3505 3506 /** 3507 * Multiply 128 bit by 128 bit. Unrolled inner loop. 
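 *
 * A rough per-limb model of one unrolled step below (an illustrative sketch
 * using a hypothetical 128-bit wide temporary, not code from this file):
 *
 *   wide   = (unsigned __int128) y_limb * product_hi + z_limb + carry;
 *   z_limb = (uint64_t)  wide;         // low 64 bits written back to z
 *   carry  = (uint64_t) (wide >> 64);  // high 64 bits carried to the next limb
 *
 * where y_limb and z_limb are 64-bit values built from two adjacent 32-bit
 * array elements; the ror_imm(.., 32) calls swap the 32-bit halves so the
 * combined value is in significance order before and after the multiply.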
3508 * 3509 */ 3510 void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, 3511 Register carry, Register carry2, 3512 Register idx, Register jdx, 3513 Register yz_idx1, Register yz_idx2, 3514 Register tmp, Register tmp3, Register tmp4, 3515 Register tmp6, Register product_hi) { 3516 // jlong carry, x[], y[], z[]; 3517 // int kdx = xstart+1; 3518 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop 3519 // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; 3520 // jlong carry2 = (jlong)(tmp3 >>> 64); 3521 // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; 3522 // carry = (jlong)(tmp4 >>> 64); 3523 // z[kdx+idx+1] = (jlong)tmp3; 3524 // z[kdx+idx] = (jlong)tmp4; 3525 // } 3526 // idx += 2; 3527 // if (idx > 0) { 3528 // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; 3529 // z[kdx+idx] = (jlong)yz_idx1; 3530 // carry = (jlong)(yz_idx1 >>> 64); 3531 // } 3532 // 3533 3534 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; 3535 3536 srliw(jdx, idx, 2); 3537 3538 bind(L_third_loop); 3539 3540 subw(jdx, jdx, 1); 3541 bltz(jdx, L_third_loop_exit); 3542 subw(idx, idx, 4); 3543 3544 shadd(t0, idx, y, t0, LogBytesPerInt); 3545 ld(yz_idx2, Address(t0, 0)); 3546 ld(yz_idx1, Address(t0, wordSize)); 3547 3548 shadd(tmp6, idx, z, t0, LogBytesPerInt); 3549 3550 ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian 3551 ror_imm(yz_idx2, yz_idx2, 32); 3552 3553 ld(t1, Address(tmp6, 0)); 3554 ld(t0, Address(tmp6, wordSize)); 3555 3556 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 3557 mulhu(tmp4, product_hi, yz_idx1); 3558 3559 ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian 3560 ror_imm(t1, t1, 32, tmp); 3561 3562 mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp 3563 mulhu(carry2, product_hi, yz_idx2); 3564 3565 cad(tmp3, tmp3, carry, carry); 3566 adc(tmp4, tmp4, zr, carry); 3567 cad(tmp3, tmp3, t0, t0); 3568 cadc(tmp4, tmp4, tmp, t0); 3569 adc(carry, carry2, zr, t0); 3570 cad(tmp4, tmp4, t1, carry2); 3571 adc(carry, carry, zr, carry2); 3572 3573 ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian 3574 ror_imm(tmp4, tmp4, 32); 3575 sd(tmp4, Address(tmp6, 0)); 3576 sd(tmp3, Address(tmp6, wordSize)); 3577 3578 j(L_third_loop); 3579 3580 bind(L_third_loop_exit); 3581 3582 andi(idx, idx, 0x3); 3583 beqz(idx, L_post_third_loop_done); 3584 3585 Label L_check_1; 3586 subw(idx, idx, 2); 3587 bltz(idx, L_check_1); 3588 3589 shadd(t0, idx, y, t0, LogBytesPerInt); 3590 ld(yz_idx1, Address(t0, 0)); 3591 ror_imm(yz_idx1, yz_idx1, 32); 3592 3593 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 3594 mulhu(tmp4, product_hi, yz_idx1); 3595 3596 shadd(t0, idx, z, t0, LogBytesPerInt); 3597 ld(yz_idx2, Address(t0, 0)); 3598 ror_imm(yz_idx2, yz_idx2, 32, tmp); 3599 3600 add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); 3601 3602 ror_imm(tmp3, tmp3, 32, tmp); 3603 sd(tmp3, Address(t0, 0)); 3604 3605 bind(L_check_1); 3606 3607 andi(idx, idx, 0x1); 3608 subw(idx, idx, 1); 3609 bltz(idx, L_post_third_loop_done); 3610 shadd(t0, idx, y, t0, LogBytesPerInt); 3611 lwu(tmp4, Address(t0, 0)); 3612 mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 3613 mulhu(carry2, tmp4, product_hi); 3614 3615 shadd(t0, idx, z, t0, LogBytesPerInt); 3616 lwu(tmp4, Address(t0, 0)); 3617 3618 add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); 3619 3620 shadd(t0, idx, z, t0, LogBytesPerInt); 3621 sw(tmp3, Address(t0, 0)); 3622 3623 slli(t0, carry2, 32); 3624 
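// (together with the srli/orr that follow, this computes
//  carry = (carry2 << 32) | (tmp3 >>> 32), the upper 64 bits of the partial result)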
srli(carry, tmp3, 32); 3625 orr(carry, carry, t0); 3626 3627 bind(L_post_third_loop_done); 3628 } 3629 3630 /** 3631 * Code for BigInteger::multiplyToLen() intrinsic. 3632 * 3633 * x10: x 3634 * x11: xlen 3635 * x12: y 3636 * x13: ylen 3637 * x14: z 3638 * x15: zlen 3639 * x16: tmp1 3640 * x17: tmp2 3641 * x7: tmp3 3642 * x28: tmp4 3643 * x29: tmp5 3644 * x30: tmp6 3645 * x31: tmp7 3646 */ 3647 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, 3648 Register z, Register zlen, 3649 Register tmp1, Register tmp2, Register tmp3, Register tmp4, 3650 Register tmp5, Register tmp6, Register product_hi) { 3651 assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); 3652 3653 const Register idx = tmp1; 3654 const Register kdx = tmp2; 3655 const Register xstart = tmp3; 3656 3657 const Register y_idx = tmp4; 3658 const Register carry = tmp5; 3659 const Register product = xlen; 3660 const Register x_xstart = zlen; // reuse register 3661 3662 mv(idx, ylen); // idx = ylen; 3663 mv(kdx, zlen); // kdx = xlen+ylen; 3664 mv(carry, zr); // carry = 0; 3665 3666 Label L_multiply_64_x_64_loop, L_done; 3667 3668 subw(xstart, xlen, 1); 3669 bltz(xstart, L_done); 3670 3671 const Register jdx = tmp1; 3672 3673 if (AvoidUnalignedAccesses) { 3674 // Check if x and y are both 8-byte aligned. 3675 orr(t0, xlen, ylen); 3676 andi(t0, t0, 0x1); 3677 beqz(t0, L_multiply_64_x_64_loop); 3678 3679 multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 3680 shadd(t0, xstart, z, t0, LogBytesPerInt); 3681 sw(carry, Address(t0, 0)); 3682 3683 Label L_second_loop_unaligned; 3684 bind(L_second_loop_unaligned); 3685 mv(carry, zr); 3686 mv(jdx, ylen); 3687 subw(xstart, xstart, 1); 3688 bltz(xstart, L_done); 3689 sub(sp, sp, 2 * wordSize); 3690 sd(z, Address(sp, 0)); 3691 sd(zr, Address(sp, wordSize)); 3692 shadd(t0, xstart, z, t0, LogBytesPerInt); 3693 addi(z, t0, 4); 3694 shadd(t0, xstart, x, t0, LogBytesPerInt); 3695 lwu(product, Address(t0, 0)); 3696 Label L_third_loop, L_third_loop_exit; 3697 3698 blez(jdx, L_third_loop_exit); 3699 3700 bind(L_third_loop); 3701 subw(jdx, jdx, 1); 3702 shadd(t0, jdx, y, t0, LogBytesPerInt); 3703 lwu(t0, Address(t0, 0)); 3704 mul(t1, t0, product); 3705 add(t0, t1, carry); 3706 shadd(tmp6, jdx, z, t1, LogBytesPerInt); 3707 lwu(t1, Address(tmp6, 0)); 3708 add(t0, t0, t1); 3709 sw(t0, Address(tmp6, 0)); 3710 srli(carry, t0, 32); 3711 bgtz(jdx, L_third_loop); 3712 3713 bind(L_third_loop_exit); 3714 ld(z, Address(sp, 0)); 3715 addi(sp, sp, 2 * wordSize); 3716 shadd(t0, xstart, z, t0, LogBytesPerInt); 3717 sw(carry, Address(t0, 0)); 3718 3719 j(L_second_loop_unaligned); 3720 } 3721 3722 bind(L_multiply_64_x_64_loop); 3723 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 3724 3725 Label L_second_loop_aligned; 3726 beqz(kdx, L_second_loop_aligned); 3727 3728 Label L_carry; 3729 subw(kdx, kdx, 1); 3730 beqz(kdx, L_carry); 3731 3732 shadd(t0, kdx, z, t0, LogBytesPerInt); 3733 sw(carry, Address(t0, 0)); 3734 srli(carry, carry, 32); 3735 subw(kdx, kdx, 1); 3736 3737 bind(L_carry); 3738 shadd(t0, kdx, z, t0, LogBytesPerInt); 3739 sw(carry, Address(t0, 0)); 3740 3741 // Second and third (nested) loops. 
3742 // 3743 // for (int i = xstart-1; i >= 0; i--) { // Second loop 3744 // carry = 0; 3745 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop 3746 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + 3747 // (z[k] & LONG_MASK) + carry; 3748 // z[k] = (int)product; 3749 // carry = product >>> 32; 3750 // } 3751 // z[i] = (int)carry; 3752 // } 3753 // 3754 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi 3755 3756 bind(L_second_loop_aligned); 3757 mv(carry, zr); // carry = 0; 3758 mv(jdx, ylen); // j = ystart+1 3759 3760 subw(xstart, xstart, 1); // i = xstart-1; 3761 bltz(xstart, L_done); 3762 3763 sub(sp, sp, 4 * wordSize); 3764 sd(z, Address(sp, 0)); 3765 3766 Label L_last_x; 3767 shadd(t0, xstart, z, t0, LogBytesPerInt); 3768 addi(z, t0, 4); 3769 subw(xstart, xstart, 1); // i = xstart-1; 3770 bltz(xstart, L_last_x); 3771 3772 shadd(t0, xstart, x, t0, LogBytesPerInt); 3773 ld(product_hi, Address(t0, 0)); 3774 ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian 3775 3776 Label L_third_loop_prologue; 3777 bind(L_third_loop_prologue); 3778 3779 sd(ylen, Address(sp, wordSize)); 3780 sd(x, Address(sp, 2 * wordSize)); 3781 sd(xstart, Address(sp, 3 * wordSize)); 3782 multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, 3783 tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); 3784 ld(z, Address(sp, 0)); 3785 ld(ylen, Address(sp, wordSize)); 3786 ld(x, Address(sp, 2 * wordSize)); 3787 ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen 3788 addi(sp, sp, 4 * wordSize); 3789 3790 addiw(tmp3, xlen, 1); 3791 shadd(t0, tmp3, z, t0, LogBytesPerInt); 3792 sw(carry, Address(t0, 0)); 3793 3794 subw(tmp3, tmp3, 1); 3795 bltz(tmp3, L_done); 3796 3797 srli(carry, carry, 32); 3798 shadd(t0, tmp3, z, t0, LogBytesPerInt); 3799 sw(carry, Address(t0, 0)); 3800 j(L_second_loop_aligned); 3801 3802 // Next infrequent code is moved outside loops. 3803 bind(L_last_x); 3804 lwu(product_hi, Address(x, 0)); 3805 j(L_third_loop_prologue); 3806 3807 bind(L_done); 3808 } 3809 #endif 3810 3811 // Count bits of trailing zero chars from lsb to msb until first non-zero element. 3812 // For LL case, one byte for one element, so shift 8 bits once, and for other case, 3813 // shift 16 bits once. 3814 void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) { 3815 if (UseZbb) { 3816 assert_different_registers(Rd, Rs, tmp1); 3817 int step = isLL ? 8 : 16; 3818 ctz(Rd, Rs); 3819 andi(tmp1, Rd, step - 1); 3820 sub(Rd, Rd, tmp1); 3821 return; 3822 } 3823 3824 assert_different_registers(Rd, Rs, tmp1, tmp2); 3825 Label Loop; 3826 int step = isLL ? 
8 : 16; 3827 mv(Rd, -step); 3828 mv(tmp2, Rs); 3829 3830 bind(Loop); 3831 addi(Rd, Rd, step); 3832 andi(tmp1, tmp2, ((1 << step) - 1)); 3833 srli(tmp2, tmp2, step); 3834 beqz(tmp1, Loop); 3835 } 3836 3837 // This instruction reads adjacent 4 bytes from the lower half of source register, 3838 // inflate into a register, for example: 3839 // Rs: A7A6A5A4A3A2A1A0 3840 // Rd: 00A300A200A100A0 3841 void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) { 3842 assert_different_registers(Rd, Rs, tmp1, tmp2); 3843 3844 mv(tmp1, 0xFF); 3845 mv(Rd, zr); 3846 for (int i = 0; i <= 3; i++) { 3847 andr(tmp2, Rs, tmp1); 3848 if (i) { 3849 slli(tmp2, tmp2, i * 8); 3850 } 3851 orr(Rd, Rd, tmp2); 3852 if (i != 3) { 3853 slli(tmp1, tmp1, 8); 3854 } 3855 } 3856 } 3857 3858 // This instruction reads adjacent 4 bytes from the upper half of source register, 3859 // inflate into a register, for example: 3860 // Rs: A7A6A5A4A3A2A1A0 3861 // Rd: 00A700A600A500A4 3862 void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) { 3863 assert_different_registers(Rd, Rs, tmp1, tmp2); 3864 3865 mv(tmp1, 0xFF00000000); 3866 mv(Rd, zr); 3867 for (int i = 0; i <= 3; i++) { 3868 andr(tmp2, Rs, tmp1); 3869 orr(Rd, Rd, tmp2); 3870 srli(Rd, Rd, 8); 3871 if (i != 3) { 3872 slli(tmp1, tmp1, 8); 3873 } 3874 } 3875 } 3876 3877 // The size of the blocks erased by the zero_blocks stub. We must 3878 // handle anything smaller than this ourselves in zero_words(). 3879 const int MacroAssembler::zero_words_block_size = 8; 3880 3881 // zero_words() is used by C2 ClearArray patterns. It is as small as 3882 // possible, handling small word counts locally and delegating 3883 // anything larger to the zero_blocks stub. It is expanded many times 3884 // in compiled code, so it is important to keep it short. 3885 3886 // ptr: Address of a buffer to be zeroed. 3887 // cnt: Count in HeapWords. 3888 // 3889 // ptr, cnt, and t0 are clobbered. 3890 address MacroAssembler::zero_words(Register ptr, Register cnt) { 3891 assert(is_power_of_2(zero_words_block_size), "adjust this"); 3892 assert(ptr == x28 && cnt == x29, "mismatch in register usage"); 3893 assert_different_registers(cnt, t0); 3894 3895 BLOCK_COMMENT("zero_words {"); 3896 3897 mv(t0, zero_words_block_size); 3898 Label around, done, done16; 3899 bltu(cnt, t0, around); 3900 { 3901 RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); 3902 assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); 3903 if (StubRoutines::riscv::complete()) { 3904 address tpc = trampoline_call(zero_blocks); 3905 if (tpc == NULL) { 3906 DEBUG_ONLY(reset_labels(around)); 3907 postcond(pc() == badAddress); 3908 return NULL; 3909 } 3910 } else { 3911 jal(zero_blocks); 3912 } 3913 } 3914 bind(around); 3915 for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { 3916 Label l; 3917 andi(t0, cnt, i); 3918 beqz(t0, l); 3919 for (int j = 0; j < i; j++) { 3920 sd(zr, Address(ptr, j * wordSize)); 3921 } 3922 addi(ptr, ptr, i * wordSize); 3923 bind(l); 3924 } 3925 { 3926 Label l; 3927 andi(t0, cnt, 1); 3928 beqz(t0, l); 3929 sd(zr, Address(ptr, 0)); 3930 bind(l); 3931 } 3932 3933 BLOCK_COMMENT("} zero_words"); 3934 postcond(pc() != badAddress); 3935 return pc(); 3936 } 3937 3938 #define SmallArraySize (18 * BytesPerLong) 3939 3940 // base: Address of a buffer to be zeroed, 8 bytes aligned. 3941 // cnt: Immediate count in HeapWords. 
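//
// A minimal C-style sketch of the strategy below (illustrative only; note that
// SmallArraySize / BytesPerLong == 18 with the definition above):
//
//   if (cnt <= 18) {                                   // small: fully unrolled
//     for (uint64_t i = 0; i < cnt; i++) base[i] = 0;
//   } else {
//     uint64_t rem = cnt % 8;                          // peel the remainder
//     for (uint64_t i = 0; i < rem; i++) base[i] = 0;
//     uint64_t n = cnt - rem;                          // then zero 8 words per iteration
//     for (uint64_t* p = base + rem; n != 0; n -= 8, p += 8) {
//       p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = p[6] = p[7] = 0;
//     }
//   }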
3942 void MacroAssembler::zero_words(Register base, uint64_t cnt) { 3943 assert_different_registers(base, t0, t1); 3944 3945 BLOCK_COMMENT("zero_words {"); 3946 3947 if (cnt <= SmallArraySize / BytesPerLong) { 3948 for (int i = 0; i < (int)cnt; i++) { 3949 sd(zr, Address(base, i * wordSize)); 3950 } 3951 } else { 3952 const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll 3953 int remainder = cnt % unroll; 3954 for (int i = 0; i < remainder; i++) { 3955 sd(zr, Address(base, i * wordSize)); 3956 } 3957 3958 Label loop; 3959 Register cnt_reg = t0; 3960 Register loop_base = t1; 3961 cnt = cnt - remainder; 3962 mv(cnt_reg, cnt); 3963 add(loop_base, base, remainder * wordSize); 3964 bind(loop); 3965 sub(cnt_reg, cnt_reg, unroll); 3966 for (int i = 0; i < unroll; i++) { 3967 sd(zr, Address(loop_base, i * wordSize)); 3968 } 3969 add(loop_base, loop_base, unroll * wordSize); 3970 bnez(cnt_reg, loop); 3971 } 3972 3973 BLOCK_COMMENT("} zero_words"); 3974 } 3975 3976 // base: Address of a buffer to be filled, 8 bytes aligned. 3977 // cnt: Count in 8-byte unit. 3978 // value: Value to be filled with. 3979 // base will point to the end of the buffer after filling. 3980 void MacroAssembler::fill_words(Register base, Register cnt, Register value) { 3981 // Algorithm: 3982 // 3983 // t0 = cnt & 7 3984 // cnt -= t0 3985 // p += t0 3986 // switch (t0): 3987 // switch start: 3988 // do while cnt 3989 // cnt -= 8 3990 // p[-8] = value 3991 // case 7: 3992 // p[-7] = value 3993 // case 6: 3994 // p[-6] = value 3995 // // ... 3996 // case 1: 3997 // p[-1] = value 3998 // case 0: 3999 // p += 8 4000 // do-while end 4001 // switch end 4002 4003 assert_different_registers(base, cnt, value, t0, t1); 4004 4005 Label fini, skip, entry, loop; 4006 const int unroll = 8; // Number of sd instructions we'll unroll 4007 4008 beqz(cnt, fini); 4009 4010 andi(t0, cnt, unroll - 1); 4011 sub(cnt, cnt, t0); 4012 // align 8, so first sd n % 8 = mod, next loop sd 8 * n. 4013 shadd(base, t0, base, t1, 3); 4014 la(t1, entry); 4015 slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) 4016 sub(t1, t1, t0); 4017 jr(t1); 4018 4019 bind(loop); 4020 add(base, base, unroll * 8); 4021 for (int i = -unroll; i < 0; i++) { 4022 sd(value, Address(base, i * 8)); 4023 } 4024 bind(entry); 4025 sub(cnt, cnt, unroll); 4026 bgez(cnt, loop); 4027 4028 bind(fini); 4029 } 4030 4031 // Zero blocks of memory by using CBO.ZERO. 4032 // 4033 // Aligns the base address first sufficiently for CBO.ZERO, then uses 4034 // CBO.ZERO repeatedly for every full block. cnt is the size to be 4035 // zeroed in HeapWords. Returns the count of words left to be zeroed 4036 // in cnt. 4037 // 4038 // NOTE: This is intended to be used in the zero_blocks() stub. If 4039 // you want to use it elsewhere, note that cnt must be >= CacheLineSize. 4040 void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2) { 4041 Label initial_table_end, loop; 4042 4043 // Align base with cache line size. 4044 neg(tmp1, base); 4045 andi(tmp1, tmp1, CacheLineSize - 1); 4046 4047 // tmp1: the number of bytes to be filled to align the base with cache line size. 
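// Note: the jr below lands inside the table of sd(zr, ...) instructions so that
// exactly tmp1 / wordSize stores execute; each sd in the table is a 4-byte
// instruction, hence the jump target is initial_table_end minus
// (tmp1 / 8) * 4 == tmp1 / 2 bytes. For example, assuming a 64-byte cache line,
// a base that is 24 bytes past an alignment boundary gives tmp1 == 40, so five
// stores run and they zero the five words just below the (now aligned) base.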
4048 add(base, base, tmp1); 4049 srai(tmp2, tmp1, 3); 4050 sub(cnt, cnt, tmp2); 4051 srli(tmp2, tmp1, 1); 4052 la(tmp1, initial_table_end); 4053 sub(tmp2, tmp1, tmp2); 4054 jr(tmp2); 4055 for (int i = -CacheLineSize + wordSize; i < 0; i += wordSize) { 4056 sd(zr, Address(base, i)); 4057 } 4058 bind(initial_table_end); 4059 4060 mv(tmp1, CacheLineSize / wordSize); 4061 bind(loop); 4062 cbo_zero(base); 4063 sub(cnt, cnt, tmp1); 4064 add(base, base, CacheLineSize); 4065 bge(cnt, tmp1, loop); 4066 } 4067 4068 #define FCVT_SAFE(FLOATCVT, FLOATEQ) \ 4069 void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ 4070 Label L_Okay; \ 4071 fscsr(zr); \ 4072 FLOATCVT(dst, src); \ 4073 frcsr(tmp); \ 4074 andi(tmp, tmp, 0x1E); \ 4075 beqz(tmp, L_Okay); \ 4076 FLOATEQ(tmp, src, src); \ 4077 bnez(tmp, L_Okay); \ 4078 mv(dst, zr); \ 4079 bind(L_Okay); \ 4080 } 4081 4082 FCVT_SAFE(fcvt_w_s, feq_s) 4083 FCVT_SAFE(fcvt_l_s, feq_s) 4084 FCVT_SAFE(fcvt_w_d, feq_d) 4085 FCVT_SAFE(fcvt_l_d, feq_d) 4086 4087 #undef FCVT_SAFE 4088 4089 #define FCMP(FLOATTYPE, FLOATSIG) \ 4090 void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ 4091 FloatRegister Rs2, int unordered_result) { \ 4092 Label Ldone; \ 4093 if (unordered_result < 0) { \ 4094 /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ 4095 /* installs 1 if gt else 0 */ \ 4096 flt_##FLOATSIG(result, Rs2, Rs1); \ 4097 /* Rs1 > Rs2, install 1 */ \ 4098 bgtz(result, Ldone); \ 4099 feq_##FLOATSIG(result, Rs1, Rs2); \ 4100 addi(result, result, -1); \ 4101 /* Rs1 = Rs2, install 0 */ \ 4102 /* NaN or Rs1 < Rs2, install -1 */ \ 4103 bind(Ldone); \ 4104 } else { \ 4105 /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ 4106 /* installs 1 if gt or unordered else 0 */ \ 4107 flt_##FLOATSIG(result, Rs1, Rs2); \ 4108 /* Rs1 < Rs2, install -1 */ \ 4109 bgtz(result, Ldone); \ 4110 feq_##FLOATSIG(result, Rs1, Rs2); \ 4111 addi(result, result, -1); \ 4112 /* Rs1 = Rs2, install 0 */ \ 4113 /* NaN or Rs1 > Rs2, install 1 */ \ 4114 bind(Ldone); \ 4115 neg(result, result); \ 4116 } \ 4117 } 4118 4119 FCMP(float, s); 4120 FCMP(double, d); 4121 4122 #undef FCMP 4123 4124 // Zero words; len is in bytes 4125 // Destroys all registers except addr 4126 // len must be a nonzero multiple of wordSize 4127 void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { 4128 assert_different_registers(addr, len, tmp, t0, t1); 4129 4130 #ifdef ASSERT 4131 { 4132 Label L; 4133 andi(t0, len, BytesPerWord - 1); 4134 beqz(t0, L); 4135 stop("len is not a multiple of BytesPerWord"); 4136 bind(L); 4137 } 4138 #endif // ASSERT 4139 4140 #ifndef PRODUCT 4141 block_comment("zero memory"); 4142 #endif // PRODUCT 4143 4144 Label loop; 4145 Label entry; 4146 4147 // Algorithm: 4148 // 4149 // t0 = cnt & 7 4150 // cnt -= t0 4151 // p += t0 4152 // switch (t0) { 4153 // do { 4154 // cnt -= 8 4155 // p[-8] = 0 4156 // case 7: 4157 // p[-7] = 0 4158 // case 6: 4159 // p[-6] = 0 4160 // ... 
4161 // case 1: 4162 // p[-1] = 0 4163 // case 0: 4164 // p += 8 4165 // } while (cnt) 4166 // } 4167 4168 const int unroll = 8; // Number of sd(zr) instructions we'll unroll 4169 4170 srli(len, len, LogBytesPerWord); 4171 andi(t0, len, unroll - 1); // t0 = cnt % unroll 4172 sub(len, len, t0); // cnt -= unroll 4173 // tmp always points to the end of the region we're about to zero 4174 shadd(tmp, t0, addr, t1, LogBytesPerWord); 4175 la(t1, entry); 4176 slli(t0, t0, 2); 4177 sub(t1, t1, t0); 4178 jr(t1); 4179 bind(loop); 4180 sub(len, len, unroll); 4181 for (int i = -unroll; i < 0; i++) { 4182 sd(zr, Address(tmp, i * wordSize)); 4183 } 4184 bind(entry); 4185 add(tmp, tmp, unroll * wordSize); 4186 bnez(len, loop); 4187 } 4188 4189 // shift left by shamt and add 4190 // Rd = (Rs1 << shamt) + Rs2 4191 void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { 4192 if (UseZba) { 4193 if (shamt == 1) { 4194 sh1add(Rd, Rs1, Rs2); 4195 return; 4196 } else if (shamt == 2) { 4197 sh2add(Rd, Rs1, Rs2); 4198 return; 4199 } else if (shamt == 3) { 4200 sh3add(Rd, Rs1, Rs2); 4201 return; 4202 } 4203 } 4204 4205 if (shamt != 0) { 4206 slli(tmp, Rs1, shamt); 4207 add(Rd, Rs2, tmp); 4208 } else { 4209 add(Rd, Rs1, Rs2); 4210 } 4211 } 4212 4213 void MacroAssembler::zero_extend(Register dst, Register src, int bits) { 4214 if (UseZba && bits == 32) { 4215 zext_w(dst, src); 4216 return; 4217 } 4218 4219 if (UseZbb && bits == 16) { 4220 zext_h(dst, src); 4221 return; 4222 } 4223 4224 if (bits == 8) { 4225 zext_b(dst, src); 4226 } else { 4227 slli(dst, src, XLEN - bits); 4228 srli(dst, dst, XLEN - bits); 4229 } 4230 } 4231 4232 void MacroAssembler::sign_extend(Register dst, Register src, int bits) { 4233 if (UseZbb) { 4234 if (bits == 8) { 4235 sext_b(dst, src); 4236 return; 4237 } else if (bits == 16) { 4238 sext_h(dst, src); 4239 return; 4240 } 4241 } 4242 4243 if (bits == 32) { 4244 sext_w(dst, src); 4245 } else { 4246 slli(dst, src, XLEN - bits); 4247 srai(dst, dst, XLEN - bits); 4248 } 4249 } 4250 4251 void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) 4252 { 4253 if (src1 == src2) { 4254 mv(dst, zr); 4255 return; 4256 } 4257 Label done; 4258 Register left = src1; 4259 Register right = src2; 4260 if (dst == src1) { 4261 assert_different_registers(dst, src2, tmp); 4262 mv(tmp, src1); 4263 left = tmp; 4264 } else if (dst == src2) { 4265 assert_different_registers(dst, src1, tmp); 4266 mv(tmp, src2); 4267 right = tmp; 4268 } 4269 4270 // installs 1 if gt else 0 4271 slt(dst, right, left); 4272 bnez(dst, done); 4273 slt(dst, left, right); 4274 // dst = -1 if lt; else if eq , dst = 0 4275 neg(dst, dst); 4276 bind(done); 4277 } 4278 4279 // The java_calling_convention describes stack locations as ideal slots on 4280 // a frame with no abi restrictions. Since we must observe abi restrictions 4281 // (like the placement of the register window) the slots must be biased by 4282 // the following value. 4283 static int reg2offset_in(VMReg r) { 4284 // Account for saved fp and ra 4285 // This should really be in_preserve_stack_slots 4286 return r->reg2stack() * VMRegImpl::stack_slot_size; 4287 } 4288 4289 static int reg2offset_out(VMReg r) { 4290 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; 4291 } 4292 4293 // On 64 bit we will store integer like items to the stack as 4294 // 64 bits items (riscv64 abi) even though java would only store 4295 // 32bits for a parameter. 
On 32bit it will simply be 32 bits 4296 // So this routine will do 32->32 on 32bit and 32->64 on 64bit 4297 void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) { 4298 if (src.first()->is_stack()) { 4299 if (dst.first()->is_stack()) { 4300 // stack to stack 4301 ld(tmp, Address(fp, reg2offset_in(src.first()))); 4302 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 4303 } else { 4304 // stack to reg 4305 lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4306 } 4307 } else if (dst.first()->is_stack()) { 4308 // reg to stack 4309 sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); 4310 } else { 4311 if (dst.first() != src.first()) { 4312 // 32bits extend sign 4313 addw(dst.first()->as_Register(), src.first()->as_Register(), zr); 4314 } 4315 } 4316 } 4317 4318 // An oop arg. Must pass a handle not the oop itself 4319 void MacroAssembler::object_move(OopMap* map, 4320 int oop_handle_offset, 4321 int framesize_in_slots, 4322 VMRegPair src, 4323 VMRegPair dst, 4324 bool is_receiver, 4325 int* receiver_offset) { 4326 assert_cond(map != NULL && receiver_offset != NULL); 4327 4328 // must pass a handle. First figure out the location we use as a handle 4329 Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register(); 4330 4331 // See if oop is NULL if it is we need no handle 4332 4333 if (src.first()->is_stack()) { 4334 // Oop is already on the stack as an argument 4335 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); 4336 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); 4337 if (is_receiver) { 4338 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; 4339 } 4340 4341 ld(t0, Address(fp, reg2offset_in(src.first()))); 4342 la(rHandle, Address(fp, reg2offset_in(src.first()))); 4343 // conditionally move a NULL 4344 Label notZero1; 4345 bnez(t0, notZero1); 4346 mv(rHandle, zr); 4347 bind(notZero1); 4348 } else { 4349 4350 // Oop is in a register we must store it to the space we reserve 4351 // on the stack for oop_handles and pass a handle if oop is non-NULL 4352 4353 const Register rOop = src.first()->as_Register(); 4354 int oop_slot = -1; 4355 if (rOop == j_rarg0) { 4356 oop_slot = 0; 4357 } else if (rOop == j_rarg1) { 4358 oop_slot = 1; 4359 } else if (rOop == j_rarg2) { 4360 oop_slot = 2; 4361 } else if (rOop == j_rarg3) { 4362 oop_slot = 3; 4363 } else if (rOop == j_rarg4) { 4364 oop_slot = 4; 4365 } else if (rOop == j_rarg5) { 4366 oop_slot = 5; 4367 } else if (rOop == j_rarg6) { 4368 oop_slot = 6; 4369 } else { 4370 assert(rOop == j_rarg7, "wrong register"); 4371 oop_slot = 7; 4372 } 4373 4374 oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; 4375 int offset = oop_slot * VMRegImpl::stack_slot_size; 4376 4377 map->set_oop(VMRegImpl::stack2reg(oop_slot)); 4378 // Store oop in handle area, may be NULL 4379 sd(rOop, Address(sp, offset)); 4380 if (is_receiver) { 4381 *receiver_offset = offset; 4382 } 4383 4384 //rOop maybe the same as rHandle 4385 if (rOop == rHandle) { 4386 Label isZero; 4387 beqz(rOop, isZero); 4388 la(rHandle, Address(sp, offset)); 4389 bind(isZero); 4390 } else { 4391 Label notZero2; 4392 la(rHandle, Address(sp, offset)); 4393 bnez(rOop, notZero2); 4394 mv(rHandle, zr); 4395 bind(notZero2); 4396 } 4397 } 4398 4399 // If arg is on the stack then place it otherwise it is already in correct reg. 
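// At this point rHandle is either zr (for a NULL oop) or the address of the
// stack slot holding the oop, i.e. the handle the native callee will see.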
4400 if (dst.first()->is_stack()) { 4401 sd(rHandle, Address(sp, reg2offset_out(dst.first()))); 4402 } 4403 } 4404 4405 // A float arg may have to do float reg int reg conversion 4406 void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) { 4407 assert(src.first()->is_stack() && dst.first()->is_stack() || 4408 src.first()->is_reg() && dst.first()->is_reg() || 4409 src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); 4410 if (src.first()->is_stack()) { 4411 if (dst.first()->is_stack()) { 4412 lwu(tmp, Address(fp, reg2offset_in(src.first()))); 4413 sw(tmp, Address(sp, reg2offset_out(dst.first()))); 4414 } else if (dst.first()->is_Register()) { 4415 lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4416 } else { 4417 ShouldNotReachHere(); 4418 } 4419 } else if (src.first() != dst.first()) { 4420 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { 4421 fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 4422 } else { 4423 ShouldNotReachHere(); 4424 } 4425 } 4426 } 4427 4428 // A long move 4429 void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) { 4430 if (src.first()->is_stack()) { 4431 if (dst.first()->is_stack()) { 4432 // stack to stack 4433 ld(tmp, Address(fp, reg2offset_in(src.first()))); 4434 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 4435 } else { 4436 // stack to reg 4437 ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4438 } 4439 } else if (dst.first()->is_stack()) { 4440 // reg to stack 4441 sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); 4442 } else { 4443 if (dst.first() != src.first()) { 4444 mv(dst.first()->as_Register(), src.first()->as_Register()); 4445 } 4446 } 4447 } 4448 4449 // A double move 4450 void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) { 4451 assert(src.first()->is_stack() && dst.first()->is_stack() || 4452 src.first()->is_reg() && dst.first()->is_reg() || 4453 src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); 4454 if (src.first()->is_stack()) { 4455 if (dst.first()->is_stack()) { 4456 ld(tmp, Address(fp, reg2offset_in(src.first()))); 4457 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 4458 } else if (dst.first()-> is_Register()) { 4459 ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4460 } else { 4461 ShouldNotReachHere(); 4462 } 4463 } else if (src.first() != dst.first()) { 4464 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { 4465 fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 4466 } else { 4467 ShouldNotReachHere(); 4468 } 4469 } 4470 } 4471 4472 void MacroAssembler::rt_call(address dest, Register tmp) { 4473 CodeBlob *cb = CodeCache::find_blob(dest); 4474 RuntimeAddress target(dest); 4475 if (cb) { 4476 far_call(target); 4477 } else { 4478 relocate(target.rspec(), [&] { 4479 int32_t offset; 4480 la_patchable(tmp, target, offset); 4481 jalr(x1, tmp, offset); 4482 }); 4483 } 4484 }
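// rt_call above, in short: a dest that resolves to a CodeBlob lives in the code
// cache and is reached with far_call; any other runtime entry point has its
// address materialized into tmp via la_patchable and is called through
// jalr(x1, tmp, offset), leaving the return address in x1 (ra).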