/*
 * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/shenandoah/shenandoahBarrierSet.hpp"
#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
#include "gc/shenandoah/shenandoahForwarding.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.hpp"
#include "gc/shenandoah/shenandoahRuntime.hpp"
#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
#include "interpreter/interpreter.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#endif

#define __ masm->

static void save_machine_state(MacroAssembler* masm, bool handle_gpr, bool handle_fp) {
  if (handle_gpr) {
    __ push_IU_state();
  }

  if (handle_fp) {
    // Some paths can be reached from the c2i adapter with live fp arguments in registers.
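    // Java passes floating-point arguments in xmm0-xmm7 (see the assert below),
    // so saving these eight XMM registers covers all live fp argument values.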
    LP64_ONLY(assert(Argument::n_float_register_parameters_j == 8, "8 fp registers to save at java call"));

    if (UseSSE >= 2) {
      const int xmm_size = wordSize * LP64_ONLY(2) NOT_LP64(4);
      __ subptr(rsp, xmm_size * 8);
      __ movdbl(Address(rsp, xmm_size * 0), xmm0);
      __ movdbl(Address(rsp, xmm_size * 1), xmm1);
      __ movdbl(Address(rsp, xmm_size * 2), xmm2);
      __ movdbl(Address(rsp, xmm_size * 3), xmm3);
      __ movdbl(Address(rsp, xmm_size * 4), xmm4);
      __ movdbl(Address(rsp, xmm_size * 5), xmm5);
      __ movdbl(Address(rsp, xmm_size * 6), xmm6);
      __ movdbl(Address(rsp, xmm_size * 7), xmm7);
    } else if (UseSSE >= 1) {
      const int xmm_size = wordSize * LP64_ONLY(1) NOT_LP64(2);
      __ subptr(rsp, xmm_size * 8);
      __ movflt(Address(rsp, xmm_size * 0), xmm0);
      __ movflt(Address(rsp, xmm_size * 1), xmm1);
      __ movflt(Address(rsp, xmm_size * 2), xmm2);
      __ movflt(Address(rsp, xmm_size * 3), xmm3);
      __ movflt(Address(rsp, xmm_size * 4), xmm4);
      __ movflt(Address(rsp, xmm_size * 5), xmm5);
      __ movflt(Address(rsp, xmm_size * 6), xmm6);
      __ movflt(Address(rsp, xmm_size * 7), xmm7);
    } else {
      __ push_FPU_state();
    }
  }
}

static void restore_machine_state(MacroAssembler* masm, bool handle_gpr, bool handle_fp) {
  if (handle_fp) {
    if (UseSSE >= 2) {
      const int xmm_size = wordSize * LP64_ONLY(2) NOT_LP64(4);
      __ movdbl(xmm0, Address(rsp, xmm_size * 0));
      __ movdbl(xmm1, Address(rsp, xmm_size * 1));
      __ movdbl(xmm2, Address(rsp, xmm_size * 2));
      __ movdbl(xmm3, Address(rsp, xmm_size * 3));
      __ movdbl(xmm4, Address(rsp, xmm_size * 4));
      __ movdbl(xmm5, Address(rsp, xmm_size * 5));
      __ movdbl(xmm6, Address(rsp, xmm_size * 6));
      __ movdbl(xmm7, Address(rsp, xmm_size * 7));
      __ addptr(rsp, xmm_size * 8);
    } else if (UseSSE >= 1) {
      const int xmm_size = wordSize * LP64_ONLY(1) NOT_LP64(2);
      __ movflt(xmm0, Address(rsp, xmm_size * 0));
      __ movflt(xmm1, Address(rsp, xmm_size * 1));
      __ movflt(xmm2, Address(rsp, xmm_size * 2));
      __ movflt(xmm3, Address(rsp, xmm_size * 3));
      __ movflt(xmm4, Address(rsp, xmm_size * 4));
      __ movflt(xmm5, Address(rsp, xmm_size * 5));
      __ movflt(xmm6, Address(rsp, xmm_size * 6));
      __ movflt(xmm7, Address(rsp, xmm_size * 7));
      __ addptr(rsp, xmm_size * 8);
    } else {
      __ pop_FPU_state();
    }
  }

  if (handle_gpr) {
    __ pop_IU_state();
  }
}

void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                                       Register src, Register dst, Register count) {

  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;

  if (is_reference_type(type)) {

    if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) {
#ifdef _LP64
      Register thread = r15_thread;
#else
      Register thread = rax;
      if (thread == src || thread == dst || thread == count) {
        thread = rbx;
      }
      if (thread == src || thread == dst || thread == count) {
        thread = rcx;
      }
      if (thread == src || thread == dst || thread == count) {
        thread = rdx;
      }
      __ push(thread);
      __ get_thread(thread);
#endif
      assert_different_registers(src, dst, count, thread);

      Label done;
      // Short-circuit if count == 0.
      __ testptr(count, count);
      __ jcc(Assembler::zero, done);

      // Avoid runtime call when not active.
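      // When the destination is uninitialized there are no previous values to
      // record, so only forwarded (from-space) objects matter; otherwise the
      // barrier is also needed while marking is in progress.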
      Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
      int flags;
      if (ShenandoahSATBBarrier && dest_uninitialized) {
        flags = ShenandoahHeap::HAS_FORWARDED;
      } else {
        flags = ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING;
      }
      __ testb(gc_state, flags);
      __ jcc(Assembler::zero, done);

      save_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ false);

#ifdef _LP64
      assert(src == rdi, "expected");
      assert(dst == rsi, "expected");
      assert(count == rdx, "expected");
      if (UseCompressedOops) {
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry),
                        src, dst, count);
      } else
#endif
      {
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry),
                        src, dst, count);
      }

      restore_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ false);

      __ bind(done);
      NOT_LP64(__ pop(thread);)
    }
  }

}

void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
                                                                 Register obj,
                                                                 Register pre_val,
                                                                 Register thread,
                                                                 Register tmp,
                                                                 bool tosca_live,
                                                                 bool expand_call) {

  if (ShenandoahSATBBarrier) {
    satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call);
  }
}

void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
                                                           Register obj,
                                                           Register pre_val,
                                                           Register thread,
                                                           Register tmp,
                                                           bool tosca_live,
                                                           bool expand_call) {
  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset()));
  Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING);
  __ jcc(Assembler::zero, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
  }

  // Is the previous value null?
  __ cmpptr(pre_val, NULL_WORD);
  __ jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  __ movptr(tmp, index);  // tmp := *index_adr
  __ cmpptr(tmp, 0);      // tmp == 0?
  __ jcc(Assembler::equal, runtime); // If yes, goto runtime

  __ subptr(tmp, wordSize);  // tmp := tmp - wordSize
  __ movptr(index, tmp);     // *index_adr := tmp
  __ addptr(tmp, buffer);    // tmp := tmp + *buffer_adr

  // Record the previous value
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);
  // save the live input values
  if(tosca_live) __ push(rax);

  if (obj != noreg && obj != rax)
    __ push(obj);

  if (pre_val != rax)
    __ push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == nullptr.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( __ push(thread); )

#ifdef _LP64
  // We move pre_val into c_rarg0 early, in order to avoid smashing it, should
  // pre_val be c_rarg1 (where the call prologue would copy thread argument).
  // Note: this should not accidentally smash thread, because thread is always r15.
  assert(thread != c_rarg0, "smashed arg");
  if (c_rarg0 != pre_val) {
    __ mov(c_rarg0, pre_val);
  }
#endif

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
#ifdef _LP64
    if (c_rarg1 != thread) {
      __ mov(c_rarg1, thread);
    }
    // Already moved pre_val into c_rarg0 above
#else
    __ push(thread);
    __ push(pre_val);
#endif
    __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread);
  }

  NOT_LP64( __ pop(thread); )

  // restore the live input values
  if (pre_val != rax)
    __ pop(pre_val);

  if (obj != noreg && obj != rax)
    __ pop(obj);

  if(tosca_live) __ pop(rax);

  __ bind(done);
}

void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src, DecoratorSet decorators) {
  assert(ShenandoahLoadRefBarrier, "Should be enabled");

  bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
  bool is_narrow = UseCompressedOops && !is_native;

  Label heap_stable, not_cset;

  __ block_comment("load_reference_barrier { ");

  // Check if GC is active
#ifdef _LP64
  Register thread = r15_thread;
#else
  Register thread = rcx;
  if (thread == dst) {
    thread = rbx;
  }
  __ push(thread);
  __ get_thread(thread);
#endif

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  int flags = ShenandoahHeap::HAS_FORWARDED;
  if (!is_strong) {
    flags |= ShenandoahHeap::WEAK_ROOTS;
  }
  __ testb(gc_state, flags);
  __ jcc(Assembler::zero, heap_stable);
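  // For strong accesses, filter further by the collection set below: the byte map
  // at in_cset_fast_test_addr() is indexed by region index (object address shifted
  // right by the region size shift), and only objects in the cset need the slow path.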

  Register tmp1 = noreg, tmp2 = noreg;
  if (is_strong) {
    // Test for object in cset
    // Allocate temporary registers
    for (int i = 0; i < 8; i++) {
      Register r = as_Register(i);
      if (r != rsp && r != rbp && r != dst && r != src.base() && r != src.index()) {
        if (tmp1 == noreg) {
          tmp1 = r;
        } else {
          tmp2 = r;
          break;
        }
      }
    }
    assert(tmp1 != noreg, "tmp1 allocated");
    assert(tmp2 != noreg, "tmp2 allocated");
    assert_different_registers(tmp1, tmp2, src.base(), src.index());
    assert_different_registers(tmp1, tmp2, dst);

    __ push(tmp1);
    __ push(tmp2);

    // Optimized cset-test
    __ movptr(tmp1, dst);
    __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
    __ movbool(tmp1, Address(tmp1, tmp2, Address::times_1));
    __ testbool(tmp1);
    __ jcc(Assembler::zero, not_cset);
  }

  save_machine_state(masm, /* handle_gpr = */ false, /* handle_fp = */ true);

  // The rest is saved with the optimized path

  uint num_saved_regs = 4 + (dst != rax ? 1 : 0) LP64_ONLY(+4);
  __ subptr(rsp, num_saved_regs * wordSize);
  uint slot = num_saved_regs;
  if (dst != rax) {
    __ movptr(Address(rsp, (--slot) * wordSize), rax);
  }
  __ movptr(Address(rsp, (--slot) * wordSize), rcx);
  __ movptr(Address(rsp, (--slot) * wordSize), rdx);
  __ movptr(Address(rsp, (--slot) * wordSize), rdi);
  __ movptr(Address(rsp, (--slot) * wordSize), rsi);
#ifdef _LP64
  __ movptr(Address(rsp, (--slot) * wordSize), r8);
  __ movptr(Address(rsp, (--slot) * wordSize), r9);
  __ movptr(Address(rsp, (--slot) * wordSize), r10);
  __ movptr(Address(rsp, (--slot) * wordSize), r11);
  // r12-r15 are callee saved in all calling conventions
#endif
  assert(slot == 0, "must use all slots");

  // Shuffle registers such that dst is in c_rarg0 and addr in c_rarg1.
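  // If dst already sits in arg1, materialize the address into arg0 first and then
  // swap the two, so neither value is clobbered before it has been read.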
#ifdef _LP64
  Register arg0 = c_rarg0, arg1 = c_rarg1;
#else
  Register arg0 = rdi, arg1 = rsi;
#endif
  if (dst == arg1) {
    __ lea(arg0, src);
    __ xchgptr(arg1, arg0);
  } else {
    __ lea(arg1, src);
    __ movptr(arg0, dst);
  }

  if (is_strong) {
    if (is_narrow) {
      __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow), arg0, arg1);
    } else {
      __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), arg0, arg1);
    }
  } else if (is_weak) {
    if (is_narrow) {
      __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), arg0, arg1);
    } else {
      __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), arg0, arg1);
    }
  } else {
    assert(is_phantom, "only remaining strength");
    assert(!is_narrow, "phantom access cannot be narrow");
    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), arg0, arg1);
  }

#ifdef _LP64
  __ movptr(r11, Address(rsp, (slot++) * wordSize));
  __ movptr(r10, Address(rsp, (slot++) * wordSize));
  __ movptr(r9, Address(rsp, (slot++) * wordSize));
  __ movptr(r8, Address(rsp, (slot++) * wordSize));
#endif
  __ movptr(rsi, Address(rsp, (slot++) * wordSize));
  __ movptr(rdi, Address(rsp, (slot++) * wordSize));
  __ movptr(rdx, Address(rsp, (slot++) * wordSize));
  __ movptr(rcx, Address(rsp, (slot++) * wordSize));

  if (dst != rax) {
    __ movptr(dst, rax);
    __ movptr(rax, Address(rsp, (slot++) * wordSize));
  }

  assert(slot == num_saved_regs, "must use all slots");
  __ addptr(rsp, num_saved_regs * wordSize);

  restore_machine_state(masm, /* handle_gpr = */ false, /* handle_fp = */ true);

  __ bind(not_cset);

  if (is_strong) {
    __ pop(tmp2);
    __ pop(tmp1);
  }

  __ bind(heap_stable);

  __ block_comment("} load_reference_barrier");

#ifndef _LP64
  __ pop(thread);
#endif
}

void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) {
  if (ShenandoahIUBarrier) {
    iu_barrier_impl(masm, dst, tmp);
  }
}

void ShenandoahBarrierSetAssembler::iu_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahIUBarrier, "should be enabled");

  if (dst == noreg) return;

  if (ShenandoahIUBarrier) {
    save_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);

#ifdef _LP64
    Register thread = r15_thread;
#else
    Register thread = rcx;
    if (thread == dst || thread == tmp) {
      thread = rdi;
    }
    if (thread == dst || thread == tmp) {
      thread = rbx;
    }
    __ get_thread(thread);
#endif
    assert_different_registers(dst, tmp, thread);

    satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false);

    restore_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);
  }
}

//
// Arguments:
//
// Inputs:
//   src:        oop location, might be clobbered
//   tmp1:       scratch register, might not be valid.
//
// Output:
//   dst:        oop loaded from src location
//
// Kill:
//   tmp1 (if it is valid)
//
void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                            Register dst, Address src, Register tmp1, Register tmp_thread) {
  // 1: non-reference load, no additional barrier is needed
  if (!is_reference_type(type)) {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
    return;
  }

  assert((decorators & ON_UNKNOWN_OOP_REF) == 0, "Not expected");

  // 2: load a reference from src location and apply LRB if needed
  if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
    Register result_dst = dst;
    bool use_tmp1_for_dst = false;

    // Preserve src location for LRB
    if (dst == src.base() || dst == src.index()) {
      // Use tmp1 for dst if possible, as it is not used in BarrierSetAssembler::load_at()
      if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) {
        dst = tmp1;
        use_tmp1_for_dst = true;
      } else {
        dst = rdi;
        __ push(dst);
      }
      assert_different_registers(dst, src.base(), src.index());
    }

    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);

    load_reference_barrier(masm, dst, src, decorators);

    // Move loaded oop to final destination
    if (dst != result_dst) {
      __ movptr(result_dst, dst);

      if (!use_tmp1_for_dst) {
        __ pop(dst);
      }

      dst = result_dst;
    }
  } else {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
  }

  // 3: apply keep-alive barrier if needed
  if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
    save_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);

    Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
    assert_different_registers(dst, tmp1, tmp_thread);
    if (!thread->is_valid()) {
      thread = rdx;
    }
    NOT_LP64(__ get_thread(thread));
    // Generate the SATB pre-barrier code to log the value of
    // the referent field in an SATB buffer.
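    // The referent is already loaded into dst, so it is passed as pre_val with
    // obj == noreg; recording it keeps the referent alive w.r.t. concurrent marking.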
    shenandoah_write_barrier_pre(masm /* masm */,
                                 noreg /* obj */,
                                 dst /* pre_val */,
                                 thread /* thread */,
                                 tmp1 /* tmp */,
                                 true /* tosca_live */,
                                 true /* expand_call */);

    restore_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);
  }
}

void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                             Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {

  bool on_oop = is_reference_type(type);
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool as_normal = (decorators & AS_NORMAL) != 0;
  if (on_oop && in_heap) {
    bool needs_pre_barrier = as_normal;

    Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
    // flatten object address if needed
    // We do it regardless of precise because we need the registers
    if (dst.index() == noreg && dst.disp() == 0) {
      if (dst.base() != tmp1) {
        __ movptr(tmp1, dst.base());
      }
    } else {
      __ lea(tmp1, dst);
    }

    assert_different_registers(val, tmp1, tmp2, tmp3, rthread);

#ifndef _LP64
    __ get_thread(rthread);
    InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);
    imasm->save_bcp();
#endif

    if (needs_pre_barrier) {
      shenandoah_write_barrier_pre(masm /*masm*/,
                                   tmp1 /* obj */,
                                   tmp2 /* pre_val */,
                                   rthread /* thread */,
                                   tmp3 /* tmp */,
                                   val != noreg /* tosca_live */,
                                   false /* expand_call */);
    }
    if (val == noreg) {
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);
    } else {
      iu_barrier(masm, val, tmp3);
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);
    }
    NOT_LP64(imasm->restore_bcp());
  } else {
    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
  }
}

void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                                  Register obj, Register tmp, Label& slowpath) {
  Label done;
  // Resolve jobject
  BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);

  // Check for null.
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, done);

  Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
  __ testb(gc_state, ShenandoahHeap::EVACUATION);
  __ jccb(Assembler::notZero, slowpath);
  __ bind(done);
}

// Special Shenandoah CAS implementation that handles false negatives
// due to concurrent evacuation.
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                                                Register res, Address addr, Register oldval, Register newval,
                                                bool exchange, Register tmp1, Register tmp2) {
  assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
  assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
  assert_different_registers(oldval, tmp1, tmp2);
  assert_different_registers(newval, tmp1, tmp2);

  Label L_success, L_failure;

  // Remember oldval for retry logic below
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp1, oldval);
  } else
#endif
  {
    __ movptr(tmp1, oldval);
  }

  // Step 1. Fast-path.
  //
  // Try to CAS with given arguments. If successful, then we are done.

#ifdef _LP64
  if (UseCompressedOops) {
    __ lock();
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ lock();
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, L_success);

  // Step 2. CAS had failed. This may be a false negative.
  //
  // The trouble comes when we compare the to-space pointer with the from-space
  // pointer to the same object. To resolve this, it will suffice to resolve
  // the value from memory -- this will give both to-space pointers.
  // If they mismatch, then it was a legitimate failure.
  //
  // Before reaching the resolve sequence, see if we can avoid the whole shebang
  // with filters.

  // Filter: when offending in-memory value is null, the failure is definitely legitimate
  __ testptr(oldval, oldval);
  __ jcc(Assembler::zero, L_failure);

  // Filter: when heap is stable, the failure is definitely legitimate
#ifdef _LP64
  const Register thread = r15_thread;
#else
  const Register thread = tmp2;
  __ get_thread(thread);
#endif
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jcc(Assembler::zero, L_failure);

#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }

  // Decode offending in-memory value.
  // Test if-forwarded
  __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markWord::marked_value);
  __ jcc(Assembler::noParity, L_failure); // When odd number of bits, then not forwarded
  __ jcc(Assembler::zero, L_failure);     // When it is 00, then also not forwarded

  // Load and mask forwarding pointer
  __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes()));
  __ shrptr(tmp2, 2);
  __ shlptr(tmp2, 2);

#ifdef _LP64
  if (UseCompressedOops) {
    __ decode_heap_oop(tmp1); // decode for comparison
  }
#endif

  // Now we have the forwarded offender in tmp2.
  // Compare and if they don't match, we have legitimate failure
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notEqual, L_failure);

  // Step 3. Need to fix the memory ptr before continuing.
  //
  // At this point, we have from-space oldval in the register, and its to-space
  // address is in tmp2. Let's try to update it into memory. We don't care if it
  // succeeds or not. If it does, then the retrying CAS would see it and succeed.
  // If this fixup fails, this means somebody else beat us to it, and necessarily
  // with to-space ptr store. We still have to do the retry, because the GC might
  // have updated the reference for us.

#ifdef _LP64
  if (UseCompressedOops) {
    __ encode_heap_oop(tmp2); // previously decoded at step 2.
  }
#endif

#ifdef _LP64
  if (UseCompressedOops) {
    __ lock();
    __ cmpxchgl(tmp2, addr);
  } else
#endif
  {
    __ lock();
    __ cmpxchgptr(tmp2, addr);
  }

  // Step 4. Try to CAS again.
  //
  // This is guaranteed not to have false negatives, because oldval is definitely
  // to-space, and memory pointer is to-space as well. Nothing is able to store
  // from-space ptr into memory anymore. Make sure oldval is restored, after being
  // garbled during retries.
  //
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(oldval, tmp2);
  } else
#endif
  {
    __ movptr(oldval, tmp2);
  }

#ifdef _LP64
  if (UseCompressedOops) {
    __ lock();
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ lock();
    __ cmpxchgptr(newval, addr);
  }
  if (!exchange) {
    __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump
  }

  // Step 5. If we need a boolean result out of CAS, set the flag appropriately,
  // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS.
  // Otherwise, failure witness for CAE is in oldval on all paths, and we can return.

  if (exchange) {
    __ bind(L_failure);
    __ bind(L_success);
  } else {
    assert(res != noreg, "need result register");

    Label exit;
    __ bind(L_failure);
    __ xorptr(res, res);
    __ jmpb(exit);

    __ bind(L_success);
    __ movptr(res, 1);
    __ bind(exit);
  }
}

#undef __

#ifdef COMPILER1

#define __ ce->masm()->

void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
  }

  __ cmpptr(pre_val_reg, NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());

}

void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  DecoratorSet decorators = stub->decorators();
  bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native = ShenandoahBarrierSet::is_native_access(decorators);

  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  Register addr = stub->addr()->as_pointer_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();
  assert_different_registers(obj, res, addr, tmp1, tmp2);

  Label slow_path;

  assert(res == rax, "result must arrive in rax");

  if (res != obj) {
    __ mov(res, obj);
  }

  if (is_strong) {
    // Check for object being in the collection set.
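    // Same cset fast-test as in load_reference_barrier(): index the cset byte map
    // by region index and skip the runtime call for objects outside the cset.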
    __ mov(tmp1, res);
    __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
#ifdef _LP64
    __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
    __ testbool(tmp2);
#else
    // On x86_32, C1 register allocator can give us a register without 8-bit support.
    // Do the full-register access and test to avoid compilation failures.
    __ movptr(tmp2, Address(tmp2, tmp1, Address::times_1));
    __ testptr(tmp2, 0xFF);
#endif
    __ jcc(Assembler::zero, *stub->continuation());
  }

  __ bind(slow_path);
  ce->store_parameter(res, 0);
  ce->store_parameter(addr, 1);
  if (is_strong) {
    if (is_native) {
      __ call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin()));
    } else {
      __ call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin()));
    }
  } else if (is_weak) {
    __ call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
  } else {
    assert(is_phantom, "only remaining strength");
    __ call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin()));
  }
  __ jmp(*stub->continuation());
}

#undef __

#define __ sasm->

void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_pre_barrier", false);
  // arg0 : previous value of memory

  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
  const Register tmp = rdx;

  NOT_LP64(__ get_thread(thread);)

  Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is SATB still active?
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING);
  __ jcc(Assembler::zero, done);

  // Can we store original value in the thread's buffer?
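  // Is index == 0? (The index field is typed as size_t.) If so, the SATB buffer
  // is full and the runtime call below handles recording the value.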

  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);

  // prev_val (rax)
  __ load_parameter(0, pre_val);
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);

  __ save_live_registers_no_oop_map(true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);

  __ restore_live_registers(true);

  __ bind(done);

  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}

void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) {
  __ prologue("shenandoah_load_reference_barrier", false);
  // arg0 : object to be resolved

  __ save_live_registers_no_oop_map(true);

  bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native = ShenandoahBarrierSet::is_native_access(decorators);

#ifdef _LP64
  __ load_parameter(0, c_rarg0);
  __ load_parameter(1, c_rarg1);
  if (is_strong) {
    if (is_native) {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), c_rarg0, c_rarg1);
    } else {
      if (UseCompressedOops) {
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow), c_rarg0, c_rarg1);
      } else {
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), c_rarg0, c_rarg1);
      }
    }
  } else if (is_weak) {
    assert(!is_native, "weak must not be called off-heap");
    if (UseCompressedOops) {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), c_rarg0, c_rarg1);
    } else {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);
    }
  } else {
    assert(is_phantom, "only remaining strength");
    assert(is_native, "phantom must only be called off-heap");
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), c_rarg0, c_rarg1);
  }
#else
  __ load_parameter(0, rax);
  __ load_parameter(1, rbx);
  if (is_strong) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), rax, rbx);
  } else if (is_weak) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), rax, rbx);
  } else {
    assert(is_phantom, "only remaining strength");
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), rax, rbx);
  }
#endif

  __ restore_live_registers_except_rax(true);

  __ epilogue();
}

#undef __

#endif // COMPILER1