/*
 * Copyright (c) 2018, 2020 Red Hat, Inc. All rights reserved.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "c1/c1_LIRAssembler.hpp"
#include "macroAssembler_x86.hpp"
#include "shenandoahBarrierSetAssembler_x86.hpp"
#include "gc_implementation/shenandoah/shenandoahBarrierSet.hpp"
#include "gc_implementation/shenandoah/shenandoahForwarding.hpp"
#include "gc_implementation/shenandoah/shenandoahHeap.hpp"
#include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp"
#include "gc_implementation/shenandoah/shenandoahRuntime.hpp"
#include "gc_implementation/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#include "runtime/stubCodeGenerator.hpp"

ShenandoahBarrierSetAssembler* ShenandoahBarrierSetAssembler::bsasm() {
  return ShenandoahBarrierSet::barrier_set()->bsasm();
}

#define __ masm->

static void save_xmm_registers(MacroAssembler* masm) {
  __ subptr(rsp, 64);
  __ movdbl(Address(rsp, 0), xmm0);
  __ movdbl(Address(rsp, 8), xmm1);
  __ movdbl(Address(rsp, 16), xmm2);
  __ movdbl(Address(rsp, 24), xmm3);
  __ movdbl(Address(rsp, 32), xmm4);
  __ movdbl(Address(rsp, 40), xmm5);
  __ movdbl(Address(rsp, 48), xmm6);
  __ movdbl(Address(rsp, 56), xmm7);
}

static void restore_xmm_registers(MacroAssembler* masm) {
  __ movdbl(xmm0, Address(rsp, 0));
  __ movdbl(xmm1, Address(rsp, 8));
  __ movdbl(xmm2, Address(rsp, 16));
  __ movdbl(xmm3, Address(rsp, 24));
  __ movdbl(xmm4, Address(rsp, 32));
  __ movdbl(xmm5, Address(rsp, 40));
  __ movdbl(xmm6, Address(rsp, 48));
  __ movdbl(xmm7, Address(rsp, 56));
  __ addptr(rsp, 64);
}

void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, bool dest_uninitialized,
                                                        Register src, Register dst, Register count) {

  if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahStoreValEnqueueBarrier || ShenandoahLoadRefBarrier) {
#ifdef _LP64
    Register thread = r15_thread;
#else
    Register thread = rax;
    if (thread == src || thread == dst || thread == count) {
      thread = rbx;
    }
    if (thread == src || thread == dst || thread == count) {
      thread = rcx;
    }
    if (thread == src || thread == dst || thread == count) {
      thread = rdx;
    }
    __ push(thread);
    __ get_thread(thread);
#endif
    assert_different_registers(src, dst, count, thread);

    Label done;
    // Short-circuit if count == 0.
    __ testptr(count, count);
    __ jcc(Assembler::zero, done);

    // Avoid runtime call when not active.
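    // gc_state is a per-thread copy of the collector's current phase flags
    // (e.g. MARKING, HAS_FORWARDED), kept in JavaThread so generated code can
    // test it with a single byte load instead of reaching into the heap.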
    Address gc_state(thread, in_bytes(JavaThread::gc_state_offset()));
    int flags;
    if (ShenandoahSATBBarrier && dest_uninitialized) {
      flags = ShenandoahHeap::HAS_FORWARDED;
    } else {
      flags = ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING;
    }
    __ testb(gc_state, flags);
    __ jcc(Assembler::zero, done);

    __ pusha(); // push registers

#ifdef _LP64
    assert(src == rdi, "expected");
    assert(dst == rsi, "expected");
    // commented-out for generate_conjoint_long_oop_copy(), call_VM_leaf() will move
    // the register into the right place.
    // assert(count == rdx, "expected");
    if (UseCompressedOops) {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry),
                      src, dst, count);
    } else
#endif
    {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry),
                      src, dst, count);
    }

    __ popa();
    __ bind(done);
    NOT_LP64(__ pop(thread);)
  }
}

void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src) {
  if (!ShenandoahLoadRefBarrier) {
    return;
  }

  bool is_narrow = UseCompressedOops;

  Label heap_stable, not_cset;

  __ block_comment("load_reference_barrier { ");

  // Check if GC is active
#ifdef _LP64
  Register thread = r15_thread;
#else
  Register thread = rsi;
  if (thread == dst) {
    thread = rbx;
  }
  assert_different_registers(dst, src.base(), src.index(), thread);
  __ push(thread);
  __ get_thread(thread);
#endif

  Address gc_state(thread, in_bytes(JavaThread::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jcc(Assembler::zero, heap_stable);

  Register tmp1 = noreg, tmp2 = noreg;

  // Test for object in cset
  // Allocate temporary registers
  for (int i = 0; i < 8; i++) {
    Register r = as_Register(i);
    if (r != rsp && r != rbp && r != dst && r != src.base() && r != src.index()) {
      if (tmp1 == noreg) {
        tmp1 = r;
      } else {
        tmp2 = r;
        break;
      }
    }
  }
  assert(tmp1 != noreg, "tmp1 allocated");
  assert(tmp2 != noreg, "tmp2 allocated");
  assert_different_registers(tmp1, tmp2, src.base(), src.index());
  assert_different_registers(tmp1, tmp2, dst);

  __ push(tmp1);
  __ push(tmp2);

  // Optimized cset-test
  __ movptr(tmp1, dst);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
  __ movbool(tmp1, Address(tmp1, tmp2, Address::times_1));
  __ testbool(tmp1);
  __ jcc(Assembler::zero, not_cset);

  uint num_saved_regs = 4 + (dst != rax ? 1 : 0) LP64_ONLY(+4);
  __ subptr(rsp, num_saved_regs * wordSize);
  uint slot = num_saved_regs;
  if (dst != rax) {
    __ movptr(Address(rsp, (--slot) * wordSize), rax);
  }
  __ movptr(Address(rsp, (--slot) * wordSize), rcx);
  __ movptr(Address(rsp, (--slot) * wordSize), rdx);
  __ movptr(Address(rsp, (--slot) * wordSize), rdi);
  __ movptr(Address(rsp, (--slot) * wordSize), rsi);
#ifdef _LP64
  __ movptr(Address(rsp, (--slot) * wordSize), r8);
  __ movptr(Address(rsp, (--slot) * wordSize), r9);
  __ movptr(Address(rsp, (--slot) * wordSize), r10);
  __ movptr(Address(rsp, (--slot) * wordSize), r11);
  // r12-r15 are callee saved in all calling conventions
#endif
  assert(slot == 0, "must use all slots");

  // Shuffle registers such that dst is in c_rarg0 and addr in c_rarg1.
#ifdef _LP64
  Register arg0 = c_rarg0, arg1 = c_rarg1;
#else
  Register arg0 = rdi, arg1 = rsi;
#endif
  if (dst == arg1) {
    __ lea(arg0, src);
    __ xchgptr(arg1, arg0);
  } else {
    __ lea(arg1, src);
    __ movptr(arg0, dst);
  }

  save_xmm_registers(masm);
  if (is_narrow) {
    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), arg0, arg1);
  } else {
    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), arg0, arg1);
  }
  restore_xmm_registers(masm);

#ifdef _LP64
  __ movptr(r11, Address(rsp, (slot++) * wordSize));
  __ movptr(r10, Address(rsp, (slot++) * wordSize));
  __ movptr(r9, Address(rsp, (slot++) * wordSize));
  __ movptr(r8, Address(rsp, (slot++) * wordSize));
#endif
  __ movptr(rsi, Address(rsp, (slot++) * wordSize));
  __ movptr(rdi, Address(rsp, (slot++) * wordSize));
  __ movptr(rdx, Address(rsp, (slot++) * wordSize));
  __ movptr(rcx, Address(rsp, (slot++) * wordSize));

  if (dst != rax) {
    __ movptr(dst, rax);
    __ movptr(rax, Address(rsp, (slot++) * wordSize));
  }

  assert(slot == num_saved_regs, "must use all slots");
  __ addptr(rsp, num_saved_regs * wordSize);

  __ bind(not_cset);

  __ pop(tmp2);
  __ pop(tmp1);

  __ bind(heap_stable);

  __ block_comment("} load_reference_barrier");

#ifndef _LP64
  __ pop(thread);
#endif
}

void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
  if (ShenandoahStoreValEnqueueBarrier) {
    storeval_barrier_impl(masm, dst, tmp);
  }
}

void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahStoreValEnqueueBarrier, "should be enabled");

  if (dst == noreg) return;

  if (ShenandoahStoreValEnqueueBarrier) {
    // The set of registers to be saved+restored is the same as in the write-barrier above.
    // Those are the commonly used registers in the interpreter.
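    // Note: pusha() below covers only the general-purpose registers; xmm0 may
    // be live at this point, so it is spilled and reloaded separately.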
    __ pusha();
    // __ push_callee_saved_registers();
    __ subptr(rsp, 2 * Interpreter::stackElementSize);
    __ movdbl(Address(rsp, 0), xmm0);

#ifdef _LP64
    Register thread = r15_thread;
#else
    Register thread = rcx;
    if (thread == dst || thread == tmp) {
      thread = rdi;
    }
    if (thread == dst || thread == tmp) {
      thread = rbx;
    }
    __ get_thread(thread);
#endif
    assert_different_registers(dst, tmp, thread);

    __ g1_write_barrier_pre(noreg, dst, thread, tmp, true, false);
    __ movdbl(xmm0, Address(rsp, 0));
    __ addptr(rsp, 2 * Interpreter::stackElementSize);
    //__ pop_callee_saved_registers();
    __ popa();
  }
}

void ShenandoahBarrierSetAssembler::load_heap_oop(MacroAssembler* masm, Register dst, Address src) {
  Register result_dst = dst;
  // Preserve src location for LRB
  if (dst == src.base() || dst == src.index()) {
    dst = rdi;
    __ push(dst);
    assert_different_registers(dst, src.base(), src.index());
  }

#ifdef _LP64
  // FIXME: Must change all places where we try to load the klass.
  if (UseCompressedOops) {
    __ movl(dst, src);
    __ decode_heap_oop(dst);
  } else
#endif
    __ movptr(dst, src);

  load_reference_barrier(masm, dst, src);

  // Move loaded oop to final destination
  if (dst != result_dst) {
    __ movptr(result_dst, dst);
    __ pop(dst);
  }
}

// Special Shenandoah CAS implementation that handles false negatives
// due to concurrent evacuation.
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                                                Register res, Address addr, Register oldval, Register newval,
                                                bool exchange, Register tmp1, Register tmp2) {
  assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
  assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
  assert_different_registers(oldval, newval, tmp1, tmp2);

  Label L_success, L_failure;

  // Remember oldval for retry logic below
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp1, oldval);
  } else
#endif
  {
    __ movptr(tmp1, oldval);
  }

  // Step 1. Fast-path.
  //
  // Try to CAS with given arguments. If successful, then we are done.

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, L_success);

  // Step 2. CAS had failed. This may be a false negative.
  //
  // The trouble comes when we compare the to-space pointer with the from-space
  // pointer to the same object. To resolve this, it will suffice to resolve
  // the value from memory -- this will give both to-space pointers.
  // If they mismatch, then it was a legitimate failure.
  //
  // Before reaching the resolve sequence, see if we can avoid the whole shebang
  // with filters.

  // Filter: when offending in-memory value is NULL, the failure is definitely legitimate
  __ testptr(oldval, oldval);
  __ jcc(Assembler::zero, L_failure);

  // Filter: when heap is stable, the failure is definitely legitimate
#ifdef _LP64
  const Register thread = r15_thread;
#else
  const Register thread = tmp2;
  __ get_thread(thread);
#endif
  Address gc_state(thread, in_bytes(JavaThread::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jcc(Assembler::zero, L_failure);

#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }

  // Decode offending in-memory value.
  // Test if-forwarded
  __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markOopDesc::marked_value);
  __ jcc(Assembler::noParity, L_failure); // When odd number of bits, then not forwarded
  __ jcc(Assembler::zero, L_failure);     // When it is 00, then also not forwarded

  // Load and mask forwarding pointer
  __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes()));
  __ shrptr(tmp2, 2);
  __ shlptr(tmp2, 2);

#ifdef _LP64
  if (UseCompressedOops) {
    __ decode_heap_oop(tmp1); // decode for comparison
  }
#endif

  // Now we have the forwarded offender in tmp2.
  // Compare and if they don't match, we have a legitimate failure
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notEqual, L_failure);

  // Step 3. Need to fix the memory ptr before continuing.
  //
  // At this point, we have from-space oldval in the register, and its to-space
  // address is in tmp2. Let's try to update it into memory. We don't care if it
  // succeeds or not. If it does, then the retrying CAS would see it and succeed.
  // If this fixup fails, this means somebody else beat us to it, and necessarily
  // with to-space ptr store. We still have to do the retry, because the GC might
  // have updated the reference for us.

#ifdef _LP64
  if (UseCompressedOops) {
    __ encode_heap_oop(tmp2); // previously decoded at step 2.
  }
#endif

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(tmp2, addr);
  } else
#endif
  {
    __ cmpxchgptr(tmp2, addr);
  }

  // Step 4. Try to CAS again.
  //
  // This is guaranteed not to have false negatives, because oldval is definitely
  // to-space, and memory pointer is to-space as well. Nothing is able to store
  // from-space ptr into memory anymore. Make sure oldval is restored, after being
  // garbled during retries.
  //
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(oldval, tmp2);
  } else
#endif
  {
    __ movptr(oldval, tmp2);
  }

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  if (!exchange) {
    __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump
  }

  // Step 5. If we need a boolean result out of CAS, set the flag appropriately,
  // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS.
  // Otherwise, failure witness for CAE is in oldval on all paths, and we can return.
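  // Both CAS attempts funnel into the labels below: for CAE the caller consumes
  // the witness directly from oldval (rax); for a plain CAS we produce 0/1 in res.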

  if (exchange) {
    __ bind(L_failure);
    __ bind(L_success);
  } else {
    assert(res != NULL, "need result register");

    Label exit;
    __ bind(L_failure);
    __ xorptr(res, res);
    __ jmpb(exit);

    __ bind(L_success);
    __ movptr(res, 1);
    __ bind(exit);
  }
}

#undef __

#ifdef COMPILER1

#define __ ce->masm()->

void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  __ bind(*stub->entry());

  Label done;
  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  Register addr = stub->addr()->as_pointer_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();
  assert_different_registers(obj, res, addr, tmp1, tmp2);

  Label slow_path;

  assert(res == rax, "result must arrive in rax");

  if (res != obj) {
    __ mov(res, obj);
  }

  // Check for null.
  __ testptr(res, res);
  __ jcc(Assembler::zero, *stub->continuation());

  // Check for object being in the collection set.
  __ mov(tmp1, res);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
#ifdef _LP64
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
#else
  // On x86_32, C1 register allocator can give us the register without 8-bit support.
  // Do the full-register access and test to avoid compilation failures.
  __ movptr(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testptr(tmp2, 0xFF);
#endif
  __ jcc(Assembler::zero, *stub->continuation());

  __ bind(slow_path);
  ce->store_parameter(res, 0);
  ce->store_parameter(addr, 1);
  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::shenandoah_lrb_slow_id)));

  __ jmp(*stub->continuation());
}

#undef __

#endif // COMPILER1