/*
 * Copyright (c) 2018, 2020 Red Hat, Inc. All rights reserved.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "c1/c1_LIRAssembler.hpp"
#include "macroAssembler_x86.hpp"
#include "shenandoahBarrierSetAssembler_x86.hpp"
#include "gc_implementation/shenandoah/shenandoahBarrierSet.hpp"
#include "gc_implementation/shenandoah/shenandoahForwarding.hpp"
#include "gc_implementation/shenandoah/shenandoahHeap.hpp"
#include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp"
#include "gc_implementation/shenandoah/shenandoahRuntime.hpp"
#include "gc_implementation/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#include "runtime/stubCodeGenerator.hpp"

ShenandoahBarrierSetAssembler* ShenandoahBarrierSetAssembler::bsasm() {
  return ShenandoahBarrierSet::barrier_set()->bsasm();
}

#define __ masm->

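// Helpers to spill and reload xmm0-xmm7 around runtime calls made from the
// barrier, since the called C code may clobber floating-point registers that
// still hold live values in interpreter or stub code.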
static void save_xmm_registers(MacroAssembler* masm) {
  __ subptr(rsp, 64);
  __ movdbl(Address(rsp, 0), xmm0);
  __ movdbl(Address(rsp, 8), xmm1);
  __ movdbl(Address(rsp, 16), xmm2);
  __ movdbl(Address(rsp, 24), xmm3);
  __ movdbl(Address(rsp, 32), xmm4);
  __ movdbl(Address(rsp, 40), xmm5);
  __ movdbl(Address(rsp, 48), xmm6);
  __ movdbl(Address(rsp, 56), xmm7);
}

static void restore_xmm_registers(MacroAssembler* masm) {
  __ movdbl(xmm0, Address(rsp, 0));
  __ movdbl(xmm1, Address(rsp, 8));
  __ movdbl(xmm2, Address(rsp, 16));
  __ movdbl(xmm3, Address(rsp, 24));
  __ movdbl(xmm4, Address(rsp, 32));
  __ movdbl(xmm5, Address(rsp, 40));
  __ movdbl(xmm6, Address(rsp, 48));
  __ movdbl(xmm7, Address(rsp, 56));
  __ addptr(rsp, 64);
}

void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, bool dest_uninitialized,
                                                       Register src, Register dst, Register count) {

  if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahStoreValEnqueueBarrier || ShenandoahLoadRefBarrier) {
#ifdef _LP64
    Register thread = r15_thread;
#else
    Register thread = rax;
    if (thread == src || thread == dst || thread == count) {
      thread = rbx;
    }
    if (thread == src || thread == dst || thread == count) {
      thread = rcx;
    }
    if (thread == src || thread == dst || thread == count) {
      thread = rdx;
    }
    __ push(thread);
    __ get_thread(thread);
#endif
    assert_different_registers(src, dst, count, thread);

    Label done;
    // Short-circuit if count == 0.
    __ testptr(count, count);
    __ jcc(Assembler::zero, done);

    // Avoid runtime call when not active.
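    // gc_state is a per-thread copy of the global heap state flags, so a single
    // testb against the current thread is enough to tell whether any barrier
    // work can possibly be needed for this copy.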
    Address gc_state(thread, in_bytes(JavaThread::gc_state_offset()));
    int flags;
    if (ShenandoahSATBBarrier && dest_uninitialized) {
      flags = ShenandoahHeap::HAS_FORWARDED;
    } else {
      flags = ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING;
    }
    __ testb(gc_state, flags);
    __ jcc(Assembler::zero, done);

    __ pusha();                      // push registers

#ifdef _LP64
    assert(src == rdi, "expected");
    assert(dst == rsi, "expected");
    // The count assert is commented out for generate_conjoint_long_oop_copy();
    // call_VM_leaf() will move the register into the right place.
    // assert(count == rdx, "expected");
    if (UseCompressedOops) {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry),
                      src, dst, count);
    } else
#endif
    {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry),
                      src, dst, count);
    }

    __ popa();
    __ bind(done);
    NOT_LP64(__ pop(thread);)
  }
}

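// Apply the load-reference barrier to the just-loaded oop in dst. When the heap
// may contain forwarded objects and dst points into the collection set, the
// runtime is called to return the to-space copy of the object.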
void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src) {
  if (!ShenandoahLoadRefBarrier) {
    return;
  }

  bool is_narrow = UseCompressedOops;

  Label heap_stable, not_cset;

  __ block_comment("load_reference_barrier { ");

  // Check if GC is active
#ifdef _LP64
  Register thread = r15_thread;
#else
  Register thread = rsi;
  if (thread == dst) {
    thread = rbx;
  }
  assert_different_registers(dst, src.base(), src.index(), thread);
  __ push(thread);
  __ get_thread(thread);
#endif

  Address gc_state(thread, in_bytes(JavaThread::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jcc(Assembler::zero, heap_stable);

  Register tmp1 = noreg, tmp2 = noreg;

  // Test for object in cset
  // Allocate temporary registers
  for (int i = 0; i < 8; i++) {
    Register r = as_Register(i);
    if (r != rsp && r != rbp && r != dst && r != src.base() && r != src.index()) {
      if (tmp1 == noreg) {
        tmp1 = r;
      } else {
        tmp2 = r;
        break;
      }
    }
  }
  assert(tmp1 != noreg, "tmp1 allocated");
  assert(tmp2 != noreg, "tmp2 allocated");
  assert_different_registers(tmp1, tmp2, src.base(), src.index());
  assert_different_registers(tmp1, tmp2, dst);

  __ push(tmp1);
  __ push(tmp2);

  // Optimized cset-test
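  // The collection set is tracked with a byte-per-region map: shift the oop by
  // the region size shift to get the region index, then test the byte at
  // in_cset_fast_test_addr() + index.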
  __ movptr(tmp1, dst);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
  __ movbool(tmp1, Address(tmp1, tmp2, Address::times_1));
  __ testbool(tmp1);
  __ jcc(Assembler::zero, not_cset);

  uint num_saved_regs = 4 + (dst != rax ? 1 : 0) LP64_ONLY(+4);
  __ subptr(rsp, num_saved_regs * wordSize);
  uint slot = num_saved_regs;
  if (dst != rax) {
    __ movptr(Address(rsp, (--slot) * wordSize), rax);
  }
  __ movptr(Address(rsp, (--slot) * wordSize), rcx);
  __ movptr(Address(rsp, (--slot) * wordSize), rdx);
  __ movptr(Address(rsp, (--slot) * wordSize), rdi);
  __ movptr(Address(rsp, (--slot) * wordSize), rsi);
#ifdef _LP64
  __ movptr(Address(rsp, (--slot) * wordSize), r8);
  __ movptr(Address(rsp, (--slot) * wordSize), r9);
  __ movptr(Address(rsp, (--slot) * wordSize), r10);
  __ movptr(Address(rsp, (--slot) * wordSize), r11);
  // r12-r15 are callee saved in all calling conventions
#endif
  assert(slot == 0, "must use all slots");

  // Shuffle registers such that dst is in c_rarg0 and addr in c_rarg1.
#ifdef _LP64
  Register arg0 = c_rarg0, arg1 = c_rarg1;
#else
  Register arg0 = rdi, arg1 = rsi;
#endif
  if (dst == arg1) {
    __ lea(arg0, src);
    __ xchgptr(arg1, arg0);
  } else {
    __ lea(arg1, src);
    __ movptr(arg0, dst);
  }

  save_xmm_registers(masm);
  if (is_narrow) {
    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), arg0, arg1);
  } else {
    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), arg0, arg1);
  }
  restore_xmm_registers(masm);

#ifdef _LP64
  __ movptr(r11, Address(rsp, (slot++) * wordSize));
  __ movptr(r10, Address(rsp, (slot++) * wordSize));
  __ movptr(r9,  Address(rsp, (slot++) * wordSize));
  __ movptr(r8,  Address(rsp, (slot++) * wordSize));
#endif
  __ movptr(rsi, Address(rsp, (slot++) * wordSize));
  __ movptr(rdi, Address(rsp, (slot++) * wordSize));
  __ movptr(rdx, Address(rsp, (slot++) * wordSize));
  __ movptr(rcx, Address(rsp, (slot++) * wordSize));

  if (dst != rax) {
    __ movptr(dst, rax);
    __ movptr(rax, Address(rsp, (slot++) * wordSize));
  }

  assert(slot == num_saved_regs, "must use all slots");
  __ addptr(rsp, num_saved_regs * wordSize);

  __ bind(not_cset);

  __ pop(tmp2);
  __ pop(tmp1);

  __ bind(heap_stable);

  __ block_comment("} load_reference_barrier");

#ifndef _LP64
  __ pop(thread);
#endif
}

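// The storeval barrier enqueues the value being stored (dst) into the SATB
// buffer, reusing the pre-barrier machinery, so that the concurrent GC still
// visits that object.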
void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
  if (ShenandoahStoreValEnqueueBarrier) {
    storeval_barrier_impl(masm, dst, tmp);
  }
}

void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahStoreValEnqueueBarrier, "should be enabled");

  if (dst == noreg) return;

  if (ShenandoahStoreValEnqueueBarrier) {
    // The set of registers to be saved+restored is the same as in the write-barrier above.
    // Those are the commonly used registers in the interpreter.
    __ pusha();
    // __ push_callee_saved_registers();
    __ subptr(rsp, 2 * Interpreter::stackElementSize);
    __ movdbl(Address(rsp, 0), xmm0);

#ifdef _LP64
    Register thread = r15_thread;
#else
    Register thread = rcx;
    if (thread == dst || thread == tmp) {
      thread = rdi;
    }
    if (thread == dst || thread == tmp) {
      thread = rbx;
    }
    __ get_thread(thread);
#endif
    assert_different_registers(dst, tmp, thread);

    __ g1_write_barrier_pre(noreg, dst, thread, tmp, true, false);
    __ movdbl(xmm0, Address(rsp, 0));
    __ addptr(rsp, 2 * Interpreter::stackElementSize);
    //__ pop_callee_saved_registers();
    __ popa();
  }
}

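// Load a heap oop from src into dst (decoding it if compressed oops are in use)
// and run it through the load-reference barrier before handing it to the caller.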
void ShenandoahBarrierSetAssembler::load_heap_oop(MacroAssembler* masm, Register dst, Address src) {
  Register result_dst = dst;
  // Preserve src location for LRB
  if (dst == src.base() || dst == src.index()) {
    dst = rdi;
    __ push(dst);
    assert_different_registers(dst, src.base(), src.index());
  }

#ifdef _LP64
  // FIXME: Must change all places where we try to load the klass.
  if (UseCompressedOops) {
    __ movl(dst, src);
    __ decode_heap_oop(dst);
  } else
#endif
    __ movptr(dst, src);

  load_reference_barrier(masm, dst, src);

  // Move loaded oop to final destination
  if (dst != result_dst) {
    __ movptr(result_dst, dst);
    __ pop(dst);
  }
}

// Special Shenandoah CAS implementation that handles false negatives
// due to concurrent evacuation.
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                                                Register res, Address addr, Register oldval, Register newval,
                                                bool exchange, Register tmp1, Register tmp2) {
  assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
  assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
  assert_different_registers(oldval, newval, tmp1, tmp2);

  Label L_success, L_failure;

  // Remember oldval for retry logic below
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp1, oldval);
  } else
#endif
  {
    __ movptr(tmp1, oldval);
  }

  // Step 1. Fast-path.
  //
  // Try to CAS with given arguments. If successful, then we are done.

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, L_success);

  // Step 2. CAS had failed. This may be a false negative.
  //
  // The trouble comes when we compare the to-space pointer with the from-space
  // pointer to the same object. To resolve this, it will suffice to resolve
  // the value from memory -- this will give both to-space pointers.
  // If they mismatch, then it was a legitimate failure.
  //
  // Before reaching for the resolve sequence, see if we can avoid the whole
  // shebang with filters.

  // Filter: when offending in-memory value is NULL, the failure is definitely legitimate
  __ testptr(oldval, oldval);
  __ jcc(Assembler::zero, L_failure);

  // Filter: when heap is stable, the failure is definitely legitimate
#ifdef _LP64
  const Register thread = r15_thread;
#else
  const Register thread = tmp2;
  __ get_thread(thread);
#endif
  Address gc_state(thread, in_bytes(JavaThread::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jcc(Assembler::zero, L_failure);

#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }

  // Decode offending in-memory value.
  // Test if-forwarded
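  // A forwarded object has both low lock bits of its mark word set (the "marked"
  // pattern, 0b11); any other combination means the object is not forwarded.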
  __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markOopDesc::marked_value);
  __ jcc(Assembler::noParity, L_failure);  // When odd number of bits, then not forwarded
  __ jcc(Assembler::zero, L_failure);      // When it is 00, then also not forwarded

  // Load and mask forwarding pointer
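  // Shifting right and back left by 2 clears the two lock bits, leaving the
  // to-space address that the mark word encodes for a forwarded object.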
  __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes()));
  __ shrptr(tmp2, 2);
  __ shlptr(tmp2, 2);

#ifdef _LP64
  if (UseCompressedOops) {
    __ decode_heap_oop(tmp1); // decode for comparison
  }
#endif

  // Now we have the forwarded offender in tmp2.
  // Compare and if they don't match, we have legitimate failure
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notEqual, L_failure);

  // Step 3. Need to fix the memory ptr before continuing.
  //
  // At this point, we have from-space oldval in the register, and its to-space
  // address is in tmp2. Let's try to update it into memory. We don't care if it
  // succeeds or not. If it does, then the retrying CAS would see it and succeed.
  // If this fixup fails, this means somebody else beat us to it, and necessarily
  // with to-space ptr store. We still have to do the retry, because the GC might
  // have updated the reference for us.

#ifdef _LP64
  if (UseCompressedOops) {
    __ encode_heap_oop(tmp2); // previously decoded at step 2.
  }
#endif

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(tmp2, addr);
  } else
#endif
  {
    __ cmpxchgptr(tmp2, addr);
  }

  // Step 4. Try to CAS again.
  //
  // This is guaranteed not to have false negatives, because oldval is definitely
  // to-space, and memory pointer is to-space as well. Nothing is able to store
  // from-space ptr into memory anymore. Make sure oldval is restored, after being
  // garbled during retries.
  //
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(oldval, tmp2);
  } else
#endif
  {
    __ movptr(oldval, tmp2);
  }

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  if (!exchange) {
    __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump
  }
  // Step 5. If we need a boolean result out of CAS, set the flag appropriately
  // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS.
  // Otherwise, the failure witness for CAE is in oldval on all paths, and we can return.

  if (exchange) {
    __ bind(L_failure);
    __ bind(L_success);
  } else {
    assert(res != NULL, "need result register");

    Label exit;
    __ bind(L_failure);
    __ xorptr(res, res);
    __ jmpb(exit);

    __ bind(L_success);
    __ movptr(res, 1);
    __ bind(exit);
  }
}

#undef __

#ifdef COMPILER1

#define __ ce->masm()->

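// Slow-path stub emitted by C1 for the load-reference barrier: the null and
// collection-set checks are redone here, and the runtime is only entered for
// objects that actually need to be resolved.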
void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  __ bind(*stub->entry());

  Label done;
  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  Register addr = stub->addr()->as_pointer_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();
  assert_different_registers(obj, res, addr, tmp1, tmp2);

  Label slow_path;

  assert(res == rax, "result must arrive in rax");

  if (res != obj) {
    __ mov(res, obj);
  }

  // Check for null.
  __ testptr(res, res);
  __ jcc(Assembler::zero, *stub->continuation());

  // Check for object being in the collection set.
  __ mov(tmp1, res);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
#ifdef _LP64
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
#else
  // On x86_32, C1 register allocator can give us the register without 8-bit support.
  // Do the full-register access and test to avoid compilation failures.
  __ movptr(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testptr(tmp2, 0xFF);
#endif
  __ jcc(Assembler::zero, *stub->continuation());

  __ bind(slow_path);
  ce->store_parameter(res, 0);
  ce->store_parameter(addr, 1);
  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::shenandoah_lrb_slow_id)));

  __ jmp(*stub->continuation());
}

#undef __

#endif // COMPILER1