1 /*
   2  * Copyright (c) 2018, Red Hat, Inc. All rights reserved.
   3  *
   4  * This code is free software; you can redistribute it and/or modify it
   5  * under the terms of the GNU General Public License version 2 only, as
   6  * published by the Free Software Foundation.
   7  *
   8  * This code is distributed in the hope that it will be useful, but WITHOUT
   9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  11  * version 2 for more details (a copy is included in the LICENSE file that
  12  * accompanied this code).
  13  *
  14  * You should have received a copy of the GNU General Public License version
  15  * 2 along with this work; if not, write to the Free Software Foundation,
  16  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  17  *
  18  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  19  * or visit www.oracle.com if you need additional information or have any
  20  * questions.
  21  *
  22  */
  23 
  24 #include "precompiled.hpp"
  25 #include "c1/c1_MacroAssembler.hpp"
  26 #include "c1/c1_LIRAssembler.hpp"
  27 #include "macroAssembler_x86.hpp"
  28 #include "shenandoahBarrierSetAssembler_x86.hpp"
  29 #include "gc_implementation/shenandoah/shenandoahBarrierSet.hpp"
  30 #include "gc_implementation/shenandoah/shenandoahForwarding.hpp"
  31 #include "gc_implementation/shenandoah/shenandoahHeap.hpp"
  32 #include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp"
  33 #include "gc_implementation/shenandoah/shenandoahRuntime.hpp"
  34 #include "gc_implementation/shenandoah/c1/shenandoahBarrierSetC1.hpp"
  35 #include "runtime/stubCodeGenerator.hpp"
  36 
  37 ShenandoahBarrierSetAssembler* ShenandoahBarrierSetAssembler::bsasm() {
  38   return ShenandoahBarrierSet::barrier_set()->bsasm();
  39 }
  40 
  41 #define __ masm->
  42 
  43 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, bool dest_uninitialized,
  44                                                        Register src, Register dst, Register count) {
  45 
  46   if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahStoreValEnqueueBarrier || ShenandoahLoadRefBarrier) {
  47 #ifdef _LP64
  48     Register thread = r15_thread;
  49 #else
  50     Register thread = rax;
  51     if (thread == src || thread == dst || thread == count) {
  52       thread = rbx;
  53     }
  54     if (thread == src || thread == dst || thread == count) {
  55       thread = rcx;
  56     }
  57     if (thread == src || thread == dst || thread == count) {
  58       thread = rdx;
  59     }
  60     __ push(thread);
  61     __ get_thread(thread);
  62 #endif
  63     assert_different_registers(src, dst, count, thread);
  64 
  65     Label done;
  66     // Short-circuit if count == 0.
  67     __ testptr(count, count);
  68     __ jcc(Assembler::zero, done);
  69 
  70     // Avoid runtime call when not active.
  71     Address gc_state(thread, in_bytes(JavaThread::gc_state_offset()));
  72     int flags;
  73     if (ShenandoahSATBBarrier && dest_uninitialized) {
  74       flags = ShenandoahHeap::HAS_FORWARDED;
  75     } else {
  76       flags = ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING;
  77     }
  78     __ testb(gc_state, flags);
  79     __ jcc(Assembler::zero, done);
  80 
  81     __ pusha();                      // push registers
  82 
  83 #ifdef _LP64
  84     assert(src == rdi, "expected");
  85     assert(dst == rsi, "expected");
  86     // commented-out for generate_conjoint_long_oop_copy(), call_VM_leaf() will move
  87     // register into right place.
  88     // assert(count == rdx, "expected");
  89     if (UseCompressedOops) {
  90       __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry),
  91                         src, dst, count);
  92     } else
  93 #endif
  94     {
  95       __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry),
  96                       src, dst, count);
  97     }
  98 
  99     __ popa();
 100     __ bind(done);
 101     NOT_LP64(__ pop(thread);)
 102   }
 103 }
 104 
 105 void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst) {
 106   assert(ShenandoahLoadRefBarrier, "Should be enabled");
 107 
 108   Label done;
 109 
 110 #ifdef _LP64
 111   Register thread = r15_thread;
 112 #else
 113   Register thread = rcx;
 114   if (thread == dst) {
 115     thread = rbx;
 116   }
 117   __ push(thread);
 118   __ get_thread(thread);
 119 #endif
 120   assert_different_registers(dst, thread);
 121 
 122   Address gc_state(thread, in_bytes(JavaThread::gc_state_offset()));
 123   __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
 124   __ jcc(Assembler::zero, done);
 125 
 126   {
 127     __ save_vector_registers();
 128 
 129     __ subptr(rsp, LP64_ONLY(16) NOT_LP64(8) * wordSize);
 130 
 131     __ movptr(Address(rsp,  0 * wordSize), rax);
 132     __ movptr(Address(rsp,  1 * wordSize), rcx);
 133     __ movptr(Address(rsp,  2 * wordSize), rdx);
 134     __ movptr(Address(rsp,  3 * wordSize), rbx);
 135     // skip rsp
 136     __ movptr(Address(rsp,  5 * wordSize), rbp);
 137     __ movptr(Address(rsp,  6 * wordSize), rsi);
 138     __ movptr(Address(rsp,  7 * wordSize), rdi);
 139 #ifdef _LP64
 140     __ movptr(Address(rsp,  8 * wordSize),  r8);
 141     __ movptr(Address(rsp,  9 * wordSize),  r9);
 142     __ movptr(Address(rsp, 10 * wordSize), r10);
 143     __ movptr(Address(rsp, 11 * wordSize), r11);
 144     __ movptr(Address(rsp, 12 * wordSize), r12);
 145     __ movptr(Address(rsp, 13 * wordSize), r13);
 146     __ movptr(Address(rsp, 14 * wordSize), r14);
 147     __ movptr(Address(rsp, 15 * wordSize), r15);
 148 #endif
 149   }
 150   __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_interpreter), dst);
 151   {
 152 #ifdef _LP64
 153     __ movptr(r15, Address(rsp, 15 * wordSize));
 154     __ movptr(r14, Address(rsp, 14 * wordSize));
 155     __ movptr(r13, Address(rsp, 13 * wordSize));
 156     __ movptr(r12, Address(rsp, 12 * wordSize));
 157     __ movptr(r11, Address(rsp, 11 * wordSize));
 158     __ movptr(r10, Address(rsp, 10 * wordSize));
 159     __ movptr(r9,  Address(rsp,  9 * wordSize));
 160     __ movptr(r8,  Address(rsp,  8 * wordSize));
 161 #endif
 162     __ movptr(rdi, Address(rsp,  7 * wordSize));
 163     __ movptr(rsi, Address(rsp,  6 * wordSize));
 164     __ movptr(rbp, Address(rsp,  5 * wordSize));
 165     // skip rsp
 166     __ movptr(rbx, Address(rsp,  3 * wordSize));
 167     __ movptr(rdx, Address(rsp,  2 * wordSize));
 168     __ movptr(rcx, Address(rsp,  1 * wordSize));
 169     if (dst != rax) {
 170       __ movptr(dst, rax);
 171       __ movptr(rax, Address(rsp, 0 * wordSize));
 172     }
 173     __ addptr(rsp, LP64_ONLY(16) NOT_LP64(8) * wordSize);
 174 
 175     __ restore_vector_registers();
 176   }
 177   __ bind(done);
 178 
 179 #ifndef _LP64
 180   __ pop(thread);
 181 #endif
 182 }
 183 
 184 void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
 185   if (ShenandoahStoreValEnqueueBarrier) {
 186     storeval_barrier_impl(masm, dst, tmp);
 187   }
 188 }
 189 
 190 void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) {
 191   assert(ShenandoahStoreValEnqueueBarrier, "should be enabled");
 192 
 193   if (dst == noreg) return;
 194 
 195   if (ShenandoahStoreValEnqueueBarrier) {
 196     // The set of registers to be saved+restored is the same as in the write-barrier above.
 197     // Those are the commonly used registers in the interpreter.
 198     __ pusha();
 199     // __ push_callee_saved_registers();
 200     __ subptr(rsp, 2 * Interpreter::stackElementSize);
 201     __ movdbl(Address(rsp, 0), xmm0);
 202 
 203 #ifdef _LP64
 204     Register thread = r15_thread;
 205 #else
 206     Register thread = rcx;
 207     if (thread == dst || thread == tmp) {
 208       thread = rdi;
 209     }
 210     if (thread == dst || thread == tmp) {
 211       thread = rbx;
 212     }
 213     __ get_thread(thread);
 214 #endif
 215     assert_different_registers(dst, tmp, thread);
 216 
 217     __ g1_write_barrier_pre(noreg, dst, thread, tmp, true, false);
 218     __ movdbl(xmm0, Address(rsp, 0));
 219     __ addptr(rsp, 2 * Interpreter::stackElementSize);
 220     //__ pop_callee_saved_registers();
 221     __ popa();
 222   }
 223 }
 224 
 225 void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst) {
 226   if (ShenandoahLoadRefBarrier) {
 227     Label done;
 228     __ testptr(dst, dst);
 229     __ jcc(Assembler::zero, done);
 230     load_reference_barrier_not_null(masm, dst);
 231     __ bind(done);
 232   }
 233 }
 234 
 235 // Special Shenandoah CAS implementation that handles false negatives
 236 // due to concurrent evacuation.
 237 void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
 238                                                 Register res, Address addr, Register oldval, Register newval,
 239                                                 bool exchange, Register tmp1, Register tmp2) {
 240   assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
 241   assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
 242   assert_different_registers(oldval, newval, tmp1, tmp2);
 243 
 244   Label L_success, L_failure;
 245 
 246   // Remember oldval for retry logic below
 247 #ifdef _LP64
 248   if (UseCompressedOops) {
 249     __ movl(tmp1, oldval);
 250   } else
 251 #endif
 252   {
 253     __ movptr(tmp1, oldval);
 254   }
 255 
 256   // Step 1. Fast-path.
 257   //
 258   // Try to CAS with given arguments. If successful, then we are done.
 259 
 260   if (os::is_MP()) __ lock();
 261 #ifdef _LP64
 262   if (UseCompressedOops) {
 263     __ cmpxchgl(newval, addr);
 264   } else
 265 #endif
 266   {
 267     __ cmpxchgptr(newval, addr);
 268   }
 269   __ jcc(Assembler::equal, L_success);
 270 
 271   // Step 2. CAS had failed. This may be a false negative.
 272   //
 273   // The trouble comes when we compare the to-space pointer with the from-space
 274   // pointer to the same object. To resolve this, it will suffice to resolve
 275   // the value from memory -- this will give both to-space pointers.
 276   // If they mismatch, then it was a legitimate failure.
 277   //
 278   // Before reaching to resolve sequence, see if we can avoid the whole shebang
 279   // with filters.
 280 
 281   // Filter: when offending in-memory value is NULL, the failure is definitely legitimate
 282   __ testptr(oldval, oldval);
 283   __ jcc(Assembler::zero, L_failure);
 284 
 285   // Filter: when heap is stable, the failure is definitely legitimate
 286 #ifdef _LP64
 287   const Register thread = r15_thread;
 288 #else
 289   const Register thread = tmp2;
 290   __ get_thread(thread);
 291 #endif
 292   Address gc_state(thread, in_bytes(JavaThread::gc_state_offset()));
 293   __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
 294   __ jcc(Assembler::zero, L_failure);
 295 
 296 #ifdef _LP64
 297   if (UseCompressedOops) {
 298     __ movl(tmp2, oldval);
 299     __ decode_heap_oop(tmp2);
 300   } else
 301 #endif
 302   {
 303     __ movptr(tmp2, oldval);
 304   }
 305 
 306   // Decode offending in-memory value.
 307   // Test if-forwarded
 308   __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markOopDesc::marked_value);
 309   __ jcc(Assembler::noParity, L_failure);  // When odd number of bits, then not forwarded
 310   __ jcc(Assembler::zero, L_failure);      // When it is 00, then also not forwarded
 311 
 312   // Load and mask forwarding pointer
 313   __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes()));
 314   __ shrptr(tmp2, 2);
 315   __ shlptr(tmp2, 2);
 316 
 317 #ifdef _LP64
 318   if (UseCompressedOops) {
 319     __ decode_heap_oop(tmp1); // decode for comparison
 320   }
 321 #endif
 322 
 323   // Now we have the forwarded offender in tmp2.
 324   // Compare and if they don't match, we have legitimate failure
 325   __ cmpptr(tmp1, tmp2);
 326   __ jcc(Assembler::notEqual, L_failure);
 327 
 328   // Step 3. Need to fix the memory ptr before continuing.
 329   //
 330   // At this point, we have from-space oldval in the register, and its to-space
 331   // address is in tmp2. Let's try to update it into memory. We don't care if it
 332   // succeeds or not. If it does, then the retrying CAS would see it and succeed.
 333   // If this fixup fails, this means somebody else beat us to it, and necessarily
 334   // with to-space ptr store. We still have to do the retry, because the GC might
 335   // have updated the reference for us.
 336 
 337 #ifdef _LP64
 338   if (UseCompressedOops) {
 339     __ encode_heap_oop(tmp2); // previously decoded at step 2.
 340   }
 341 #endif
 342 
 343   if (os::is_MP()) __ lock();
 344 #ifdef _LP64
 345   if (UseCompressedOops) {
 346     __ cmpxchgl(tmp2, addr);
 347   } else
 348 #endif
 349   {
 350     __ cmpxchgptr(tmp2, addr);
 351   }
 352 
 353   // Step 4. Try to CAS again.
 354   //
 355   // This is guaranteed not to have false negatives, because oldval is definitely
 356   // to-space, and memory pointer is to-space as well. Nothing is able to store
 357   // from-space ptr into memory anymore. Make sure oldval is restored, after being
 358   // garbled during retries.
 359   //
 360 #ifdef _LP64
 361   if (UseCompressedOops) {
 362     __ movl(oldval, tmp2);
 363   } else
 364 #endif
 365   {
 366     __ movptr(oldval, tmp2);
 367   }
 368 
 369   if (os::is_MP()) __ lock();
 370 #ifdef _LP64
 371   if (UseCompressedOops) {
 372     __ cmpxchgl(newval, addr);
 373   } else
 374 #endif
 375   {
 376     __ cmpxchgptr(newval, addr);
 377   }
 378   if (!exchange) {
 379     __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump
 380   }
 381 
 382   // Step 5. If we need a boolean result out of CAS, set the flag appropriately.
 383   // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS.
 384   // Otherwise, failure witness for CAE is in oldval on all paths, and we can return.
 385 
 386   if (exchange) {
 387     __ bind(L_failure);
 388     __ bind(L_success);
 389   } else {
 390     assert(res != NULL, "need result register");
 391 
 392     Label exit;
 393     __ bind(L_failure);
 394     __ xorptr(res, res);
 395     __ jmpb(exit);
 396 
 397     __ bind(L_success);
 398     __ movptr(res, 1);
 399     __ bind(exit);
 400   }
 401 }
 402 
 403 #undef __
 404 
 405 #ifdef COMPILER1
 406 
 407 #define __ ce->masm()->
 408 
 409 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
 410   __ bind(*stub->entry());
 411 
 412   Label done;
 413   Register obj = stub->obj()->as_register();
 414   Register res = stub->result()->as_register();
 415 
 416   if (res != obj) {
 417     __ mov(res, obj);
 418   }
 419 
 420   // Check for null.
 421   __ testptr(res, res);
 422   __ jcc(Assembler::zero, done);
 423 
 424   load_reference_barrier_not_null(ce->masm(), res);
 425 
 426   __ bind(done);
 427   __ jmp(*stub->continuation());
 428 }
 429 
 430 #undef __
 431 
 432 #endif // COMPILER1