/*
 * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/classLoaderData.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "gc/shared/barrierSetRuntime.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/interp_masm.hpp"
#include "memory/universe.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"

#define __ masm->

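// Plain (no GC barrier) load of a value of the given BasicType from src.
// Scalar types go into dst; float/double/long (where dst == noreg) are loaded
// into the TOS state (FPU/XMM, or rax / rax:rdx for longs). Heap oops are
// decompressed when compressed oops are enabled.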
void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                  Register dst, Address src, Register tmp1, Register tmp_thread) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  bool atomic = (decorators & MO_RELAXED) != 0;

  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
#ifdef _LP64
      if (UseCompressedOops) {
        __ movl(dst, src);
        if (is_not_null) {
          __ decode_heap_oop_not_null(dst);
        } else {
          __ decode_heap_oop(dst);
        }
      } else
#endif
      {
        __ movptr(dst, src);
      }
    } else {
      assert(in_native, "why else?");
      __ movptr(dst, src);
    }
    break;
  }
  case T_BOOLEAN: __ load_unsigned_byte(dst, src);  break;
  case T_BYTE:    __ load_signed_byte(dst, src);    break;
  case T_CHAR:    __ load_unsigned_short(dst, src); break;
  case T_SHORT:   __ load_signed_short(dst, src);   break;
  case T_INT:     __ movl  (dst, src);              break;
  case T_ADDRESS: __ movptr(dst, src);              break;
  case T_FLOAT:
    assert(dst == noreg, "only to ftos");
    __ load_float(src);
    break;
  case T_DOUBLE:
    assert(dst == noreg, "only to dtos");
    __ load_double(src);
    break;
  case T_LONG:
    assert(dst == noreg, "only to ltos");
#ifdef _LP64
    __ movq(rax, src);
#else
    if (atomic) {
      __ fild_d(src);               // Must load atomically
      __ subptr(rsp,2*wordSize);    // Make space for store
      __ fistp_d(Address(rsp,0));
      __ pop(rax);
      __ pop(rdx);
    } else {
      __ movl(rax, src);
      __ movl(rdx, src.plus_disp(wordSize));
    }
#endif
    break;
  default: Unimplemented();
  }
}

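// Plain (no GC barrier) store of a value of the given BasicType to dst.
// Scalar types come from val; float/double/long (where val == noreg) are taken
// from the TOS state. Heap oops are compressed when compressed oops are enabled,
// and val == noreg for an oop means "store null".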
void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  bool atomic = (decorators & MO_RELAXED) != 0;

  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
      if (val == noreg) {
        assert(!is_not_null, "inconsistent access");
#ifdef _LP64
        if (UseCompressedOops) {
          __ movl(dst, NULL_WORD);
        } else {
          __ movslq(dst, NULL_WORD);
        }
#else
        __ movl(dst, NULL_WORD);
#endif
      } else {
#ifdef _LP64
        if (UseCompressedOops) {
          assert(!dst.uses(val), "not enough registers");
          if (is_not_null) {
            __ encode_heap_oop_not_null(val);
          } else {
            __ encode_heap_oop(val);
          }
          __ movl(dst, val);
        } else
#endif
        {
          __ movptr(dst, val);
        }
      }
    } else {
      assert(in_native, "why else?");
      assert(val != noreg, "not supported");
      __ movptr(dst, val);
    }
    break;
  }
  case T_BOOLEAN:
    __ andl(val, 0x1);  // boolean is true if LSB is 1
    __ movb(dst, val);
    break;
  case T_BYTE:
    __ movb(dst, val);
    break;
  case T_SHORT:
    __ movw(dst, val);
    break;
  case T_CHAR:
    __ movw(dst, val);
    break;
  case T_INT:
    __ movl(dst, val);
    break;
  case T_LONG:
    assert(val == noreg, "only tos");
#ifdef _LP64
    __ movq(dst, rax);
#else
    if (atomic) {
      __ push(rdx);
      __ push(rax);                 // Must update atomically with FIST
      __ fild_d(Address(rsp,0));    // So load into FPU register
      __ fistp_d(dst);              // and put into memory atomically
      __ addptr(rsp, 2*wordSize);
    } else {
      __ movptr(dst, rax);
      __ movptr(dst.plus_disp(wordSize), rdx);
    }
#endif
    break;
  case T_FLOAT:
    assert(val == noreg, "only tos");
    __ store_float(dst);
    break;
  case T_DOUBLE:
    assert(val == noreg, "only tos");
    __ store_double(dst);
    break;
  case T_ADDRESS:
    __ movptr(dst, val);
    break;
  default: Unimplemented();
  }
}

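// Copy a value object from src to dst, with value_klass describing the layout.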
void BarrierSetAssembler::value_copy(MacroAssembler* masm, DecoratorSet decorators,
                                     Register src, Register dst, Register value_klass) {
  // The value_copy implementation is fairly complex, and there are no real
  // short-cuts to be had from asm. What short-cuts there are appear to cost
  // the same as the C++ code, so just "call_VM_leaf" for now rather than
  // maintain hundreds of hand-rolled instructions...
  if (decorators & IS_DEST_UNINITIALIZED) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized), src, dst, value_klass);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy), src, dst, value_klass);
  }
}

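// Arraycopy element load into a general-purpose register (1, 2, 4 or 8 bytes).
// For checkcast copies with compressed oops the loaded narrow oop is decompressed
// so the caller can type-check the full oop.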
void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       Register dst,
                                       Address src,
                                       Register tmp) {
  assert(bytes <= 8, "can only deal with non-vector registers");
  switch (bytes) {
  case 1:
    __ movb(dst, src);
    break;
  case 2:
    __ movw(dst, src);
    break;
  case 4:
    __ movl(dst, src);
    break;
  case 8:
#ifdef _LP64
    __ movq(dst, src);
#else
    fatal("No support for 8 bytes copy");
#endif
    break;
  default:
    fatal("Unexpected size");
  }
#ifdef _LP64
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ decode_heap_oop(dst);
  }
#endif
}

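// Arraycopy element store from a general-purpose register (1, 2, 4 or 8 bytes).
// For checkcast copies with compressed oops the oop is re-encoded before the
// store, mirroring copy_load_at above.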
void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        Register src,
                                        Register tmp) {
#ifdef _LP64
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ encode_heap_oop(src);
  }
#endif
  assert(bytes <= 8, "can only deal with non-vector registers");
  switch (bytes) {
  case 1:
    __ movb(dst, src);
    break;
  case 2:
    __ movw(dst, src);
    break;
  case 4:
    __ movl(dst, src);
    break;
  case 8:
#ifdef _LP64
    __ movq(dst, src);
#else
    fatal("No support for 8 bytes copy");
#endif
    break;
  default:
    fatal("Unexpected size");
  }
}

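// Arraycopy bulk load of 16 or 32 bytes into an XMM/YMM register.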
void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       XMMRegister dst,
                                       Address src,
                                       Register tmp,
                                       XMMRegister xmm_tmp) {
  assert(bytes > 8, "can only deal with vector registers");
  if (bytes == 16) {
    __ movdqu(dst, src);
  } else if (bytes == 32) {
    __ vmovdqu(dst, src);
  } else {
    fatal("No support for >32 bytes copy");
  }
}

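// Arraycopy bulk store of 16 or 32 bytes from an XMM/YMM register.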
void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        XMMRegister src,
                                        Register tmp1,
                                        Register tmp2,
                                        XMMRegister xmm_tmp) {
  assert(bytes > 8, "can only deal with vector registers");
  if (bytes == 16) {
    __ movdqu(dst, src);
  } else if (bytes == 32) {
    __ vmovdqu(dst, src);
  } else {
    fatal("No support for >32 bytes copy");
  }
}

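// Resolve a jobject from native code: clear the tag bits and dereference the
// handle. The base version never takes slowpath; GC-specific barrier sets
// override this when a read barrier is required.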
void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                        Register obj, Register tmp, Label& slowpath) {
  __ clear_jobject_tag(obj);
  __ movptr(obj, Address(obj, 0));
}

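// TLAB allocation: load the current top into obj, compute the new top from either
// con_size_in_bytes or var_size_in_bytes, and jump to slow_case if it would exceed
// the TLAB end; otherwise publish the new top.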
void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm,
                                        Register thread, Register obj,
                                        Register var_size_in_bytes,
                                        int con_size_in_bytes,
                                        Register t1,
                                        Register t2,
                                        Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  if (!thread->is_valid()) {
#ifdef _LP64
    thread = r15_thread;
#else
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    __ get_thread(thread);
#endif
  }

  __ verify_tlab();

  __ movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    __ lea(end, Address(obj, con_size_in_bytes));
  } else {
    __ lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  __ cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  __ jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  __ movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    __ subptr(var_size_in_bytes, obj);
  }
  __ verify_tlab();
}

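// Bump the per-thread allocated_bytes counter by the allocation size
// (64-bit add; on 32-bit x86 this is an addl/adcl pair).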
void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, Register thread,
                                               Register var_size_in_bytes,
                                               int con_size_in_bytes,
                                               Register t1) {
  if (!thread->is_valid()) {
#ifdef _LP64
    thread = r15_thread;
#else
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    __ get_thread(thread);
#endif
  }

#ifdef _LP64
  if (var_size_in_bytes->is_valid()) {
    __ addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    __ addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
#else
  if (var_size_in_bytes->is_valid()) {
    __ addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    __ addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
  __ adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
#endif
}

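// nmethod entry barrier: compare the disarmed guard value (thread-local on LP64,
// a global on 32-bit) against the 32-bit immediate baked into this cmp; if they
// differ, call the method_entry_barrier stub (or jump to slow_path). The cmp is
// aligned so the immediate can be patched without crossing a cache line.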
#ifdef _LP64
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs_nm == nullptr) {
    return;
  }
  Register thread = r15_thread;
  Address disarmed_addr(thread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
  // The immediate is the last 4 bytes, so if we align the start of the cmp
  // instruction to 4 bytes, we know that the second half of it is also 4
  // byte aligned, which means that the immediate will not cross a cache line.
  __ align(4);
  uintptr_t before_cmp = (uintptr_t)__ pc();
  __ cmpl_imm32(disarmed_addr, 0);
  uintptr_t after_cmp = (uintptr_t)__ pc();
  guarantee(after_cmp - before_cmp == 8, "Wrong assumed instruction length");

  if (slow_path != nullptr) {
    __ jcc(Assembler::notEqual, *slow_path);
    __ bind(*continuation);
  } else {
    Label done;
    __ jccb(Assembler::equal, done);
    __ call(RuntimeAddress(StubRoutines::method_entry_barrier()));
    __ bind(done);
  }
}
#else
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label*, Label*) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs_nm == nullptr) {
    return;
  }

  Label continuation;

  Register tmp = rdi;
  __ push(tmp);
  __ movptr(tmp, (intptr_t)bs_nm->disarmed_guard_value_address());
  Address disarmed_addr(tmp, 0);
  __ align(4);
  __ cmpl_imm32(disarmed_addr, 0);
  __ pop(tmp);
  __ jcc(Assembler::equal, continuation);
  __ call(RuntimeAddress(StubRoutines::method_entry_barrier()));
  __ bind(continuation);
}
#endif

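// c2i entry barrier: before entering the interpreted version of a method via a
// c2i adapter, check that the method's holder ClassLoaderData is still alive
// (either a strong CLD or a weak handle that resolves to a live holder); if not,
// the method may be concurrently unloading, so call the handle_wrong_method stub.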
void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
  BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs == nullptr) {
    return;
  }

  Label bad_call;
  __ cmpptr(rbx, 0); // rbx contains the incoming method for c2i adapters.
  __ jcc(Assembler::equal, bad_call);

  Register tmp1 = LP64_ONLY( rscratch1 ) NOT_LP64( rax );
  Register tmp2 = LP64_ONLY( rscratch2 ) NOT_LP64( rcx );
#ifndef _LP64
  __ push(tmp1);
  __ push(tmp2);
#endif // !_LP64

  // Pointer chase to the method holder to find out if the method is concurrently unloading.
  Label method_live;
  __ load_method_holder_cld(tmp1, rbx);

  // Is it a strong CLD?
  __ cmpl(Address(tmp1, ClassLoaderData::keep_alive_offset()), 0);
  __ jcc(Assembler::greater, method_live);

  // Is it a weak but alive CLD?
  __ movptr(tmp1, Address(tmp1, ClassLoaderData::holder_offset()));
  __ resolve_weak_handle(tmp1, tmp2);
  __ cmpptr(tmp1, 0);
  __ jcc(Assembler::notEqual, method_live);

#ifndef _LP64
  __ pop(tmp2);
  __ pop(tmp1);
#endif

  __ bind(bad_call);
  __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
  __ bind(method_live);

#ifndef _LP64
  __ pop(tmp2);
  __ pop(tmp1);
#endif
}

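// Oop sanity check used by verify_oop: verify that the oop matches
// Universe::verify_oop_mask()/bits() and that its klass is non-null, jumping to
// error otherwise. Note: obj is clobbered by the klass load.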
void BarrierSetAssembler::check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error) {
  // Check if the oop is in the right area of memory
  __ movptr(tmp1, obj);
  __ movptr(tmp2, (intptr_t) Universe::verify_oop_mask());
  __ andptr(tmp1, tmp2);
  __ movptr(tmp2, (intptr_t) Universe::verify_oop_bits());
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notZero, error);

  // make sure klass is 'reasonable', which is not zero.
  __ load_klass(obj, obj, tmp1);  // get klass
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, error); // if klass is null it is broken
}