/*
 * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/classLoaderData.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "gc/shared/barrierSetRuntime.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/interp_masm.hpp"
#include "memory/universe.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifdef COMPILER2
#include "gc/shared/c2/barrierSetC2.hpp"
#endif // COMPILER2

#define __ masm->

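// Plain, barrier-free load of a value of the given BasicType from src into dst.
// Heap oops are decoded from their compressed form when UseCompressedOops is
// enabled; for T_FLOAT/T_DOUBLE/T_LONG the value is loaded to the tos state
// (dst must be noreg). GC-specific assemblers layer their barriers on top of this.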
void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                  Register dst, Address src, Register tmp1, Register tmp_thread) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  bool atomic = (decorators & MO_RELAXED) != 0;

  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
#ifdef _LP64
      if (UseCompressedOops) {
        __ movl(dst, src);
        if (is_not_null) {
          __ decode_heap_oop_not_null(dst);
        } else {
          __ decode_heap_oop(dst);
        }
      } else
#endif
      {
        __ movptr(dst, src);
      }
    } else {
      assert(in_native, "why else?");
      __ movptr(dst, src);
    }
    break;
  }
  case T_BOOLEAN: __ load_unsigned_byte(dst, src);  break;
  case T_BYTE:    __ load_signed_byte(dst, src);    break;
  case T_CHAR:    __ load_unsigned_short(dst, src); break;
  case T_SHORT:   __ load_signed_short(dst, src);   break;
  case T_INT:     __ movl  (dst, src);              break;
  case T_ADDRESS: __ movptr(dst, src);              break;
  case T_FLOAT:
    assert(dst == noreg, "only to ftos");
    __ load_float(src);
    break;
  case T_DOUBLE:
    assert(dst == noreg, "only to dtos");
    __ load_double(src);
    break;
  case T_LONG:
    assert(dst == noreg, "only to ltos");
#ifdef _LP64
    __ movq(rax, src);
#else
    if (atomic) {
      __ fild_d(src);               // Must load atomically
      __ subptr(rsp,2*wordSize);    // Make space for store
      __ fistp_d(Address(rsp,0));
      __ pop(rax);
      __ pop(rdx);
    } else {
      __ movl(rax, src);
      __ movl(rdx, src.plus_disp(wordSize));
    }
#endif
    break;
  default: Unimplemented();
  }
}

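// Plain, barrier-free store of a value of the given BasicType from val into dst.
// For oop types a noreg val stores null; heap oops are encoded to their
// compressed form first when UseCompressedOops is enabled. T_FLOAT/T_DOUBLE/
// T_LONG are stored from the tos state (val must be noreg).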
void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  bool atomic = (decorators & MO_RELAXED) != 0;

  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
      if (val == noreg) {
        assert(!is_not_null, "inconsistent access");
#ifdef _LP64
        if (UseCompressedOops) {
          __ movl(dst, NULL_WORD);
        } else {
          __ movslq(dst, NULL_WORD);
        }
#else
        __ movl(dst, NULL_WORD);
#endif
      } else {
#ifdef _LP64
        if (UseCompressedOops) {
          assert(!dst.uses(val), "not enough registers");
          if (is_not_null) {
            __ encode_heap_oop_not_null(val);
          } else {
            __ encode_heap_oop(val);
          }
          __ movl(dst, val);
        } else
#endif
        {
          __ movptr(dst, val);
        }
      }
    } else {
      assert(in_native, "why else?");
      assert(val != noreg, "not supported");
      __ movptr(dst, val);
    }
    break;
  }
  case T_BOOLEAN:
    __ andl(val, 0x1);  // boolean is true if LSB is 1
    __ movb(dst, val);
    break;
  case T_BYTE:
    __ movb(dst, val);
    break;
  case T_SHORT:
    __ movw(dst, val);
    break;
  case T_CHAR:
    __ movw(dst, val);
    break;
  case T_INT:
    __ movl(dst, val);
    break;
  case T_LONG:
    assert(val == noreg, "only tos");
#ifdef _LP64
    __ movq(dst, rax);
#else
    if (atomic) {
      __ push(rdx);
      __ push(rax);                 // Must update atomically with FIST
      __ fild_d(Address(rsp,0));    // So load into FPU register
      __ fistp_d(dst);              // and put into memory atomically
      __ addptr(rsp, 2*wordSize);
    } else {
      __ movptr(dst, rax);
      __ movptr(dst.plus_disp(wordSize), rdx);
    }
#endif
    break;
  case T_FLOAT:
    assert(val == noreg, "only tos");
    __ store_float(dst);
    break;
  case T_DOUBLE:
    assert(val == noreg, "only tos");
    __ store_double(dst);
    break;
  case T_ADDRESS:
    __ movptr(dst, val);
    break;
  default: Unimplemented();
  }
}

void BarrierSetAssembler::value_copy(MacroAssembler* masm, DecoratorSet decorators,
                                     Register src, Register dst, Register value_klass) {
  // The value_copy implementation is fairly complex, and there are no
  // short-cuts to be had from asm. What could be done here appears to have
  // the same cost in C++, so just "call_VM_leaf" for now rather than maintain
  // hundreds of hand-rolled instructions...
  if (decorators & IS_DEST_UNINITIALIZED) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized), src, dst, value_klass);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy), src, dst, value_klass);
  }
}

void BarrierSetAssembler::flat_field_copy(MacroAssembler* masm, DecoratorSet decorators,
                                     Register src, Register dst, Register inline_layout_info) {
  // The flat_field_copy implementation is fairly complex, and there are no
  // short-cuts to be had from asm. What could be done here appears to have
  // the same cost in C++, so just "call_VM_leaf" for now rather than maintain
  // hundreds of hand-rolled instructions...
  if (decorators & IS_DEST_UNINITIALIZED) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized2), src, dst, inline_layout_info);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy2), src, dst, inline_layout_info);
  }
}

void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       Register dst,
                                       Address src,
                                       Register tmp) {
  assert(bytes <= 8, "can only deal with non-vector registers");
  switch (bytes) {
  case 1:
    __ movb(dst, src);
    break;
  case 2:
    __ movw(dst, src);
    break;
  case 4:
    __ movl(dst, src);
    break;
  case 8:
#ifdef _LP64
    __ movq(dst, src);
#else
    fatal("No support for 8 bytes copy");
#endif
    break;
  default:
    fatal("Unexpected size");
  }
#ifdef _LP64
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ decode_heap_oop(dst);
  }
#endif
}

void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        Register src,
                                        Register tmp) {
#ifdef _LP64
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ encode_heap_oop(src);
  }
#endif
  assert(bytes <= 8, "can only deal with non-vector registers");
  switch (bytes) {
  case 1:
    __ movb(dst, src);
    break;
  case 2:
    __ movw(dst, src);
    break;
  case 4:
    __ movl(dst, src);
    break;
  case 8:
#ifdef _LP64
    __ movq(dst, src);
#else
    fatal("No support for 8 bytes copy");
#endif
    break;
  default:
    fatal("Unexpected size");
  }
}

void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       XMMRegister dst,
                                       Address src,
                                       Register tmp,
                                       XMMRegister xmm_tmp) {
  assert(bytes > 8, "can only deal with vector registers");
  if (bytes == 16) {
    __ movdqu(dst, src);
  } else if (bytes == 32) {
    __ vmovdqu(dst, src);
  } else {
    fatal("No support for >32 bytes copy");
  }
}

void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        XMMRegister src,
                                        Register tmp1,
                                        Register tmp2,
                                        XMMRegister xmm_tmp) {
  assert(bytes > 8, "can only deal with vector registers");
  if (bytes == 16) {
    __ movdqu(dst, src);
  } else if (bytes == 32) {
    __ vmovdqu(dst, src);
  } else {
    fatal("No support for >32 bytes copy");
  }
}

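// Resolve a jobject from native code without transitioning into the VM: strip
// the handle tag bits and load the oop out of the handle. Collectors that need
// a read barrier here override this and branch to slowpath when necessary.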
void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                        Register obj, Register tmp, Label& slowpath) {
  __ clear_jobject_tag(obj);
  __ movptr(obj, Address(obj, 0));
}

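// Bump-the-pointer allocation in the thread-local allocation buffer (TLAB):
// obj receives the current TLAB top and top is advanced by the constant or
// variable size; if the new top would exceed the TLAB end, branch to slow_case.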
void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm,
                                        Register thread, Register obj,
                                        Register var_size_in_bytes,
                                        int con_size_in_bytes,
                                        Register t1,
                                        Register t2,
                                        Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  if (!thread->is_valid()) {
#ifdef _LP64
    thread = r15_thread;
#else
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    __ get_thread(thread);
#endif
  }

  __ verify_tlab();

  __ movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    __ lea(end, Address(obj, con_size_in_bytes));
  } else {
    __ lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  __ cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  __ jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  __ movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    __ subptr(var_size_in_bytes, obj);
  }
  __ verify_tlab();
}

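// nmethod entry barrier: compare the current disarmed guard value (per-thread on
// 64-bit) with the guard value embedded as an immediate in the code stream, which
// is patched when the nmethod is armed or disarmed. If the values differ, the
// barrier is armed and the method entry barrier stub is called.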
#ifdef _LP64
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs_nm == nullptr) {
    return;
  }
  Register thread = r15_thread;
  Address disarmed_addr(thread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
  // The immediate is the last 4 bytes, so if we align the start of the cmp
  // instruction to 4 bytes, we know that the second half of it is also 4
  // byte aligned, which means that the immediate will not cross a cache line
  __ align(4);
  uintptr_t before_cmp = (uintptr_t)__ pc();
  __ cmpl_imm32(disarmed_addr, 0);
  uintptr_t after_cmp = (uintptr_t)__ pc();
  guarantee(after_cmp - before_cmp == 8, "Wrong assumed instruction length");

  if (slow_path != nullptr) {
    __ jcc(Assembler::notEqual, *slow_path);
    __ bind(*continuation);
  } else {
    Label done;
    __ jccb(Assembler::equal, done);
    __ call(RuntimeAddress(StubRoutines::method_entry_barrier()));
    __ bind(done);
  }
}
#else
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label*, Label*) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs_nm == nullptr) {
    return;
  }

  Label continuation;

  Register tmp = rdi;
  __ push(tmp);
  __ movptr(tmp, (intptr_t)bs_nm->disarmed_guard_value_address());
  Address disarmed_addr(tmp, 0);
  __ align(4);
  __ cmpl_imm32(disarmed_addr, 0);
  __ pop(tmp);
  __ jcc(Assembler::equal, continuation);
  __ call(RuntimeAddress(StubRoutines::method_entry_barrier()));
  __ bind(continuation);
}
#endif

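// c2i entry barrier: keep a method whose class is being concurrently unloaded
// from being entered through a c2i adapter. The incoming Method* (in rbx) is
// only entered if its holder's ClassLoaderData is strongly reachable or its
// weak holder handle still resolves; otherwise we jump to handle_wrong_method.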
void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
  BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs == nullptr) {
    return;
  }

  Label bad_call;
  __ cmpptr(rbx, 0); // rbx contains the incoming method for c2i adapters.
  __ jcc(Assembler::equal, bad_call);

  Register tmp1 = LP64_ONLY( rscratch1 ) NOT_LP64( rax );
  Register tmp2 = LP64_ONLY( rscratch2 ) NOT_LP64( rcx );
#ifndef _LP64
  __ push(tmp1);
  __ push(tmp2);
#endif // !_LP64

  // Pointer chase to the method holder to find out if the method is concurrently unloading.
  Label method_live;
  __ load_method_holder_cld(tmp1, rbx);

  // Is it a strong CLD?
  __ cmpl(Address(tmp1, ClassLoaderData::keep_alive_ref_count_offset()), 0);
  __ jcc(Assembler::greater, method_live);

  // Is it a weak but alive CLD?
  __ movptr(tmp1, Address(tmp1, ClassLoaderData::holder_offset()));
  __ resolve_weak_handle(tmp1, tmp2);
  __ cmpptr(tmp1, 0);
  __ jcc(Assembler::notEqual, method_live);

#ifndef _LP64
  __ pop(tmp2);
  __ pop(tmp1);
#endif

  __ bind(bad_call);
  __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
  __ bind(method_live);

#ifndef _LP64
  __ pop(tmp2);
  __ pop(tmp1);
#endif
}

void BarrierSetAssembler::check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error) {
  // Check if the oop is in the right area of memory
  __ movptr(tmp1, obj);
  __ movptr(tmp2, (intptr_t) Universe::verify_oop_mask());
  __ andptr(tmp1, tmp2);
  __ movptr(tmp2, (intptr_t) Universe::verify_oop_bits());
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notZero, error);

  // make sure klass is 'reasonable', which is not zero.
  __ load_klass(obj, obj, tmp1);  // get klass
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, error); // if klass is null it is broken
}

#ifdef COMPILER2

#ifdef _LP64

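// Refine an allocated OptoReg for use by SaveLiveRegisters. Non-register
// operands map to OptoReg::Bad. For XMM registers the live width is encoded in
// the low 4 bits of the OptoReg: 1 = 8 bytes, 2 = 16 (VecX), 4 = 32 (VecY),
// 8 = 64 (VecZ), matching what xmm_slot_size() decodes.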
OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
  if (!OptoReg::is_reg(opto_reg)) {
    return OptoReg::Bad;
  }

  const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
  if (vm_reg->is_XMMRegister()) {
    opto_reg &= ~15;
    switch (node->ideal_reg()) {
    case Op_VecX:
      opto_reg |= 2;
      break;
    case Op_VecY:
      opto_reg |= 4;
      break;
    case Op_VecZ:
      opto_reg |= 8;
      break;
    default:
      opto_reg |= 1;
      break;
    }
  }

  return opto_reg;
}

// We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel
extern void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                             int stack_offset, int reg, uint ireg, outputStream* st);

#undef __
#define __ _masm->

int SaveLiveRegisters::xmm_compare_register_size(XMMRegisterData* left, XMMRegisterData* right) {
  if (left->_size == right->_size) {
    return 0;
  }

  return (left->_size < right->_size) ? -1 : 1;
}

int SaveLiveRegisters::xmm_slot_size(OptoReg::Name opto_reg) {
  // The low order 4 bits denote what size of the XMM register is live
  return (opto_reg & 15) << 3;
}

uint SaveLiveRegisters::xmm_ideal_reg_for_size(int reg_size) {
  switch (reg_size) {
  case 8:
    return Op_VecD;
  case 16:
    return Op_VecX;
  case 32:
    return Op_VecY;
  case 64:
    return Op_VecZ;
  default:
    fatal("Invalid register size %d", reg_size);
    return 0;
  }
}

bool SaveLiveRegisters::xmm_needs_vzeroupper() const {
  return _xmm_registers.is_nonempty() && _xmm_registers.at(0)._size > 16;
}

void SaveLiveRegisters::xmm_register_save(const XMMRegisterData& reg_data) {
  const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
  const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
  _spill_offset -= reg_data._size;
  C2_MacroAssembler c2_masm(__ code());
  vec_spill_helper(&c2_masm, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
}

void SaveLiveRegisters::xmm_register_restore(const XMMRegisterData& reg_data) {
  const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
  const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
  C2_MacroAssembler c2_masm(__ code());
  vec_spill_helper(&c2_masm, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
  _spill_offset += reg_data._size;
}

void SaveLiveRegisters::gp_register_save(Register reg) {
  _spill_offset -= 8;
  __ movq(Address(rsp, _spill_offset), reg);
}

void SaveLiveRegisters::opmask_register_save(KRegister reg) {
  _spill_offset -= 8;
  __ kmov(Address(rsp, _spill_offset), reg);
}

void SaveLiveRegisters::gp_register_restore(Register reg) {
  __ movq(reg, Address(rsp, _spill_offset));
  _spill_offset += 8;
}

void SaveLiveRegisters::opmask_register_restore(KRegister reg) {
  __ kmov(reg, Address(rsp, _spill_offset));
  _spill_offset += 8;
}

void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
  // Create mask of caller saved registers that need to
  // be saved/restored if live
  RegMask caller_saved;
  caller_saved.Insert(OptoReg::as_OptoReg(rax->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rcx->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rdx->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rsi->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rdi->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r8->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r9->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r10->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg()));

  if (UseAPX) {
    caller_saved.Insert(OptoReg::as_OptoReg(r16->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r17->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r18->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r19->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r20->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r21->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r22->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r23->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r24->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r25->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r26->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r27->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r28->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r29->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r30->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r31->as_VMReg()));
  }

  int gp_spill_size = 0;
  int opmask_spill_size = 0;
  int xmm_spill_size = 0;

  // Record registers that need to be saved/restored
  RegMaskIterator rmi(stub->preserve_set());
  while (rmi.has_next()) {
    const OptoReg::Name opto_reg = rmi.next();
    const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);

    if (vm_reg->is_Register()) {
      if (caller_saved.Member(opto_reg)) {
        _gp_registers.append(vm_reg->as_Register());
        gp_spill_size += 8;
      }
    } else if (vm_reg->is_KRegister()) {
      // All opmask registers are caller saved, thus spill the ones
      // which are live.
      if (_opmask_registers.find(vm_reg->as_KRegister()) == -1) {
        _opmask_registers.append(vm_reg->as_KRegister());
        opmask_spill_size += 8;
      }
    } else if (vm_reg->is_XMMRegister()) {
      // We encode in the low order 4 bits of the opto_reg, how large part of the register is live
      const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~15);
      const int reg_size = xmm_slot_size(opto_reg);
      const XMMRegisterData reg_data = { vm_reg_base->as_XMMRegister(), reg_size };
      const int reg_index = _xmm_registers.find(reg_data);
      if (reg_index == -1) {
        // Not previously appended
        _xmm_registers.append(reg_data);
        xmm_spill_size += reg_size;
      } else {
        // Previously appended, update size
        const int reg_size_prev = _xmm_registers.at(reg_index)._size;
        if (reg_size > reg_size_prev) {
          _xmm_registers.at_put(reg_index, reg_data);
          xmm_spill_size += reg_size - reg_size_prev;
        }
      }
    } else {
      fatal("Unexpected register type");
    }
  }

  // Sort by size, largest first
  _xmm_registers.sort(xmm_compare_register_size);

  // On Windows, the caller reserves stack space for spilling register arguments
  const int arg_spill_size = frame::arg_reg_save_area_bytes;

  // Stack pointer must be 16 bytes aligned for the call
  _spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size + opmask_spill_size + arg_spill_size, 16);
}

SaveLiveRegisters::SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub)
  : _masm(masm),
    _gp_registers(),
    _opmask_registers(),
    _xmm_registers(),
    _spill_size(0),
    _spill_offset(0) {

  //
  // Stack layout after registers have been spilled:
  //
  // | ...            | original rsp, 16 bytes aligned
  // ------------------
  // | zmm0 high      |
  // | ...            |
  // | zmm0 low       | 16 bytes aligned
  // | ...            |
  // | ymm1 high      |
  // | ...            |
  // | ymm1 low       | 16 bytes aligned
  // | ...            |
  // | xmmN high      |
  // | ...            |
  // | xmmN low       | 8 bytes aligned
  // | reg0           | 8 bytes aligned
  // | reg1           |
  // | ...            |
  // | regN           | new rsp, if 16 bytes aligned
  // | <padding>      | else new rsp, 16 bytes aligned
  // ------------------
  //

  // Figure out what registers to save/restore
  initialize(stub);

  // Allocate stack space
  if (_spill_size > 0) {
    __ subptr(rsp, _spill_size);
  }

  // Save XMM/YMM/ZMM registers
  for (int i = 0; i < _xmm_registers.length(); i++) {
    xmm_register_save(_xmm_registers.at(i));
  }

  if (xmm_needs_vzeroupper()) {
    __ vzeroupper();
  }

  // Save general purpose registers
  for (int i = 0; i < _gp_registers.length(); i++) {
    gp_register_save(_gp_registers.at(i));
  }

  // Save opmask registers
  for (int i = 0; i < _opmask_registers.length(); i++) {
    opmask_register_save(_opmask_registers.at(i));
  }
}

SaveLiveRegisters::~SaveLiveRegisters() {
  // Restore opmask registers
  for (int i = _opmask_registers.length() - 1; i >= 0; i--) {
    opmask_register_restore(_opmask_registers.at(i));
  }

  // Restore general purpose registers
  for (int i = _gp_registers.length() - 1; i >= 0; i--) {
    gp_register_restore(_gp_registers.at(i));
  }

  __ vzeroupper();

  // Restore XMM/YMM/ZMM registers
  for (int i = _xmm_registers.length() - 1; i >= 0; i--) {
    xmm_register_restore(_xmm_registers.at(i));
  }

  // Free stack space
  if (_spill_size > 0) {
    __ addptr(rsp, _spill_size);
  }
}

#else // !_LP64

OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
  Unimplemented(); // This must be implemented to support late barrier expansion.
}

#endif // _LP64

#endif // COMPILER2