/*
 * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/classLoaderData.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "gc/shared/barrierSetRuntime.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/interp_masm.hpp"
#include "memory/universe.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifdef COMPILER2
#include "gc/shared/c2/barrierSetC2.hpp"
#endif // COMPILER2

#define __ masm->

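// BarrierSetAssembler provides the GC-agnostic defaults for the assembler
// access API: plain loads and stores with no GC barrier applied. A concrete
// collector's BarrierSetAssembler typically layers its barrier around these
// defaults, along the lines of the following sketch (illustrative only; the
// class name and the barrier hook are placeholders, not code from any
// particular collector):
//
//   void MyBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators,
//                                       BasicType type, Register dst, Address src,
//                                       Register tmp1, Register tmp_thread) {
//     BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
//     if (is_reference_type(type)) {
//       // ... emit the collector's load barrier on dst here ...
//     }
//   }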
void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                  Register dst, Address src, Register tmp1, Register tmp_thread) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  bool atomic = (decorators & MO_RELAXED) != 0;

  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
#ifdef _LP64
      if (UseCompressedOops) {
        __ movl(dst, src);
        if (is_not_null) {
          __ decode_heap_oop_not_null(dst);
        } else {
          __ decode_heap_oop(dst);
        }
      } else
#endif
      {
        __ movptr(dst, src);
      }
    } else {
      assert(in_native, "why else?");
      __ movptr(dst, src);
    }
    break;
  }
  case T_BOOLEAN: __ load_unsigned_byte(dst, src);  break;
  case T_BYTE:    __ load_signed_byte(dst, src);    break;
  case T_CHAR:    __ load_unsigned_short(dst, src); break;
  case T_SHORT:   __ load_signed_short(dst, src);   break;
  case T_INT:     __ movl  (dst, src);              break;
  case T_ADDRESS: __ movptr(dst, src);              break;
  case T_FLOAT:
    assert(dst == noreg, "only to ftos");
    __ load_float(src);
    break;
  case T_DOUBLE:
    assert(dst == noreg, "only to dtos");
    __ load_double(src);
    break;
  case T_LONG:
    assert(dst == noreg, "only to ltos");
#ifdef _LP64
    __ movq(rax, src);
#else
    if (atomic) {
      __ fild_d(src);               // Must load atomically
      __ subptr(rsp,2*wordSize);    // Make space for store
      __ fistp_d(Address(rsp,0));
      __ pop(rax);
      __ pop(rdx);
    } else {
      __ movl(rax, src);
      __ movl(rdx, src.plus_disp(wordSize));
    }
#endif
    break;
  default: Unimplemented();
  }
}

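// Default store: write val (or null when val == noreg) to dst with no GC
// barrier. Reference stores encode the oop first when compressed oops are in
// use on 64-bit; T_LONG/T_FLOAT/T_DOUBLE values come from the tos.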
void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  bool atomic = (decorators & MO_RELAXED) != 0;

  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
      if (val == noreg) {
        assert(!is_not_null, "inconsistent access");
#ifdef _LP64
        if (UseCompressedOops) {
          __ movl(dst, NULL_WORD);
        } else {
          __ movslq(dst, NULL_WORD);
        }
#else
        __ movl(dst, NULL_WORD);
#endif
      } else {
#ifdef _LP64
        if (UseCompressedOops) {
          assert(!dst.uses(val), "not enough registers");
          if (is_not_null) {
            __ encode_heap_oop_not_null(val);
          } else {
            __ encode_heap_oop(val);
          }
          __ movl(dst, val);
        } else
#endif
        {
          __ movptr(dst, val);
        }
      }
    } else {
      assert(in_native, "why else?");
      assert(val != noreg, "not supported");
      __ movptr(dst, val);
    }
    break;
  }
  case T_BOOLEAN:
    __ andl(val, 0x1);  // boolean is true if LSB is 1
    __ movb(dst, val);
    break;
  case T_BYTE:
    __ movb(dst, val);
    break;
  case T_SHORT:
    __ movw(dst, val);
    break;
  case T_CHAR:
    __ movw(dst, val);
    break;
  case T_INT:
    __ movl(dst, val);
    break;
  case T_LONG:
    assert(val == noreg, "only tos");
#ifdef _LP64
    __ movq(dst, rax);
#else
    if (atomic) {
      __ push(rdx);
      __ push(rax);                 // Must update atomically with FIST
      __ fild_d(Address(rsp,0));    // So load into FPU register
      __ fistp_d(dst);              // and put into memory atomically
      __ addptr(rsp, 2*wordSize);
    } else {
      __ movptr(dst, rax);
      __ movptr(dst.plus_disp(wordSize), rdx);
    }
#endif
    break;
  case T_FLOAT:
    assert(val == noreg, "only tos");
    __ store_float(dst);
    break;
  case T_DOUBLE:
    assert(val == noreg, "only tos");
    __ store_double(dst);
    break;
  case T_ADDRESS:
    __ movptr(dst, val);
    break;
  default: Unimplemented();
  }
}

void BarrierSetAssembler::value_copy(MacroAssembler* masm, DecoratorSet decorators,
                                     Register src, Register dst, Register value_klass) {
  // The value_copy implementation is fairly complex, and there are no real
  // short-cuts to be had from hand-written assembly. What little there is
  // appears to cost the same in C++, so just call_VM_leaf into the runtime
  // for now rather than maintain hundreds of hand-rolled instructions.
  if (decorators & IS_DEST_UNINITIALIZED) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized), src, dst, value_klass);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy), src, dst, value_klass);
  }
}

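// copy_load_at/copy_store_at are the per-element primitives used by the
// arraycopy stubs. The Register variants handle element sizes up to 8 bytes;
// the XMMRegister variants handle 16- and 32-byte chunks. On 64-bit, a
// checkcast arraycopy with compressed oops keeps the oop decoded while it is
// held in a register, so the load decodes and the store re-encodes.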
void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       Register dst,
                                       Address src,
                                       Register tmp) {
  assert(bytes <= 8, "can only deal with non-vector registers");
  switch (bytes) {
  case 1:
    __ movb(dst, src);
    break;
  case 2:
    __ movw(dst, src);
    break;
  case 4:
    __ movl(dst, src);
    break;
  case 8:
#ifdef _LP64
    __ movq(dst, src);
#else
    fatal("No support for 8 bytes copy");
#endif
    break;
  default:
    fatal("Unexpected size");
  }
#ifdef _LP64
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ decode_heap_oop(dst);
  }
#endif
}

void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        Register src,
                                        Register tmp) {
#ifdef _LP64
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ encode_heap_oop(src);
  }
#endif
  assert(bytes <= 8, "can only deal with non-vector registers");
  switch (bytes) {
  case 1:
    __ movb(dst, src);
    break;
  case 2:
    __ movw(dst, src);
    break;
  case 4:
    __ movl(dst, src);
    break;
  case 8:
#ifdef _LP64
    __ movq(dst, src);
#else
    fatal("No support for 8 bytes copy");
#endif
    break;
  default:
    fatal("Unexpected size");
  }
}

void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       XMMRegister dst,
                                       Address src,
                                       Register tmp,
                                       XMMRegister xmm_tmp) {
  assert(bytes > 8, "can only deal with vector registers");
  if (bytes == 16) {
    __ movdqu(dst, src);
  } else if (bytes == 32) {
    __ vmovdqu(dst, src);
  } else {
    fatal("No support for >32 bytes copy");
  }
}

void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        XMMRegister src,
                                        Register tmp1,
                                        Register tmp2,
                                        XMMRegister xmm_tmp) {
  assert(bytes > 8, "can only deal with vector registers");
  if (bytes == 16) {
    __ movdqu(dst, src);
  } else if (bytes == 32) {
    __ vmovdqu(dst, src);
  } else {
    fatal("No support for >32 bytes copy");
  }
}

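// Resolve a jobject from native code without taking the slow path: strip the
// tag bits and load the referenced oop. Collectors that need a read barrier
// or an additional check here override this and may branch to slowpath.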
void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                        Register obj, Register tmp, Label& slowpath) {
  __ clear_jobject_tag(obj);
  __ movptr(obj, Address(obj, 0));
}

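// Bump-pointer allocation from the current thread's TLAB. In pseudo-code:
//
//   obj = thread->tlab_top();
//   end = obj + size;                    // size is con_size_in_bytes or var_size_in_bytes
//   if (end > thread->tlab_end()) goto slow_case;
//   thread->set_tlab_top(end);
//
// The new object is returned in obj; no object header is installed here.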
void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm,
                                        Register thread, Register obj,
                                        Register var_size_in_bytes,
                                        int con_size_in_bytes,
                                        Register t1,
                                        Register t2,
                                        Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  if (!thread->is_valid()) {
#ifdef _LP64
    thread = r15_thread;
#else
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    __ get_thread(thread);
#endif
  }

  __ verify_tlab();

  __ movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    __ lea(end, Address(obj, con_size_in_bytes));
  } else {
    __ lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  __ cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  __ jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  __ movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    __ subptr(var_size_in_bytes, obj);
  }
  __ verify_tlab();
}

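// nmethod entry barrier: compare the disarmed guard value (thread-local on
// 64-bit, global on 32-bit) against the 32-bit immediate embedded in this
// nmethod's entry. If they disagree the nmethod is armed, and we call the
// StubRoutines::method_entry_barrier() stub so the GC can fix up the compiled
// method (for example, process the oops it embeds) before it is executed.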
#ifdef _LP64
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs_nm == nullptr) {
    return;
  }
  Register thread = r15_thread;
  Address disarmed_addr(thread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
  // The immediate is the last 4 bytes of the cmp instruction, so if we align
  // the start of the instruction to 4 bytes, its second half is also 4-byte
  // aligned, which means the immediate cannot cross a cache line.
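  // (Assuming the thread offset fits in an 8-bit displacement, the cmp encodes
  // as a 4-byte head -- REX prefix, 0x81 opcode, ModRM, disp8 -- followed by
  // the 4-byte imm32, i.e. the 8 bytes asserted below.)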
  __ align(4);
  uintptr_t before_cmp = (uintptr_t)__ pc();
  __ cmpl_imm32(disarmed_addr, 0);
  uintptr_t after_cmp = (uintptr_t)__ pc();
  guarantee(after_cmp - before_cmp == 8, "Wrong assumed instruction length");

  if (slow_path != nullptr) {
    __ jcc(Assembler::notEqual, *slow_path);
    __ bind(*continuation);
  } else {
    Label done;
    __ jccb(Assembler::equal, done);
    __ call(RuntimeAddress(StubRoutines::method_entry_barrier()));
    __ bind(done);
  }
}
#else
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label*, Label*) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs_nm == nullptr) {
    return;
  }

  Label continuation;

  Register tmp = rdi;
  __ push(tmp);
  __ movptr(tmp, (intptr_t)bs_nm->disarmed_guard_value_address());
  Address disarmed_addr(tmp, 0);
  __ align(4);
  __ cmpl_imm32(disarmed_addr, 0);
  __ pop(tmp);
  __ jcc(Assembler::equal, continuation);
  __ call(RuntimeAddress(StubRoutines::method_entry_barrier()));
  __ bind(continuation);
}
#endif

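// c2i entry barrier: on entry through a c2i adapter, verify that the callee
// (in rbx) is still valid. If the caller passed a null Method*, or the
// method's holder ClassLoaderData is neither strongly reachable nor kept
// alive via its weak handle (i.e. the class is being unloaded concurrently),
// divert to the handle_wrong_method stub instead of entering stale code.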
void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
  BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs == nullptr) {
    return;
  }

  Label bad_call;
  __ cmpptr(rbx, 0); // rbx contains the incoming method for c2i adapters.
  __ jcc(Assembler::equal, bad_call);

  Register tmp1 = LP64_ONLY( rscratch1 ) NOT_LP64( rax );
  Register tmp2 = LP64_ONLY( rscratch2 ) NOT_LP64( rcx );
#ifndef _LP64
  __ push(tmp1);
  __ push(tmp2);
#endif // !_LP64

  // Pointer chase to the method holder to find out if the method is concurrently unloading.
  Label method_live;
  __ load_method_holder_cld(tmp1, rbx);

  // Is it a strong CLD?
  __ cmpl(Address(tmp1, ClassLoaderData::keep_alive_ref_count_offset()), 0);
  __ jcc(Assembler::greater, method_live);

  // Is it a weak but alive CLD?
  __ movptr(tmp1, Address(tmp1, ClassLoaderData::holder_offset()));
  __ resolve_weak_handle(tmp1, tmp2);
  __ cmpptr(tmp1, 0);
  __ jcc(Assembler::notEqual, method_live);

#ifndef _LP64
  __ pop(tmp2);
  __ pop(tmp1);
#endif

  __ bind(bad_call);
  __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
  __ bind(method_live);

#ifndef _LP64
  __ pop(tmp2);
  __ pop(tmp1);
#endif
}

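// Sanity check an oop, as used by oop verification: the pointer must match
// Universe::verify_oop_mask()/verify_oop_bits(), and its klass must be
// non-null. Jumps to error on failure; clobbers obj, tmp1 and tmp2.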
void BarrierSetAssembler::check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error) {
  // Check if the oop is in the right area of memory
  __ movptr(tmp1, obj);
  __ movptr(tmp2, (intptr_t) Universe::verify_oop_mask());
  __ andptr(tmp1, tmp2);
  __ movptr(tmp2, (intptr_t) Universe::verify_oop_bits());
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notZero, error);
  // Make sure the klass is 'reasonable', i.e. not null.
  __ load_klass(obj, obj, tmp1);  // get klass
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, error); // if klass is null it is broken
}

#ifdef COMPILER2

#ifdef _LP64

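// refine_register normalizes an OptoReg for late barrier expansion. For XMM
// registers, the live width is encoded in the low 4 bits of the OptoReg in
// 8-byte units: 1 = 8 bytes (scalar/VecD), 2 = 16 bytes (VecX), 4 = 32 bytes
// (VecY), 8 = 64 bytes (VecZ). SaveLiveRegisters later decodes this via
// xmm_slot_size(), i.e. width = (opto_reg & 15) << 3.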
OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
  if (!OptoReg::is_reg(opto_reg)) {
    return OptoReg::Bad;
  }

  const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
  if (vm_reg->is_XMMRegister()) {
    opto_reg &= ~15;
    switch (node->ideal_reg()) {
    case Op_VecX:
      opto_reg |= 2;
      break;
    case Op_VecY:
      opto_reg |= 4;
      break;
    case Op_VecZ:
      opto_reg |= 8;
      break;
    default:
      opto_reg |= 1;
      break;
    }
  }

  return opto_reg;
}

// We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel
extern void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);

#undef __
#define __ _masm->

int SaveLiveRegisters::xmm_compare_register_size(XMMRegisterData* left, XMMRegisterData* right) {
  if (left->_size == right->_size) {
    return 0;
  }

  return (left->_size < right->_size) ? -1 : 1;
}

int SaveLiveRegisters::xmm_slot_size(OptoReg::Name opto_reg) {
  // The low order 4 bits of the opto_reg encode the live size of the XMM register, in 8-byte units
  return (opto_reg & 15) << 3;
}

uint SaveLiveRegisters::xmm_ideal_reg_for_size(int reg_size) {
  switch (reg_size) {
  case 8:
    return Op_VecD;
  case 16:
    return Op_VecX;
  case 32:
    return Op_VecY;
  case 64:
    return Op_VecZ;
  default:
    fatal("Invalid register size %d", reg_size);
    return 0;
  }
}

bool SaveLiveRegisters::xmm_needs_vzeroupper() const {
  return _xmm_registers.is_nonempty() && _xmm_registers.at(0)._size > 16;
}

void SaveLiveRegisters::xmm_register_save(const XMMRegisterData& reg_data) {
  const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
  const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
  _spill_offset -= reg_data._size;
  C2_MacroAssembler c2_masm(__ code());
  vec_spill_helper(&c2_masm, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
}

void SaveLiveRegisters::xmm_register_restore(const XMMRegisterData& reg_data) {
  const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
  const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
  C2_MacroAssembler c2_masm(__ code());
  vec_spill_helper(&c2_masm, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
  _spill_offset += reg_data._size;
}

void SaveLiveRegisters::gp_register_save(Register reg) {
  _spill_offset -= 8;
  __ movq(Address(rsp, _spill_offset), reg);
}

void SaveLiveRegisters::opmask_register_save(KRegister reg) {
  _spill_offset -= 8;
  __ kmov(Address(rsp, _spill_offset), reg);
}

void SaveLiveRegisters::gp_register_restore(Register reg) {
  __ movq(reg, Address(rsp, _spill_offset));
  _spill_offset += 8;
}

void SaveLiveRegisters::opmask_register_restore(KRegister reg) {
  __ kmov(reg, Address(rsp, _spill_offset));
  _spill_offset += 8;
}

void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
  // Create mask of caller saved registers that need to
  // be saved/restored if live
  RegMask caller_saved;
  caller_saved.Insert(OptoReg::as_OptoReg(rax->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rcx->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rdx->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rsi->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rdi->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r8->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r9->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r10->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg()));

  if (UseAPX) {
    caller_saved.Insert(OptoReg::as_OptoReg(r16->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r17->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r18->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r19->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r20->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r21->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r22->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r23->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r24->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r25->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r26->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r27->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r28->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r29->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r30->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r31->as_VMReg()));
  }

  int gp_spill_size = 0;
  int opmask_spill_size = 0;
  int xmm_spill_size = 0;

  // Record the registers that need to be saved/restored
  RegMaskIterator rmi(stub->preserve_set());
  while (rmi.has_next()) {
    const OptoReg::Name opto_reg = rmi.next();
    const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);

    if (vm_reg->is_Register()) {
      if (caller_saved.Member(opto_reg)) {
        _gp_registers.append(vm_reg->as_Register());
        gp_spill_size += 8;
      }
    } else if (vm_reg->is_KRegister()) {
      // All opmask registers are caller saved, thus spill the ones
      // which are live.
      if (_opmask_registers.find(vm_reg->as_KRegister()) == -1) {
        _opmask_registers.append(vm_reg->as_KRegister());
        opmask_spill_size += 8;
      }
    } else if (vm_reg->is_XMMRegister()) {
      // The low order 4 bits of the opto_reg encode how large a part of the register is live
      const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~15);
      const int reg_size = xmm_slot_size(opto_reg);
      const XMMRegisterData reg_data = { vm_reg_base->as_XMMRegister(), reg_size };
      const int reg_index = _xmm_registers.find(reg_data);
      if (reg_index == -1) {
        // Not previously appended
        _xmm_registers.append(reg_data);
        xmm_spill_size += reg_size;
      } else {
        // Previously appended, update size
        const int reg_size_prev = _xmm_registers.at(reg_index)._size;
        if (reg_size > reg_size_prev) {
          _xmm_registers.at_put(reg_index, reg_data);
          xmm_spill_size += reg_size - reg_size_prev;
        }
      }
    } else {
      fatal("Unexpected register type");
    }
  }

  // Sort by size, largest first
  _xmm_registers.sort(xmm_compare_register_size);

  // On Windows, the caller reserves stack space for spilling register arguments
  const int arg_spill_size = frame::arg_reg_save_area_bytes;

  // Stack pointer must be 16 bytes aligned for the call
  _spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size + opmask_spill_size + arg_spill_size, 16);
}

SaveLiveRegisters::SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub)
  : _masm(masm),
    _gp_registers(),
    _opmask_registers(),
    _xmm_registers(),
    _spill_size(0),
    _spill_offset(0) {

  //
  // Stack layout after registers have been spilled:
  //
  // | ...            | original rsp, 16 bytes aligned
  // ------------------
  // | zmm0 high      |
  // | ...            |
  // | zmm0 low       | 16 bytes aligned
  // | ...            |
  // | ymm1 high      |
  // | ...            |
  // | ymm1 low       | 16 bytes aligned
  // | ...            |
  // | xmmN high      |
  // | ...            |
  // | xmmN low       | 8 bytes aligned
  // | reg0           | 8 bytes aligned
  // | reg1           |
  // | ...            |
  // | regN           | new rsp, if 16 bytes aligned
  // | <padding>      | else new rsp, 16 bytes aligned
  // ------------------
  //

  // Figure out what registers to save/restore
  initialize(stub);

  // Allocate stack space
  if (_spill_size > 0) {
    __ subptr(rsp, _spill_size);
  }

  // Save XMM/YMM/ZMM registers
  for (int i = 0; i < _xmm_registers.length(); i++) {
    xmm_register_save(_xmm_registers.at(i));
  }

  if (xmm_needs_vzeroupper()) {
    __ vzeroupper();
  }

  // Save general purpose registers
  for (int i = 0; i < _gp_registers.length(); i++) {
    gp_register_save(_gp_registers.at(i));
  }

  // Save opmask registers
  for (int i = 0; i < _opmask_registers.length(); i++) {
    opmask_register_save(_opmask_registers.at(i));
  }
}

SaveLiveRegisters::~SaveLiveRegisters() {
  // Restore opmask registers
  for (int i = _opmask_registers.length() - 1; i >= 0; i--) {
    opmask_register_restore(_opmask_registers.at(i));
  }

  // Restore general purpose registers
  for (int i = _gp_registers.length() - 1; i >= 0; i--) {
    gp_register_restore(_gp_registers.at(i));
  }

  __ vzeroupper();

  // Restore XMM/YMM/ZMM registers
  for (int i = _xmm_registers.length() - 1; i >= 0; i--) {
    xmm_register_restore(_xmm_registers.at(i));
  }

  // Free stack space
  if (_spill_size > 0) {
    __ addptr(rsp, _spill_size);
  }
}
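
// SaveLiveRegisters is used as an RAII scope around runtime calls emitted
// from C2 barrier slow paths, roughly along these lines (illustrative sketch
// only; the stub and the runtime entry are placeholders):
//
//   {
//     SaveLiveRegisters save_registers(masm, stub);
//     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SomeBarrierRuntime::slow_path)));
//   }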

#else // !_LP64

OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
  Unimplemented(); // This must be implemented to support late barrier expansion.
  return OptoReg::Bad; // keep the compiler happy; Unimplemented() does not return
}

#endif // _LP64

#endif // COMPILER2