/*
 * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/classLoaderData.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/interp_masm.hpp"
#include "memory/universe.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifdef COMPILER2
#include "gc/shared/c2/barrierSetC2.hpp"
#endif // COMPILER2

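// The '__' macro below expands to the current assembler pointer ('masm->' here,
// redefined to '_masm->' for SaveLiveRegisters further down) so that the code
// generation calls read like an assembly listing.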
#define __ masm->

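// Load a value of the given BasicType from 'src' into 'dst'. Heap oops are
// decoded from their compressed form when UseCompressedOops is enabled. For
// T_FLOAT, T_DOUBLE and T_LONG the value is loaded to the tos state
// (ftos/dtos/ltos) rather than into 'dst', which must be noreg in those cases.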
void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                  Register dst, Address src, Register tmp1, Register tmp_thread) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  bool atomic = (decorators & MO_RELAXED) != 0;

  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
#ifdef _LP64
      if (UseCompressedOops) {
        __ movl(dst, src);
        if (is_not_null) {
          __ decode_heap_oop_not_null(dst);
        } else {
          __ decode_heap_oop(dst);
        }
      } else
#endif
      {
        __ movptr(dst, src);
      }
    } else {
      assert(in_native, "why else?");
      __ movptr(dst, src);
    }
    break;
  }
  case T_BOOLEAN: __ load_unsigned_byte(dst, src);  break;
  case T_BYTE:    __ load_signed_byte(dst, src);    break;
  case T_CHAR:    __ load_unsigned_short(dst, src); break;
  case T_SHORT:   __ load_signed_short(dst, src);   break;
  case T_INT:     __ movl  (dst, src);              break;
  case T_ADDRESS: __ movptr(dst, src);              break;
  case T_FLOAT:
    assert(dst == noreg, "only to ftos");
    __ load_float(src);
    break;
  case T_DOUBLE:
    assert(dst == noreg, "only to dtos");
    __ load_double(src);
    break;
  case T_LONG:
    assert(dst == noreg, "only to ltos");
#ifdef _LP64
    __ movq(rax, src);
#else
    if (atomic) {
      __ fild_d(src);               // Must load atomically
      __ subptr(rsp,2*wordSize);    // Make space for store
      __ fistp_d(Address(rsp,0));
      __ pop(rax);
      __ pop(rdx);
    } else {
      __ movl(rax, src);
      __ movl(rdx, src.plus_disp(wordSize));
    }
#endif
    break;
  default: Unimplemented();
  }
}

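// Store 'val' of the given BasicType to 'dst'. Heap oops are encoded to their
// compressed form when UseCompressedOops is enabled; val == noreg stores a null
// oop. For T_FLOAT, T_DOUBLE and T_LONG the value is taken from the tos state
// (ftos/dtos/ltos) and 'val' must be noreg.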
void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  bool atomic = (decorators & MO_RELAXED) != 0;

  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
      if (val == noreg) {
        assert(!is_not_null, "inconsistent access");
#ifdef _LP64
        if (UseCompressedOops) {
          __ movl(dst, NULL_WORD);
        } else {
          __ movslq(dst, NULL_WORD);
        }
#else
        __ movl(dst, NULL_WORD);
#endif
      } else {
#ifdef _LP64
        if (UseCompressedOops) {
          assert(!dst.uses(val), "not enough registers");
          if (is_not_null) {
            __ encode_heap_oop_not_null(val);
          } else {
            __ encode_heap_oop(val);
          }
          __ movl(dst, val);
        } else
#endif
        {
          __ movptr(dst, val);
        }
      }
    } else {
      assert(in_native, "why else?");
      assert(val != noreg, "not supported");
      __ movptr(dst, val);
    }
    break;
  }
  case T_BOOLEAN:
    __ andl(val, 0x1);  // boolean is true if LSB is 1
    __ movb(dst, val);
    break;
  case T_BYTE:
    __ movb(dst, val);
    break;
  case T_SHORT:
    __ movw(dst, val);
    break;
  case T_CHAR:
    __ movw(dst, val);
    break;
  case T_INT:
    __ movl(dst, val);
    break;
  case T_LONG:
    assert(val == noreg, "only tos");
#ifdef _LP64
    __ movq(dst, rax);
#else
    if (atomic) {
      __ push(rdx);
      __ push(rax);                 // Must update atomically with FIST
      __ fild_d(Address(rsp,0));    // So load into FPU register
      __ fistp_d(dst);              // and put into memory atomically
      __ addptr(rsp, 2*wordSize);
    } else {
      __ movptr(dst, rax);
      __ movptr(dst.plus_disp(wordSize), rdx);
    }
#endif
    break;
  case T_FLOAT:
    assert(val == noreg, "only tos");
    __ store_float(dst);
    break;
  case T_DOUBLE:
    assert(val == noreg, "only tos");
    __ store_double(dst);
    break;
  case T_ADDRESS:
    __ movptr(dst, val);
    break;
  default: Unimplemented();
  }
}

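// Arraycopy support: load up to 8 bytes from 'src' into the general purpose
// register 'dst'. For checkcast copies with compressed oops the loaded narrow
// oop is decoded, so the caller sees a full oop it can type check.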
void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       Register dst,
                                       Address src,
                                       Register tmp) {
  assert(bytes <= 8, "can only deal with non-vector registers");
  switch (bytes) {
  case 1:
    __ movb(dst, src);
    break;
  case 2:
    __ movw(dst, src);
    break;
  case 4:
    __ movl(dst, src);
    break;
  case 8:
#ifdef _LP64
    __ movq(dst, src);
#else
    fatal("No support for 8 bytes copy");
#endif
    break;
  default:
    fatal("Unexpected size");
  }
#ifdef _LP64
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ decode_heap_oop(dst);
  }
#endif
}

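// Arraycopy support: store up to 8 bytes from the general purpose register
// 'src' to 'dst'. For checkcast copies with compressed oops the oop is encoded
// back to its narrow form before the store.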
void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        Register src,
                                        Register tmp) {
#ifdef _LP64
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ encode_heap_oop(src);
  }
#endif
  assert(bytes <= 8, "can only deal with non-vector registers");
  switch (bytes) {
  case 1:
    __ movb(dst, src);
    break;
  case 2:
    __ movw(dst, src);
    break;
  case 4:
    __ movl(dst, src);
    break;
  case 8:
#ifdef _LP64
    __ movq(dst, src);
#else
    fatal("No support for 8 bytes copy");
#endif
    break;
  default:
    fatal("Unexpected size");
  }
}

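// Arraycopy support: 16 or 32 byte loads through an XMM/YMM register.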
void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       XMMRegister dst,
                                       Address src,
                                       Register tmp,
                                       XMMRegister xmm_tmp) {
  assert(bytes > 8, "can only deal with vector registers");
  if (bytes == 16) {
    __ movdqu(dst, src);
  } else if (bytes == 32) {
    __ vmovdqu(dst, src);
  } else {
    fatal("No support for >32 bytes copy");
  }
}

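// Arraycopy support: 16 or 32 byte stores through an XMM/YMM register.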
void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        XMMRegister src,
                                        Register tmp1,
                                        Register tmp2,
                                        XMMRegister xmm_tmp) {
  assert(bytes > 8, "can only deal with vector registers");
  if (bytes == 16) {
    __ movdqu(dst, src);
  } else if (bytes == 32) {
    __ vmovdqu(dst, src);
  } else {
    fatal("No support for >32 bytes copy");
  }
}

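// Resolve a jobject from native code: strip the tag bits and load the oop
// through the handle. This base implementation never takes 'slowpath';
// collectors that need additional checks override it.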
void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                        Register obj, Register tmp, Label& slowpath) {
  __ clear_jobject_tag(obj);
  __ movptr(obj, Address(obj, 0));
}

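// Bump-pointer allocation in the current thread's TLAB: obj = top, end = obj +
// size; if end is past the TLAB limit, jump to 'slow_case', otherwise publish
// the new top. The size is either 'con_size_in_bytes' or the value held in
// 'var_size_in_bytes'.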
void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm,
                                        Register thread, Register obj,
                                        Register var_size_in_bytes,
                                        int con_size_in_bytes,
                                        Register t1,
                                        Register t2,
                                        Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  if (!thread->is_valid()) {
#ifdef _LP64
    thread = r15_thread;
#else
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    __ get_thread(thread);
#endif
  }

  __ verify_tlab();

  __ movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    __ lea(end, Address(obj, con_size_in_bytes));
  } else {
    __ lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  __ cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  __ jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  __ movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    __ subptr(var_size_in_bytes, obj);
  }
  __ verify_tlab();
}

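// nmethod entry barrier: compare the disarmed guard value (per-thread on
// 64-bit, global on 32-bit) against the guard value held in the immediate of
// the emitted cmp (initially 0, patched when the barrier is armed/disarmed).
// If they differ, the method_entry_barrier stub (or the supplied slow_path)
// is taken.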
#ifdef _LP64
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs_nm == nullptr) {
    return;
  }
  Register thread = r15_thread;
  Address disarmed_addr(thread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
  // The immediate is the last 4 bytes, so if we align the start of the cmp
  // instruction to 4 bytes, we know that the second half of it is also 4
  // byte aligned, which means that the immediate will not cross a cache line
  __ align(4);
  uintptr_t before_cmp = (uintptr_t)__ pc();
  __ cmpl_imm32(disarmed_addr, 0);
  uintptr_t after_cmp = (uintptr_t)__ pc();
  guarantee(after_cmp - before_cmp == 8, "Wrong assumed instruction length");

  if (slow_path != nullptr) {
    __ jcc(Assembler::notEqual, *slow_path);
    __ bind(*continuation);
  } else {
    Label done;
    __ jccb(Assembler::equal, done);
    __ call(RuntimeAddress(StubRoutines::method_entry_barrier()));
    __ bind(done);
  }
}
#else
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label*, Label*) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs_nm == nullptr) {
    return;
  }

  Label continuation;

  Register tmp = rdi;
  __ push(tmp);
  __ movptr(tmp, (intptr_t)bs_nm->disarmed_guard_value_address());
  Address disarmed_addr(tmp, 0);
  __ align(4);
  __ cmpl_imm32(disarmed_addr, 0);
  __ pop(tmp);
  __ jcc(Assembler::equal, continuation);
  __ call(RuntimeAddress(StubRoutines::method_entry_barrier()));
  __ bind(continuation);
}
#endif

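// c2i entry barrier: rbx holds the incoming Method*. Bail out to the
// handle_wrong_method stub if the method is null or if its class loader data
// is dying (neither a strong CLD nor a weak-but-still-alive holder), guarding
// against entering a method that is being concurrently unloaded.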
void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
  BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs == nullptr) {
    return;
  }

  Label bad_call;
  __ cmpptr(rbx, 0); // rbx contains the incoming method for c2i adapters.
  __ jcc(Assembler::equal, bad_call);

  Register tmp1 = LP64_ONLY( rscratch1 ) NOT_LP64( rax );
  Register tmp2 = LP64_ONLY( rscratch2 ) NOT_LP64( rcx );
#ifndef _LP64
  __ push(tmp1);
  __ push(tmp2);
#endif // !_LP64

  // Pointer chase to the method holder to find out if the method is concurrently unloading.
  Label method_live;
  __ load_method_holder_cld(tmp1, rbx);

  // Is it a strong CLD?
  __ cmpl(Address(tmp1, ClassLoaderData::keep_alive_ref_count_offset()), 0);
  __ jcc(Assembler::greater, method_live);

  // Is it a weak but alive CLD?
  __ movptr(tmp1, Address(tmp1, ClassLoaderData::holder_offset()));
  __ resolve_weak_handle(tmp1, tmp2);
  __ cmpptr(tmp1, 0);
  __ jcc(Assembler::notEqual, method_live);

#ifndef _LP64
  __ pop(tmp2);
  __ pop(tmp1);
#endif

  __ bind(bad_call);
  __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
  __ bind(method_live);

#ifndef _LP64
  __ pop(tmp2);
  __ pop(tmp1);
#endif
}

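// Sanity checks used when verifying oops: jump to 'error' unless 'obj' looks
// like an oop (its address matches the expected oop mask/bits) and has a
// non-null klass pointer. Note that 'obj' is clobbered by the klass load.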
void BarrierSetAssembler::check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error) {
  // Check if the oop is in the right area of memory
  __ movptr(tmp1, obj);
  __ movptr(tmp2, (intptr_t) Universe::verify_oop_mask());
  __ andptr(tmp1, tmp2);
  __ movptr(tmp2, (intptr_t) Universe::verify_oop_bits());
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notZero, error);

  // make sure klass is 'reasonable', which is not zero.
  __ load_klass(obj, obj, tmp1);  // get klass
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, error); // if klass is null it is broken
}

#ifdef COMPILER2

#ifdef _LP64

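// Refine an OptoReg for late barrier expansion: stack slots are rejected and,
// for XMM registers, the low four bits of the returned OptoReg encode how much
// of the register is live (scalar, VecX, VecY or VecZ), matching the encoding
// consumed by SaveLiveRegisters::xmm_slot_size() below.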
OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
  if (!OptoReg::is_reg(opto_reg)) {
    return OptoReg::Bad;
  }

  const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
  if (vm_reg->is_XMMRegister()) {
    opto_reg &= ~15;
    switch (node->ideal_reg()) {
    case Op_VecX:
      opto_reg |= 2;
      break;
    case Op_VecY:
      opto_reg |= 4;
      break;
    case Op_VecZ:
      opto_reg |= 8;
      break;
    default:
      opto_reg |= 1;
      break;
    }
  }

  return opto_reg;
}

// We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel
extern void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);

#undef __
#define __ _masm->

int SaveLiveRegisters::xmm_compare_register_size(XMMRegisterData* left, XMMRegisterData* right) {
  if (left->_size == right->_size) {
    return 0;
  }

  return (left->_size < right->_size) ? -1 : 1;
}

int SaveLiveRegisters::xmm_slot_size(OptoReg::Name opto_reg) {
  // The low order 4 bits denote what size of the XMM register is live
  return (opto_reg & 15) << 3;
}

uint SaveLiveRegisters::xmm_ideal_reg_for_size(int reg_size) {
  switch (reg_size) {
  case 8:
    return Op_VecD;
  case 16:
    return Op_VecX;
  case 32:
    return Op_VecY;
  case 64:
    return Op_VecZ;
  default:
    fatal("Invalid register size %d", reg_size);
    return 0;
  }
}

bool SaveLiveRegisters::xmm_needs_vzeroupper() const {
  return _xmm_registers.is_nonempty() && _xmm_registers.at(0)._size > 16;
}

void SaveLiveRegisters::xmm_register_save(const XMMRegisterData& reg_data) {
  const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
  const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
  _spill_offset -= reg_data._size;
  C2_MacroAssembler c2_masm(__ code());
  vec_spill_helper(&c2_masm, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
}

void SaveLiveRegisters::xmm_register_restore(const XMMRegisterData& reg_data) {
  const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
  const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
  C2_MacroAssembler c2_masm(__ code());
  vec_spill_helper(&c2_masm, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
  _spill_offset += reg_data._size;
}

void SaveLiveRegisters::gp_register_save(Register reg) {
  _spill_offset -= 8;
  __ movq(Address(rsp, _spill_offset), reg);
}

void SaveLiveRegisters::opmask_register_save(KRegister reg) {
  _spill_offset -= 8;
  __ kmov(Address(rsp, _spill_offset), reg);
}

void SaveLiveRegisters::gp_register_restore(Register reg) {
  __ movq(reg, Address(rsp, _spill_offset));
  _spill_offset += 8;
}

void SaveLiveRegisters::opmask_register_restore(KRegister reg) {
  __ kmov(reg, Address(rsp, _spill_offset));
  _spill_offset += 8;
}

void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
  // Create mask of caller saved registers that need to
  // be saved/restored if live
  RegMask caller_saved;
  caller_saved.Insert(OptoReg::as_OptoReg(rax->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rcx->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rdx->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rsi->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rdi->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r8->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r9->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r10->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg()));

  if (UseAPX) {
    caller_saved.Insert(OptoReg::as_OptoReg(r16->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r17->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r18->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r19->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r20->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r21->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r22->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r23->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r24->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r25->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r26->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r27->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r28->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r29->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r30->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r31->as_VMReg()));
  }

  int gp_spill_size = 0;
  int opmask_spill_size = 0;
  int xmm_spill_size = 0;

  // Record registers that need to be saved/restored
  RegMaskIterator rmi(stub->preserve_set());
  while (rmi.has_next()) {
    const OptoReg::Name opto_reg = rmi.next();
    const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);

    if (vm_reg->is_Register()) {
      if (caller_saved.Member(opto_reg)) {
        _gp_registers.append(vm_reg->as_Register());
        gp_spill_size += 8;
      }
    } else if (vm_reg->is_KRegister()) {
      // All opmask registers are caller saved, thus spill the ones
      // which are live.
      if (_opmask_registers.find(vm_reg->as_KRegister()) == -1) {
        _opmask_registers.append(vm_reg->as_KRegister());
        opmask_spill_size += 8;
      }
    } else if (vm_reg->is_XMMRegister()) {
      // The low order 4 bits of opto_reg encode how large a part of the register is live
      const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~15);
      const int reg_size = xmm_slot_size(opto_reg);
      const XMMRegisterData reg_data = { vm_reg_base->as_XMMRegister(), reg_size };
      const int reg_index = _xmm_registers.find(reg_data);
      if (reg_index == -1) {
        // Not previously appended
        _xmm_registers.append(reg_data);
        xmm_spill_size += reg_size;
      } else {
        // Previously appended, update size
        const int reg_size_prev = _xmm_registers.at(reg_index)._size;
        if (reg_size > reg_size_prev) {
          _xmm_registers.at_put(reg_index, reg_data);
          xmm_spill_size += reg_size - reg_size_prev;
        }
      }
    } else {
      fatal("Unexpected register type");
    }
  }

  // Sort by size, largest first
  _xmm_registers.sort(xmm_compare_register_size);

  // On Windows, the caller reserves stack space for spilling register arguments
  const int arg_spill_size = frame::arg_reg_save_area_bytes;

  // Stack pointer must be 16 bytes aligned for the call
  _spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size + opmask_spill_size + arg_spill_size, 16);
}

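// SaveLiveRegisters is a scoped (RAII) helper used around the out-of-line part
// of a C2 barrier stub. A minimal sketch of the intended usage, assuming the
// stub ends in a runtime call ('some_runtime_entry' is a placeholder, not a
// real entry point):
//
//   {
//     SaveLiveRegisters save_registers(masm, stub);
//     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, some_runtime_entry)));
//   } // destructor restores the spilled registers and releases the stack space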
SaveLiveRegisters::SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub)
  : _masm(masm),
    _gp_registers(),
    _opmask_registers(),
    _xmm_registers(),
    _spill_size(0),
    _spill_offset(0) {

  //
  // Stack layout after registers have been spilled:
  //
  // | ...            | original rsp, 16 bytes aligned
  // ------------------
  // | zmm0 high      |
  // | ...            |
  // | zmm0 low       | 16 bytes aligned
  // | ...            |
  // | ymm1 high      |
  // | ...            |
  // | ymm1 low       | 16 bytes aligned
  // | ...            |
  // | xmmN high      |
  // | ...            |
  // | xmmN low       | 8 bytes aligned
  // | reg0           | 8 bytes aligned
  // | reg1           |
  // | ...            |
  // | regN           | new rsp, if 16 bytes aligned
  // | <padding>      | else new rsp, 16 bytes aligned
  // ------------------
  //

  // Figure out what registers to save/restore
  initialize(stub);

  // Allocate stack space
  if (_spill_size > 0) {
    __ subptr(rsp, _spill_size);
  }

  // Save XMM/YMM/ZMM registers
  for (int i = 0; i < _xmm_registers.length(); i++) {
    xmm_register_save(_xmm_registers.at(i));
  }

  if (xmm_needs_vzeroupper()) {
    __ vzeroupper();
  }

  // Save general purpose registers
  for (int i = 0; i < _gp_registers.length(); i++) {
    gp_register_save(_gp_registers.at(i));
  }

  // Save opmask registers
  for (int i = 0; i < _opmask_registers.length(); i++) {
    opmask_register_save(_opmask_registers.at(i));
  }
}

SaveLiveRegisters::~SaveLiveRegisters() {
  // Restore opmask registers
  for (int i = _opmask_registers.length() - 1; i >= 0; i--) {
    opmask_register_restore(_opmask_registers.at(i));
  }

  // Restore general purpose registers
  for (int i = _gp_registers.length() - 1; i >= 0; i--) {
    gp_register_restore(_gp_registers.at(i));
  }

  __ vzeroupper();

  // Restore XMM/YMM/ZMM registers
  for (int i = _xmm_registers.length() - 1; i >= 0; i--) {
    xmm_register_restore(_xmm_registers.at(i));
  }

  // Free stack space
  if (_spill_size > 0) {
    __ addptr(rsp, _spill_size);
  }
}

#else // !_LP64

OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
  Unimplemented(); // This must be implemented to support late barrier expansion.
}

#endif // _LP64

#endif // COMPILER2