/*
 * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/classLoaderData.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "gc/shared/barrierSetRuntime.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/interp_masm.hpp"
#include "memory/universe.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifdef COMPILER2
#include "gc/shared/c2/barrierSetC2.hpp"
#endif // COMPILER2

#define __ masm->

void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                  Register dst, Address src, Register tmp1, Register tmp_thread) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  bool atomic = (decorators & MO_RELAXED) != 0;

  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
#ifdef _LP64
      if (UseCompressedOops) {
        __ movl(dst, src);
        if (is_not_null) {
          __ decode_heap_oop_not_null(dst);
        } else {
          __ decode_heap_oop(dst);
        }
      } else
#endif
      {
        __ movptr(dst, src);
      }
    } else {
      assert(in_native, "why else?");
      __ movptr(dst, src);
    }
    break;
  }
  case T_BOOLEAN: __ load_unsigned_byte(dst, src);  break;
  case T_BYTE:    __ load_signed_byte(dst, src);    break;
  case T_CHAR:    __ load_unsigned_short(dst, src); break;
  case T_SHORT:   __ load_signed_short(dst, src);   break;
  case T_INT:     __ movl  (dst, src);              break;
  case T_ADDRESS: __ movptr(dst, src);              break;
  case T_FLOAT:
    assert(dst == noreg, "only to ftos");
    __ load_float(src);
    break;
  case T_DOUBLE:
    assert(dst == noreg, "only to dtos");
    __ load_double(src);
    break;
  case T_LONG:
    assert(dst == noreg, "only to ltos");
#ifdef _LP64
    __ movq(rax, src);
#else
    if (atomic) {
      __ fild_d(src);               // Must load atomically
      __ subptr(rsp,2*wordSize);    // Make space for store
      __ fistp_d(Address(rsp,0));
      __ pop(rax);
      __ pop(rdx);
    } else {
      __ movl(rax, src);
      __ movl(rdx, src.plus_disp(wordSize));
    }
#endif
    break;
  default: Unimplemented();
  }
}
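
// Stores mirror the loads above: store_at() writes a value of the given
// BasicType to dst, compressing oops first on 64-bit VMs when
// UseCompressedOops is enabled. A val of noreg encodes a null oop store.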
void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  bool atomic = (decorators & MO_RELAXED) != 0;

  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
      if (val == noreg) {
        assert(!is_not_null, "inconsistent access");
#ifdef _LP64
        if (UseCompressedOops) {
          __ movl(dst, NULL_WORD);
        } else {
          __ movslq(dst, NULL_WORD);
        }
#else
        __ movl(dst, NULL_WORD);
#endif
      } else {
#ifdef _LP64
        if (UseCompressedOops) {
          assert(!dst.uses(val), "not enough registers");
          if (is_not_null) {
            __ encode_heap_oop_not_null(val);
          } else {
            __ encode_heap_oop(val);
          }
          __ movl(dst, val);
        } else
#endif
        {
          __ movptr(dst, val);
        }
      }
    } else {
      assert(in_native, "why else?");
      assert(val != noreg, "not supported");
      __ movptr(dst, val);
    }
    break;
  }
  case T_BOOLEAN:
    __ andl(val, 0x1);  // boolean is true if LSB is 1
    __ movb(dst, val);
    break;
  case T_BYTE:
    __ movb(dst, val);
    break;
  case T_SHORT:
    __ movw(dst, val);
    break;
  case T_CHAR:
    __ movw(dst, val);
    break;
  case T_INT:
    __ movl(dst, val);
    break;
  case T_LONG:
    assert(val == noreg, "only tos");
#ifdef _LP64
    __ movq(dst, rax);
#else
    if (atomic) {
      __ push(rdx);
      __ push(rax);                 // Must update atomically with FIST
      __ fild_d(Address(rsp,0));    // So load into FPU register
      __ fistp_d(dst);              // and put into memory atomically
      __ addptr(rsp, 2*wordSize);
    } else {
      __ movptr(dst, rax);
      __ movptr(dst.plus_disp(wordSize), rdx);
    }
#endif
    break;
  case T_FLOAT:
    assert(val == noreg, "only tos");
    __ store_float(dst);
    break;
  case T_DOUBLE:
    assert(val == noreg, "only tos");
    __ store_double(dst);
    break;
  case T_ADDRESS:
    __ movptr(dst, val);
    break;
  default: Unimplemented();
  }
}
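
// Flat (inline-type) field copies are delegated to BarrierSetRuntime leaf
// calls below rather than being expanded inline; the IS_DEST_UNINITIALIZED
// decorator only selects which runtime entry point is used.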
void BarrierSetAssembler::flat_field_copy(MacroAssembler* masm, DecoratorSet decorators,
                                          Register src, Register dst, Register inline_layout_info) {
  // flat_field_copy implementation is fairly complex, and there are not any
  // "short-cuts" to be made from asm. What there is appears to have the same
  // cost in C++, so just "call_VM_leaf" for now rather than maintain hundreds
  // of hand-rolled instructions...
  if (decorators & IS_DEST_UNINITIALIZED) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized), src, dst, inline_layout_info);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy), src, dst, inline_layout_info);
  }
}

void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       Register dst,
                                       Address src,
                                       Register tmp) {
  assert(bytes <= 8, "can only deal with non-vector registers");
  switch (bytes) {
  case 1:
    __ movb(dst, src);
    break;
  case 2:
    __ movw(dst, src);
    break;
  case 4:
    __ movl(dst, src);
    break;
  case 8:
#ifdef _LP64
    __ movq(dst, src);
#else
    fatal("No support for 8 bytes copy");
#endif
    break;
  default:
    fatal("Unexpected size");
  }
#ifdef _LP64
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ decode_heap_oop(dst);
  }
#endif
}

void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        Register src,
                                        Register tmp) {
#ifdef _LP64
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ encode_heap_oop(src);
  }
#endif
  assert(bytes <= 8, "can only deal with non-vector registers");
  switch (bytes) {
  case 1:
    __ movb(dst, src);
    break;
  case 2:
    __ movw(dst, src);
    break;
  case 4:
    __ movl(dst, src);
    break;
  case 8:
#ifdef _LP64
    __ movq(dst, src);
#else
    fatal("No support for 8 bytes copy");
#endif
    break;
  default:
    fatal("Unexpected size");
  }
}

void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       XMMRegister dst,
                                       Address src,
                                       Register tmp,
                                       XMMRegister xmm_tmp) {
  assert(bytes > 8, "can only deal with vector registers");
  if (bytes == 16) {
    __ movdqu(dst, src);
  } else if (bytes == 32) {
    __ vmovdqu(dst, src);
  } else {
    fatal("No support for >32 bytes copy");
  }
}

void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        XMMRegister src,
                                        Register tmp1,
                                        Register tmp2,
                                        XMMRegister xmm_tmp) {
  assert(bytes > 8, "can only deal with vector registers");
  if (bytes == 16) {
    __ movdqu(dst, src);
  } else if (bytes == 32) {
    __ vmovdqu(dst, src);
  } else {
    fatal("No support for >32 bytes copy");
  }
}

void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                        Register obj, Register tmp, Label& slowpath) {
  __ clear_jobject_tag(obj);
  __ movptr(obj, Address(obj, 0));
}
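
// TLAB bump-pointer allocation: obj receives the current TLAB top, the top is
// advanced by the constant or variable size, and slow_case is taken when the
// new top would exceed the TLAB end.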
void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm,
                                        Register thread, Register obj,
                                        Register var_size_in_bytes,
                                        int con_size_in_bytes,
                                        Register t1,
                                        Register t2,
                                        Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  if (!thread->is_valid()) {
#ifdef _LP64
    thread = r15_thread;
#else
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    __ get_thread(thread);
#endif
  }

  __ verify_tlab();

  __ movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    __ lea(end, Address(obj, con_size_in_bytes));
  } else {
    __ lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  __ cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  __ jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  __ movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    __ subptr(var_size_in_bytes, obj);
  }
  __ verify_tlab();
}

#ifdef _LP64
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs_nm == nullptr) {
    return;
  }
  Register thread = r15_thread;
  Address disarmed_addr(thread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
  // The immediate is the last 4 bytes, so if we align the start of the cmp
  // instruction to 4 bytes, we know that the second half of it is also 4-byte
  // aligned, which means that the immediate will not cross a cache line.
  __ align(4);
  uintptr_t before_cmp = (uintptr_t)__ pc();
  __ cmpl_imm32(disarmed_addr, 0);
  uintptr_t after_cmp = (uintptr_t)__ pc();
  guarantee(after_cmp - before_cmp == 8, "Wrong assumed instruction length");

  if (slow_path != nullptr) {
    __ jcc(Assembler::notEqual, *slow_path);
    __ bind(*continuation);
  } else {
    Label done;
    __ jccb(Assembler::equal, done);
    __ call(RuntimeAddress(StubRoutines::method_entry_barrier()));
    __ bind(done);
  }
}
#else
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label*, Label*) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs_nm == nullptr) {
    return;
  }

  Label continuation;

  Register tmp = rdi;
  __ push(tmp);
  __ movptr(tmp, (intptr_t)bs_nm->disarmed_guard_value_address());
  Address disarmed_addr(tmp, 0);
  __ align(4);
  __ cmpl_imm32(disarmed_addr, 0);
  __ pop(tmp);
  __ jcc(Assembler::equal, continuation);
  __ call(RuntimeAddress(StubRoutines::method_entry_barrier()));
  __ bind(continuation);
}
#endif
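
// c2i entry barrier: keeps a c2i adapter from entering a method whose class
// loader data is concurrently unloading. The method is considered live if its
// holder's CLD is strong, or if the CLD's weak handle still resolves;
// otherwise control is routed to the handle_wrong_method stub.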
void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
  BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs == nullptr) {
    return;
  }

  Label bad_call;
  __ cmpptr(rbx, 0); // rbx contains the incoming method for c2i adapters.
  __ jcc(Assembler::equal, bad_call);

  Register tmp1 = LP64_ONLY( rscratch1 ) NOT_LP64( rax );
  Register tmp2 = LP64_ONLY( rscratch2 ) NOT_LP64( rcx );
#ifndef _LP64
  __ push(tmp1);
  __ push(tmp2);
#endif // !_LP64

  // Pointer chase to the method holder to find out if the method is concurrently unloading.
  Label method_live;
  __ load_method_holder_cld(tmp1, rbx);

  // Is it a strong CLD?
  __ cmpl(Address(tmp1, ClassLoaderData::keep_alive_ref_count_offset()), 0);
  __ jcc(Assembler::greater, method_live);

  // Is it a weak but alive CLD?
  __ movptr(tmp1, Address(tmp1, ClassLoaderData::holder_offset()));
  __ resolve_weak_handle(tmp1, tmp2);
  __ cmpptr(tmp1, 0);
  __ jcc(Assembler::notEqual, method_live);

#ifndef _LP64
  __ pop(tmp2);
  __ pop(tmp1);
#endif

  __ bind(bad_call);
  __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
  __ bind(method_live);

#ifndef _LP64
  __ pop(tmp2);
  __ pop(tmp1);
#endif
}

void BarrierSetAssembler::check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error) {
  // Check if the oop is in the right area of memory
  __ movptr(tmp1, obj);
  __ movptr(tmp2, (intptr_t) Universe::verify_oop_mask());
  __ andptr(tmp1, tmp2);
  __ movptr(tmp2, (intptr_t) Universe::verify_oop_bits());
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notZero, error);

  // Make sure the klass is 'reasonable', i.e. not null.
  __ load_klass(obj, obj, tmp1);  // get klass
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, error); // if klass is null it is broken
}

#ifdef COMPILER2

#ifdef _LP64

OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
  if (!OptoReg::is_reg(opto_reg)) {
    return OptoReg::Bad;
  }

  const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
  if (vm_reg->is_XMMRegister()) {
    opto_reg &= ~15;
    switch (node->ideal_reg()) {
    case Op_VecX:
      opto_reg |= 2;
      break;
    case Op_VecY:
      opto_reg |= 4;
      break;
    case Op_VecZ:
      opto_reg |= 8;
      break;
    default:
      opto_reg |= 1;
      break;
    }
  }

  return opto_reg;
}
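
// refine_register() encodes, in the low order 4 bits of an XMM opto_reg, how
// many bytes of the register are live (1 -> 8, 2 -> 16, 4 -> 32, 8 -> 64);
// xmm_slot_size() below decodes that back into a byte count.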
// We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel
extern void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                             int stack_offset, int reg, uint ireg, outputStream* st);

#undef __
#define __ _masm->

int SaveLiveRegisters::xmm_compare_register_size(XMMRegisterData* left, XMMRegisterData* right) {
  if (left->_size == right->_size) {
    return 0;
  }

  return (left->_size < right->_size) ? -1 : 1;
}

int SaveLiveRegisters::xmm_slot_size(OptoReg::Name opto_reg) {
  // The low order 4 bits of the opto_reg denote what size of the XMM register is live
  return (opto_reg & 15) << 3;
}

uint SaveLiveRegisters::xmm_ideal_reg_for_size(int reg_size) {
  switch (reg_size) {
  case 8:
    return Op_VecD;
  case 16:
    return Op_VecX;
  case 32:
    return Op_VecY;
  case 64:
    return Op_VecZ;
  default:
    fatal("Invalid register size %d", reg_size);
    return 0;
  }
}

bool SaveLiveRegisters::xmm_needs_vzeroupper() const {
  return _xmm_registers.is_nonempty() && _xmm_registers.at(0)._size > 16;
}

void SaveLiveRegisters::xmm_register_save(const XMMRegisterData& reg_data) {
  const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
  const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
  _spill_offset -= reg_data._size;
  C2_MacroAssembler c2_masm(__ code());
  vec_spill_helper(&c2_masm, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
}

void SaveLiveRegisters::xmm_register_restore(const XMMRegisterData& reg_data) {
  const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
  const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
  C2_MacroAssembler c2_masm(__ code());
  vec_spill_helper(&c2_masm, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
  _spill_offset += reg_data._size;
}

void SaveLiveRegisters::gp_register_save(Register reg) {
  _spill_offset -= 8;
  __ movq(Address(rsp, _spill_offset), reg);
}

void SaveLiveRegisters::opmask_register_save(KRegister reg) {
  _spill_offset -= 8;
  __ kmov(Address(rsp, _spill_offset), reg);
}

void SaveLiveRegisters::gp_register_restore(Register reg) {
  __ movq(reg, Address(rsp, _spill_offset));
  _spill_offset += 8;
}

void SaveLiveRegisters::opmask_register_restore(KRegister reg) {
  __ kmov(reg, Address(rsp, _spill_offset));
  _spill_offset += 8;
}
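
// initialize() computes the spill set from the stub's preserve_set(): only
// caller-saved general purpose registers are recorded, every live opmask
// register is recorded, and for XMM registers the largest live width wins.
// The total spill size, plus frame::arg_reg_save_area_bytes, is aligned up to
// 16 bytes so the stack pointer stays 16-byte aligned for the call.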
void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
  // Create mask of caller saved registers that need to
  // be saved/restored if live
  RegMask caller_saved;
  caller_saved.Insert(OptoReg::as_OptoReg(rax->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rcx->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rdx->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rsi->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rdi->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r8->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r9->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r10->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg()));

  if (UseAPX) {
    caller_saved.Insert(OptoReg::as_OptoReg(r16->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r17->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r18->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r19->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r20->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r21->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r22->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r23->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r24->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r25->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r26->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r27->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r28->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r29->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r30->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r31->as_VMReg()));
  }

  int gp_spill_size = 0;
  int opmask_spill_size = 0;
  int xmm_spill_size = 0;

  // Record registers that need to be saved/restored
  RegMaskIterator rmi(stub->preserve_set());
  while (rmi.has_next()) {
    const OptoReg::Name opto_reg = rmi.next();
    const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);

    if (vm_reg->is_Register()) {
      if (caller_saved.Member(opto_reg)) {
        _gp_registers.append(vm_reg->as_Register());
        gp_spill_size += 8;
      }
    } else if (vm_reg->is_KRegister()) {
      // All opmask registers are caller saved, thus spill the ones
      // which are live.
      if (_opmask_registers.find(vm_reg->as_KRegister()) == -1) {
        _opmask_registers.append(vm_reg->as_KRegister());
        opmask_spill_size += 8;
      }
    } else if (vm_reg->is_XMMRegister()) {
      // We encode in the low order 4 bits of the opto_reg how large a part of the register is live
      const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~15);
      const int reg_size = xmm_slot_size(opto_reg);
      const XMMRegisterData reg_data = { vm_reg_base->as_XMMRegister(), reg_size };
      const int reg_index = _xmm_registers.find(reg_data);
      if (reg_index == -1) {
        // Not previously appended
        _xmm_registers.append(reg_data);
        xmm_spill_size += reg_size;
      } else {
        // Previously appended, update size
        const int reg_size_prev = _xmm_registers.at(reg_index)._size;
        if (reg_size > reg_size_prev) {
          _xmm_registers.at_put(reg_index, reg_data);
          xmm_spill_size += reg_size - reg_size_prev;
        }
      }
    } else {
      fatal("Unexpected register type");
    }
  }

  // Sort by size, largest first
  _xmm_registers.sort(xmm_compare_register_size);

  // On Windows, the caller reserves stack space for spilling register arguments
  const int arg_spill_size = frame::arg_reg_save_area_bytes;

  // Stack pointer must be 16 bytes aligned for the call
  _spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size + opmask_spill_size + arg_spill_size, 16);
}
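
// The constructor below allocates the spill area and saves the registers
// recorded by initialize(); the destructor restores them in reverse order and
// releases the stack space, so a SaveLiveRegisters scope can bracket the
// stub's call into the runtime.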
SaveLiveRegisters::SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub)
  : _masm(masm),
    _gp_registers(),
    _opmask_registers(),
    _xmm_registers(),
    _spill_size(0),
    _spill_offset(0) {

  //
  // Stack layout after registers have been spilled:
  //
  // | ...            | original rsp, 16 bytes aligned
  // ------------------
  // | zmm0 high      |
  // | ...            |
  // | zmm0 low       | 16 bytes aligned
  // | ...            |
  // | ymm1 high      |
  // | ...            |
  // | ymm1 low       | 16 bytes aligned
  // | ...            |
  // | xmmN high      |
  // | ...            |
  // | xmmN low       | 8 bytes aligned
  // | reg0           | 8 bytes aligned
  // | reg1           |
  // | ...            |
  // | regN           | new rsp, if 16 bytes aligned
  // | <padding>      | else new rsp, 16 bytes aligned
  // ------------------
  //

  // Figure out what registers to save/restore
  initialize(stub);

  // Allocate stack space
  if (_spill_size > 0) {
    __ subptr(rsp, _spill_size);
  }

  // Save XMM/YMM/ZMM registers
  for (int i = 0; i < _xmm_registers.length(); i++) {
    xmm_register_save(_xmm_registers.at(i));
  }

  if (xmm_needs_vzeroupper()) {
    __ vzeroupper();
  }

  // Save general purpose registers
  for (int i = 0; i < _gp_registers.length(); i++) {
    gp_register_save(_gp_registers.at(i));
  }

  // Save opmask registers
  for (int i = 0; i < _opmask_registers.length(); i++) {
    opmask_register_save(_opmask_registers.at(i));
  }
}

SaveLiveRegisters::~SaveLiveRegisters() {
  // Restore opmask registers
  for (int i = _opmask_registers.length() - 1; i >= 0; i--) {
    opmask_register_restore(_opmask_registers.at(i));
  }

  // Restore general purpose registers
  for (int i = _gp_registers.length() - 1; i >= 0; i--) {
    gp_register_restore(_gp_registers.at(i));
  }

  __ vzeroupper();

  // Restore XMM/YMM/ZMM registers
  for (int i = _xmm_registers.length() - 1; i >= 0; i--) {
    xmm_register_restore(_xmm_registers.at(i));
  }

  // Free stack space
  if (_spill_size > 0) {
    __ addptr(rsp, _spill_size);
  }
}

#else // !_LP64

OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
  Unimplemented(); // This must be implemented to support late barrier expansion.
}

#endif // _LP64

#endif // COMPILER2