/*
 * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/classLoaderData.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "gc/shared/barrierSetRuntime.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/interp_masm.hpp"
#include "memory/universe.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifdef COMPILER2
#include "gc/shared/c2/barrierSetC2.hpp"
#endif // COMPILER2

#define __ masm->

void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                  Register dst, Address src, Register tmp1, Register tmp_thread) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  bool atomic = (decorators & MO_RELAXED) != 0;

  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
#ifdef _LP64
      if (UseCompressedOops) {
        __ movl(dst, src);
        if (is_not_null) {
          __ decode_heap_oop_not_null(dst);
        } else {
          __ decode_heap_oop(dst);
        }
      } else
#endif
      {
        __ movptr(dst, src);
      }
    } else {
      assert(in_native, "why else?");
      __ movptr(dst, src);
    }
    break;
  }
  case T_BOOLEAN: __ load_unsigned_byte(dst, src);  break;
  case T_BYTE:    __ load_signed_byte(dst, src);    break;
  case T_CHAR:    __ load_unsigned_short(dst, src); break;
  case T_SHORT:   __ load_signed_short(dst, src);   break;
  case T_INT:     __ movl  (dst, src);              break;
  case T_ADDRESS: __ movptr(dst, src);              break;
  case T_FLOAT:
    assert(dst == noreg, "only to ftos");
    __ load_float(src);
    break;
  case T_DOUBLE:
    assert(dst == noreg, "only to dtos");
    __ load_double(src);
    break;
  case T_LONG:
    assert(dst == noreg, "only to ltos");
#ifdef _LP64
    __ movq(rax, src);
#else
    if (atomic) {
      __ fild_d(src);               // Must load atomically
      __ subptr(rsp,2*wordSize);    // Make space for store
      __ fistp_d(Address(rsp,0));
      __ pop(rax);
      __ pop(rdx);
    } else {
      __ movl(rax, src);
      __ movl(rdx, src.plus_disp(wordSize));
    }
#endif
    break;
  default: Unimplemented();
  }
}
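
// Store a value of the given BasicType into dst, honoring the access decorators.
// For oop stores into the heap this applies compressed-oop encoding when enabled;
// passing noreg as val encodes storing a null reference.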
void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  bool atomic = (decorators & MO_RELAXED) != 0;

  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
      if (val == noreg) {
        assert(!is_not_null, "inconsistent access");
#ifdef _LP64
        if (UseCompressedOops) {
          __ movl(dst, NULL_WORD);
        } else {
          __ movslq(dst, NULL_WORD);
        }
#else
        __ movl(dst, NULL_WORD);
#endif
      } else {
#ifdef _LP64
        if (UseCompressedOops) {
          assert(!dst.uses(val), "not enough registers");
          if (is_not_null) {
            __ encode_heap_oop_not_null(val);
          } else {
            __ encode_heap_oop(val);
          }
          __ movl(dst, val);
        } else
#endif
        {
          __ movptr(dst, val);
        }
      }
    } else {
      assert(in_native, "why else?");
      assert(val != noreg, "not supported");
      __ movptr(dst, val);
    }
    break;
  }
  case T_BOOLEAN:
    __ andl(val, 0x1);  // boolean is true if LSB is 1
    __ movb(dst, val);
    break;
  case T_BYTE:
    __ movb(dst, val);
    break;
  case T_SHORT:
    __ movw(dst, val);
    break;
  case T_CHAR:
    __ movw(dst, val);
    break;
  case T_INT:
    __ movl(dst, val);
    break;
  case T_LONG:
    assert(val == noreg, "only tos");
#ifdef _LP64
    __ movq(dst, rax);
#else
    if (atomic) {
      __ push(rdx);
      __ push(rax);                 // Must update atomically with FIST
      __ fild_d(Address(rsp,0));    // So load into FPU register
      __ fistp_d(dst);              // and put into memory atomically
      __ addptr(rsp, 2*wordSize);
    } else {
      __ movptr(dst, rax);
      __ movptr(dst.plus_disp(wordSize), rdx);
    }
#endif
    break;
  case T_FLOAT:
    assert(val == noreg, "only tos");
    __ store_float(dst);
    break;
  case T_DOUBLE:
    assert(val == noreg, "only tos");
    __ store_double(dst);
    break;
  case T_ADDRESS:
    __ movptr(dst, val);
    break;
  default: Unimplemented();
  }
}

void BarrierSetAssembler::value_copy(MacroAssembler* masm, DecoratorSet decorators,
                                     Register src, Register dst, Register value_klass) {
  // value_copy implementation is fairly complex, and there are not any
  // "short-cuts" to be made from asm. What there is, appears to have the same
  // cost in C++, so just "call_VM_leaf" for now rather than maintain hundreds
  // of hand-rolled instructions...
  if (decorators & IS_DEST_UNINITIALIZED) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized), src, dst, value_klass);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy), src, dst, value_klass);
  }
}
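
// Copy a flat (flattened inline-type) field from src to dst; inline_layout_info is
// passed through to the runtime helper that performs the copy.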
void BarrierSetAssembler::flat_field_copy(MacroAssembler* masm, DecoratorSet decorators,
                                          Register src, Register dst, Register inline_layout_info) {
  // flat_field_copy implementation is fairly complex, and there are not any
  // "short-cuts" to be made from asm. What there is, appears to have the same
  // cost in C++, so just "call_VM_leaf" for now rather than maintain hundreds
  // of hand-rolled instructions...
  if (decorators & IS_DEST_UNINITIALIZED) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized2), src, dst, inline_layout_info);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy2), src, dst, inline_layout_info);
  }
}

void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       Register dst,
                                       Address src,
                                       Register tmp) {
  assert(bytes <= 8, "can only deal with non-vector registers");
  switch (bytes) {
  case 1:
    __ movb(dst, src);
    break;
  case 2:
    __ movw(dst, src);
    break;
  case 4:
    __ movl(dst, src);
    break;
  case 8:
#ifdef _LP64
    __ movq(dst, src);
#else
    fatal("No support for 8 bytes copy");
#endif
    break;
  default:
    fatal("Unexpected size");
  }
#ifdef _LP64
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ decode_heap_oop(dst);
  }
#endif
}

void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        Register src,
                                        Register tmp) {
#ifdef _LP64
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ encode_heap_oop(src);
  }
#endif
  assert(bytes <= 8, "can only deal with non-vector registers");
  switch (bytes) {
  case 1:
    __ movb(dst, src);
    break;
  case 2:
    __ movw(dst, src);
    break;
  case 4:
    __ movl(dst, src);
    break;
  case 8:
#ifdef _LP64
    __ movq(dst, src);
#else
    fatal("No support for 8 bytes copy");
#endif
    break;
  default:
    fatal("Unexpected size");
  }
}

void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       XMMRegister dst,
                                       Address src,
                                       Register tmp,
                                       XMMRegister xmm_tmp) {
  assert(bytes > 8, "can only deal with vector registers");
  if (bytes == 16) {
    __ movdqu(dst, src);
  } else if (bytes == 32) {
    __ vmovdqu(dst, src);
  } else {
    fatal("No support for >32 bytes copy");
  }
}

void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        XMMRegister src,
                                        Register tmp1,
                                        Register tmp2,
                                        XMMRegister xmm_tmp) {
  assert(bytes > 8, "can only deal with vector registers");
  if (bytes == 16) {
    __ movdqu(dst, src);
  } else if (bytes == 32) {
    __ vmovdqu(dst, src);
  } else {
    fatal("No support for >32 bytes copy");
  }
}

void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                        Register obj, Register tmp, Label& slowpath) {
  __ clear_jobject_tag(obj);
  __ movptr(obj, Address(obj, 0));
}
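
// Bump-allocate con_size_in_bytes (or var_size_in_bytes, if that register is valid)
// bytes from the current thread's TLAB. On success obj holds the address of the new
// object; if the TLAB does not have enough space, control branches to slow_case.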
void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm,
                                        Register thread, Register obj,
                                        Register var_size_in_bytes,
                                        int con_size_in_bytes,
                                        Register t1,
                                        Register t2,
                                        Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  if (!thread->is_valid()) {
#ifdef _LP64
    thread = r15_thread;
#else
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    __ get_thread(thread);
#endif
  }

  __ verify_tlab();

  __ movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    __ lea(end, Address(obj, con_size_in_bytes));
  } else {
    __ lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  __ cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  __ jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  __ movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    __ subptr(var_size_in_bytes, obj);
  }
  __ verify_tlab();
}

#ifdef _LP64
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs_nm == nullptr) {
    return;
  }
  Register thread = r15_thread;
  Address disarmed_addr(thread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
  // The immediate is the last 4 bytes, so if we align the start of the cmp
  // instruction to 4 bytes, we know that the second half of it is also 4
  // byte aligned, which means that the immediate will not cross a cache line
  __ align(4);
  uintptr_t before_cmp = (uintptr_t)__ pc();
  __ cmpl_imm32(disarmed_addr, 0);
  uintptr_t after_cmp = (uintptr_t)__ pc();
  guarantee(after_cmp - before_cmp == 8, "Wrong assumed instruction length");

  if (slow_path != nullptr) {
    __ jcc(Assembler::notEqual, *slow_path);
    __ bind(*continuation);
  } else {
    Label done;
    __ jccb(Assembler::equal, done);
    __ call(RuntimeAddress(StubRoutines::method_entry_barrier()));
    __ bind(done);
  }
}
#else
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label*, Label*) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs_nm == nullptr) {
    return;
  }

  Label continuation;

  Register tmp = rdi;
  __ push(tmp);
  __ movptr(tmp, (intptr_t)bs_nm->disarmed_guard_value_address());
  Address disarmed_addr(tmp, 0);
  __ align(4);
  __ cmpl_imm32(disarmed_addr, 0);
  __ pop(tmp);
  __ jcc(Assembler::equal, continuation);
  __ call(RuntimeAddress(StubRoutines::method_entry_barrier()));
  __ bind(continuation);
}
#endif
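
// Entry barrier for c2i adapters: check whether the incoming method (in rbx) belongs
// to a class loader that is still alive. If the method is being concurrently unloaded,
// jump to the handle_wrong_method stub instead of entering it.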
void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
  BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs == nullptr) {
    return;
  }

  Label bad_call;
  __ cmpptr(rbx, 0); // rbx contains the incoming method for c2i adapters.
  __ jcc(Assembler::equal, bad_call);

  Register tmp1 = LP64_ONLY( rscratch1 ) NOT_LP64( rax );
  Register tmp2 = LP64_ONLY( rscratch2 ) NOT_LP64( rcx );
#ifndef _LP64
  __ push(tmp1);
  __ push(tmp2);
#endif // !_LP64

  // Pointer chase to the method holder to find out if the method is concurrently unloading.
  Label method_live;
  __ load_method_holder_cld(tmp1, rbx);

  // Is it a strong CLD?
  __ cmpl(Address(tmp1, ClassLoaderData::keep_alive_ref_count_offset()), 0);
  __ jcc(Assembler::greater, method_live);

  // Is it a weak but alive CLD?
  __ movptr(tmp1, Address(tmp1, ClassLoaderData::holder_offset()));
  __ resolve_weak_handle(tmp1, tmp2);
  __ cmpptr(tmp1, 0);
  __ jcc(Assembler::notEqual, method_live);

#ifndef _LP64
  __ pop(tmp2);
  __ pop(tmp1);
#endif

  __ bind(bad_call);
  __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
  __ bind(method_live);

#ifndef _LP64
  __ pop(tmp2);
  __ pop(tmp1);
#endif
}

void BarrierSetAssembler::check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error) {
  // Check if the oop is in the right area of memory
  __ movptr(tmp1, obj);
  __ movptr(tmp2, (intptr_t) Universe::verify_oop_mask());
  __ andptr(tmp1, tmp2);
  __ movptr(tmp2, (intptr_t) Universe::verify_oop_bits());
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notZero, error);

  // make sure klass is 'reasonable', which is not zero.
  __ load_klass(obj, obj, tmp1);  // get klass
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, error); // if klass is null it is broken
}

#ifdef COMPILER2

#ifdef _LP64

OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
  if (!OptoReg::is_reg(opto_reg)) {
    return OptoReg::Bad;
  }

  const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
  if (vm_reg->is_XMMRegister()) {
    opto_reg &= ~15;
    switch (node->ideal_reg()) {
      case Op_VecX:
        opto_reg |= 2;
        break;
      case Op_VecY:
        opto_reg |= 4;
        break;
      case Op_VecZ:
        opto_reg |= 8;
        break;
      default:
        opto_reg |= 1;
        break;
    }
  }

  return opto_reg;
}

// We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel
extern void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                             int stack_offset, int reg, uint ireg, outputStream* st);
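
// SaveLiveRegisters spills the caller-saved registers that are live across a C2 barrier
// stub call in its constructor and restores them in reverse order in its destructor.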

#undef __
#define __ _masm->

int SaveLiveRegisters::xmm_compare_register_size(XMMRegisterData* left, XMMRegisterData* right) {
  if (left->_size == right->_size) {
    return 0;
  }

  return (left->_size < right->_size) ? -1 : 1;
}

int SaveLiveRegisters::xmm_slot_size(OptoReg::Name opto_reg) {
  // The low order 4 bits denote what size of the XMM register is live
  return (opto_reg & 15) << 3;
}

uint SaveLiveRegisters::xmm_ideal_reg_for_size(int reg_size) {
  switch (reg_size) {
    case 8:
      return Op_VecD;
    case 16:
      return Op_VecX;
    case 32:
      return Op_VecY;
    case 64:
      return Op_VecZ;
    default:
      fatal("Invalid register size %d", reg_size);
      return 0;
  }
}

bool SaveLiveRegisters::xmm_needs_vzeroupper() const {
  return _xmm_registers.is_nonempty() && _xmm_registers.at(0)._size > 16;
}

void SaveLiveRegisters::xmm_register_save(const XMMRegisterData& reg_data) {
  const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
  const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
  _spill_offset -= reg_data._size;
  C2_MacroAssembler c2_masm(__ code());
  vec_spill_helper(&c2_masm, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
}

void SaveLiveRegisters::xmm_register_restore(const XMMRegisterData& reg_data) {
  const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
  const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
  C2_MacroAssembler c2_masm(__ code());
  vec_spill_helper(&c2_masm, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
  _spill_offset += reg_data._size;
}

void SaveLiveRegisters::gp_register_save(Register reg) {
  _spill_offset -= 8;
  __ movq(Address(rsp, _spill_offset), reg);
}

void SaveLiveRegisters::opmask_register_save(KRegister reg) {
  _spill_offset -= 8;
  __ kmov(Address(rsp, _spill_offset), reg);
}

void SaveLiveRegisters::gp_register_restore(Register reg) {
  __ movq(reg, Address(rsp, _spill_offset));
  _spill_offset += 8;
}

void SaveLiveRegisters::opmask_register_restore(KRegister reg) {
  __ kmov(reg, Address(rsp, _spill_offset));
  _spill_offset += 8;
}
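
// Determine which live registers need to be spilled: caller-saved general purpose
// registers (including the APX extended registers when UseAPX is enabled), live
// opmask registers, and the live portions of XMM/YMM/ZMM registers.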
void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
  // Create mask of caller saved registers that need to
  // be saved/restored if live
  RegMask caller_saved;
  caller_saved.Insert(OptoReg::as_OptoReg(rax->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rcx->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rdx->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rsi->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rdi->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r8->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r9->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r10->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg()));

  if (UseAPX) {
    caller_saved.Insert(OptoReg::as_OptoReg(r16->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r17->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r18->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r19->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r20->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r21->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r22->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r23->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r24->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r25->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r26->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r27->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r28->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r29->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r30->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r31->as_VMReg()));
  }

  int gp_spill_size = 0;
  int opmask_spill_size = 0;
  int xmm_spill_size = 0;

  // Record registers that need to be saved/restored
  RegMaskIterator rmi(stub->preserve_set());
  while (rmi.has_next()) {
    const OptoReg::Name opto_reg = rmi.next();
    const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);

    if (vm_reg->is_Register()) {
      if (caller_saved.Member(opto_reg)) {
        _gp_registers.append(vm_reg->as_Register());
        gp_spill_size += 8;
      }
    } else if (vm_reg->is_KRegister()) {
      // All opmask registers are caller saved, thus spill the ones
      // which are live.
      if (_opmask_registers.find(vm_reg->as_KRegister()) == -1) {
        _opmask_registers.append(vm_reg->as_KRegister());
        opmask_spill_size += 8;
      }
    } else if (vm_reg->is_XMMRegister()) {
      // We encode in the low order 4 bits of the opto_reg how large a part of the register is live
      const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~15);
      const int reg_size = xmm_slot_size(opto_reg);
      const XMMRegisterData reg_data = { vm_reg_base->as_XMMRegister(), reg_size };
      const int reg_index = _xmm_registers.find(reg_data);
      if (reg_index == -1) {
        // Not previously appended
        _xmm_registers.append(reg_data);
        xmm_spill_size += reg_size;
      } else {
        // Previously appended, update size
        const int reg_size_prev = _xmm_registers.at(reg_index)._size;
        if (reg_size > reg_size_prev) {
          _xmm_registers.at_put(reg_index, reg_data);
          xmm_spill_size += reg_size - reg_size_prev;
        }
      }
    } else {
      fatal("Unexpected register type");
    }
  }

  // Sort by size, largest first
  _xmm_registers.sort(xmm_compare_register_size);

  // On Windows, the caller reserves stack space for spilling register arguments
  const int arg_spill_size = frame::arg_reg_save_area_bytes;

  // Stack pointer must be 16 bytes aligned for the call
  _spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size + opmask_spill_size + arg_spill_size, 16);
}

SaveLiveRegisters::SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub)
  : _masm(masm),
    _gp_registers(),
    _opmask_registers(),
    _xmm_registers(),
    _spill_size(0),
    _spill_offset(0) {

  //
  // Stack layout after registers have been spilled:
  //
  // | ...            | original rsp, 16 bytes aligned
  // ------------------
  // | zmm0 high      |
  // | ...            |
  // | zmm0 low       | 16 bytes aligned
  // | ...            |
  // | ymm1 high      |
  // | ...            |
  // | ymm1 low       | 16 bytes aligned
  // | ...            |
  // | xmmN high      |
  // | ...            |
  // | xmmN low       | 8 bytes aligned
  // | reg0           | 8 bytes aligned
  // | reg1           |
  // | ...            |
  // | regN           | new rsp, if 16 bytes aligned
  // | <padding>      | else new rsp, 16 bytes aligned
  // ------------------
  //

  // Figure out what registers to save/restore
  initialize(stub);

  // Allocate stack space
  if (_spill_size > 0) {
    __ subptr(rsp, _spill_size);
  }

  // Save XMM/YMM/ZMM registers
  for (int i = 0; i < _xmm_registers.length(); i++) {
    xmm_register_save(_xmm_registers.at(i));
  }

  if (xmm_needs_vzeroupper()) {
    __ vzeroupper();
  }

  // Save general purpose registers
  for (int i = 0; i < _gp_registers.length(); i++) {
    gp_register_save(_gp_registers.at(i));
  }

  // Save opmask registers
  for (int i = 0; i < _opmask_registers.length(); i++) {
    opmask_register_save(_opmask_registers.at(i));
  }
}

SaveLiveRegisters::~SaveLiveRegisters() {
  // Restore opmask registers
  for (int i = _opmask_registers.length() - 1; i >= 0; i--) {
    opmask_register_restore(_opmask_registers.at(i));
  }

  // Restore general purpose registers
  for (int i = _gp_registers.length() - 1; i >= 0; i--) {
    gp_register_restore(_gp_registers.at(i));
  }

  __ vzeroupper();

  // Restore XMM/YMM/ZMM registers
  for (int i = _xmm_registers.length() - 1; i >= 0; i--) {
    xmm_register_restore(_xmm_registers.at(i));
  }

  // Free stack space
  if (_spill_size > 0) {
    __ addptr(rsp, _spill_size);
  }
}

#else // !_LP64

OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
  Unimplemented(); // This must be implemented to support late barrier expansion.
}

#endif // _LP64

#endif // COMPILER2