1 /*
2 * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "classfile/classLoaderData.hpp"
26 #include "gc/shared/barrierSet.hpp"
27 #include "gc/shared/barrierSetAssembler.hpp"
28 #include "gc/shared/barrierSetNMethod.hpp"
29 #include "gc/shared/barrierSetRuntime.hpp"
30 #include "gc/shared/collectedHeap.hpp"
31 #include "interpreter/interp_masm.hpp"
32 #include "memory/universe.hpp"
33 #include "runtime/javaThread.hpp"
34 #include "runtime/jniHandles.hpp"
35 #include "runtime/sharedRuntime.hpp"
36 #include "runtime/stubRoutines.hpp"
37 #ifdef COMPILER2
38 #include "code/vmreg.inline.hpp"
39 #include "gc/shared/c2/barrierSetC2.hpp"
40 #endif // COMPILER2
41
42
// Shorthand for the code generators below: "__ insn(...)" expands to
// "masm->insn(...)", i.e. it emits through the function's masm parameter.
#define __ masm->
44
45 void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
46 Register dst, Address src, Register tmp1, Register tmp2) {
47
48 // LR is live. It must be saved around calls.
49
50 bool in_heap = (decorators & IN_HEAP) != 0;
51 bool in_native = (decorators & IN_NATIVE) != 0;
52 bool is_not_null = (decorators & IS_NOT_NULL) != 0;
53 switch (type) {
54 case T_OBJECT:
55 case T_ARRAY: {
56 if (in_heap) {
57 if (UseCompressedOops) {
58 __ ldrw(dst, src);
59 if (is_not_null) {
60 __ decode_heap_oop_not_null(dst);
61 } else {
62 __ decode_heap_oop(dst);
63 }
64 } else {
65 __ ldr(dst, src);
66 }
67 } else {
68 assert(in_native, "why else?");
69 __ ldr(dst, src);
70 }
71 break;
72 }
73 case T_BOOLEAN: __ load_unsigned_byte (dst, src); break;
74 case T_BYTE: __ load_signed_byte (dst, src); break;
75 case T_CHAR: __ load_unsigned_short(dst, src); break;
76 case T_SHORT: __ load_signed_short (dst, src); break;
77 case T_INT: __ ldrw (dst, src); break;
78 case T_LONG: __ ldr (dst, src); break;
79 case T_ADDRESS: __ ldr (dst, src); break;
80 case T_FLOAT: __ ldrs (v0, src); break;
81 case T_DOUBLE: __ ldrd (v0, src); break;
82 default: Unimplemented();
83 }
84 }
85
86 void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
87 Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
88 bool in_heap = (decorators & IN_HEAP) != 0;
89 bool in_native = (decorators & IN_NATIVE) != 0;
90 bool is_not_null = (decorators & IS_NOT_NULL) != 0;
91
92 switch (type) {
93 case T_OBJECT:
94 case T_ARRAY: {
95 if (in_heap) {
96 if (val == noreg) {
97 assert(!is_not_null, "inconsistent access");
98 if (UseCompressedOops) {
99 __ strw(zr, dst);
100 } else {
101 __ str(zr, dst);
102 }
103 } else {
104 if (UseCompressedOops) {
105 assert(!dst.uses(val), "not enough registers");
106 if (is_not_null) {
107 __ encode_heap_oop_not_null(val);
108 } else {
109 __ encode_heap_oop(val);
110 }
111 __ strw(val, dst);
112 } else {
113 __ str(val, dst);
114 }
115 }
116 } else {
117 assert(in_native, "why else?");
118 assert(val != noreg, "not supported");
119 __ str(val, dst);
120 }
121 break;
122 }
123 case T_BOOLEAN:
124 __ andw(val, val, 0x1); // boolean is true if LSB is 1
125 __ strb(val, dst);
126 break;
127 case T_BYTE: __ strb(val, dst); break;
128 case T_CHAR: __ strh(val, dst); break;
129 case T_SHORT: __ strh(val, dst); break;
130 case T_INT: __ strw(val, dst); break;
131 case T_LONG: __ str (val, dst); break;
132 case T_ADDRESS: __ str (val, dst); break;
133 case T_FLOAT: __ strs(v0, dst); break;
134 case T_DOUBLE: __ strd(v0, dst); break;
135 default: Unimplemented();
136 }
137 }
138
139 void BarrierSetAssembler::flat_field_copy(MacroAssembler* masm, DecoratorSet decorators,
140 Register src, Register dst, Register inline_layout_info) {
141 // flat_field_copy implementation is fairly complex, and there are not any
142 // "short-cuts" to be made from asm. What there is, appears to have the same
143 // cost in C++, so just "call_VM_leaf" for now rather than maintain hundreds
144 // of hand-rolled instructions...
145 if (decorators & IS_DEST_UNINITIALIZED) {
146 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized), src, dst, inline_layout_info);
147 } else {
148 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy), src, dst, inline_layout_info);
149 }
150 }
151
152 void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
153 DecoratorSet decorators,
154 BasicType type,
155 size_t bytes,
156 Register dst1,
157 Register dst2,
158 Address src,
159 Register tmp) {
160 if (bytes == 1) {
161 assert(dst2 == noreg, "invariant");
162 __ ldrb(dst1, src);
163 } else if (bytes == 2) {
164 assert(dst2 == noreg, "invariant");
165 __ ldrh(dst1, src);
166 } else if (bytes == 4) {
167 assert(dst2 == noreg, "invariant");
168 __ ldrw(dst1, src);
169 } else if (bytes == 8) {
170 assert(dst2 == noreg, "invariant");
171 __ ldr(dst1, src);
172 } else if (bytes == 16) {
173 assert(dst2 != noreg, "invariant");
174 assert(dst2 != dst1, "invariant");
175 __ ldp(dst1, dst2, src);
176 } else {
177 // Not the right size
178 ShouldNotReachHere();
179 }
180 if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
181 __ decode_heap_oop(dst1);
182 }
183 }
184
185 void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
186 DecoratorSet decorators,
187 BasicType type,
188 size_t bytes,
189 Address dst,
190 Register src1,
191 Register src2,
192 Register tmp1,
193 Register tmp2,
194 Register tmp3) {
195 if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
196 __ encode_heap_oop(src1);
197 }
198 if (bytes == 1) {
199 assert(src2 == noreg, "invariant");
200 __ strb(src1, dst);
201 } else if (bytes == 2) {
202 assert(src2 == noreg, "invariant");
203 __ strh(src1, dst);
204 } else if (bytes == 4) {
205 assert(src2 == noreg, "invariant");
206 __ strw(src1, dst);
207 } else if (bytes == 8) {
208 assert(src2 == noreg, "invariant");
209 __ str(src1, dst);
210 } else if (bytes == 16) {
211 assert(src2 != noreg, "invariant");
212 assert(src2 != src1, "invariant");
213 __ stp(src1, src2, dst);
214 } else {
215 // Not the right size
216 ShouldNotReachHere();
217 }
218 }
219
220 void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
221 DecoratorSet decorators,
222 BasicType type,
223 size_t bytes,
224 FloatRegister dst1,
225 FloatRegister dst2,
226 Address src,
227 Register tmp1,
228 Register tmp2,
229 FloatRegister vec_tmp) {
230 if (bytes == 32) {
231 __ ldpq(dst1, dst2, src);
232 } else {
233 ShouldNotReachHere();
234 }
235 }
236
237 void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
238 DecoratorSet decorators,
239 BasicType type,
240 size_t bytes,
241 Address dst,
242 FloatRegister src1,
243 FloatRegister src2,
244 Register tmp1,
245 Register tmp2,
246 Register tmp3,
247 FloatRegister vec_tmp1,
248 FloatRegister vec_tmp2,
249 FloatRegister vec_tmp3) {
250 if (bytes == 32) {
251 __ stpq(src1, src2, dst);
252 } else {
253 ShouldNotReachHere();
254 }
255 }
256
257 void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
258 Register obj, Register tmp, Label& slowpath) {
259 // If mask changes we need to ensure that the inverse is still encodable as an immediate
260 STATIC_ASSERT(JNIHandles::tag_mask == 0b11);
261 __ andr(obj, obj, ~JNIHandles::tag_mask);
262 __ ldr(obj, Address(obj, 0)); // *obj
263 }
264
265 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
266 void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj,
267 Register var_size_in_bytes,
268 int con_size_in_bytes,
269 Register t1,
270 Register t2,
271 Label& slow_case) {
272 assert_different_registers(obj, t2);
273 assert_different_registers(obj, var_size_in_bytes);
274 Register end = t2;
275
276 // verify_tlab();
277
278 __ ldr(obj, Address(rthread, JavaThread::tlab_top_offset()));
279 if (var_size_in_bytes == noreg) {
280 __ lea(end, Address(obj, con_size_in_bytes));
281 } else {
282 __ lea(end, Address(obj, var_size_in_bytes));
283 }
284 __ ldr(rscratch1, Address(rthread, JavaThread::tlab_end_offset()));
285 __ cmp(end, rscratch1);
286 __ br(Assembler::HI, slow_case);
287
288 // update the tlab top pointer
289 __ str(end, Address(rthread, JavaThread::tlab_top_offset()));
290
291 // recover var_size_in_bytes if necessary
292 if (var_size_in_bytes == end) {
293 __ sub(var_size_in_bytes, var_size_in_bytes, obj);
294 }
295 // verify_tlab();
296 }
297
// Global patching epoch counter. It is exposed through patching_epoch_addr(),
// bumped by increment_patching_epoch(), reset by clear_patching_epoch(), and its
// address is embedded in the code emitted by nmethod_entry_barrier() for the
// conc_instruction_and_data_patch patching type.
static volatile uint32_t _patching_epoch = 0;
299
300 address BarrierSetAssembler::patching_epoch_addr() {
301 return (address)&_patching_epoch;
302 }
303
// Advances the global patching epoch counter by one via AtomicAccess::inc.
void BarrierSetAssembler::increment_patching_epoch() {
  AtomicAccess::inc(&_patching_epoch);
}
307
// Resets the global patching epoch counter to zero using a plain store to the
// volatile variable (not an AtomicAccess operation).
void BarrierSetAssembler::clear_patching_epoch() {
  _patching_epoch = 0;
}
311
312 void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation, Label* guard) {
313 BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
314
315 Label local_guard;
316 Label skip_barrier;
317 NMethodPatchingType patching_type = nmethod_patching_type();
318
319 if (slow_path == nullptr) {
320 guard = &local_guard;
321 }
322
323 // If the slow path is out of line in a stub, we flip the condition
324 Assembler::Condition condition = slow_path == nullptr ? Assembler::EQ : Assembler::NE;
325 Label& barrier_target = slow_path == nullptr ? skip_barrier : *slow_path;
326
327 __ ldrw(rscratch1, *guard);
328
329 if (patching_type == NMethodPatchingType::stw_instruction_and_data_patch) {
330 // With STW patching, no data or instructions are updated concurrently,
331 // which means there isn't really any need for any fencing for neither
332 // data nor instruction modifications happening concurrently. The
333 // instruction patching is handled with isb fences on the way back
334 // from the safepoint to Java. So here we can do a plain conditional
335 // branch with no fencing.
336 Address thread_disarmed_addr(rthread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
337 __ ldrw(rscratch2, thread_disarmed_addr);
338 __ cmp(rscratch1, rscratch2);
339 } else if (patching_type == NMethodPatchingType::conc_instruction_and_data_patch) {
340 // If we patch code we need both a code patching and a loadload
341 // fence. It's not super cheap, so we use a global epoch mechanism
342 // to hide them in a slow path.
343 // The high level idea of the global epoch mechanism is to detect
344 // when any thread has performed the required fencing, after the
345 // last nmethod was disarmed. This implies that the required
346 // fencing has been performed for all preceding nmethod disarms
347 // as well. Therefore, we do not need any further fencing.
348 __ lea(rscratch2, ExternalAddress((address)&_patching_epoch));
349 // Embed an artificial data dependency to order the guard load
350 // before the epoch load.
351 __ orr(rscratch2, rscratch2, rscratch1, Assembler::LSR, 32);
352 // Read the global epoch value.
353 __ ldrw(rscratch2, rscratch2);
354 // Combine the guard value (low order) with the epoch value (high order).
355 __ orr(rscratch1, rscratch1, rscratch2, Assembler::LSL, 32);
356 // Compare the global values with the thread-local values.
357 Address thread_disarmed_and_epoch_addr(rthread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
358 __ ldr(rscratch2, thread_disarmed_and_epoch_addr);
359 __ cmp(rscratch1, rscratch2);
360 } else {
361 ShouldNotReachHere();
362 }
363 __ br(condition, barrier_target);
364
365 if (slow_path == nullptr) {
366 __ lea(rscratch1, RuntimeAddress(StubRoutines::method_entry_barrier()));
367 __ blr(rscratch1);
368 __ b(skip_barrier);
369
370 __ bind(local_guard);
371
372 __ emit_int32(0); // nmethod guard value. Skipped over in common case.
373 } else {
374 __ bind(*continuation);
375 }
376
377 __ bind(skip_barrier);
378 }
379
380 void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
381 Label bad_call;
382 __ cbz(rmethod, bad_call);
383
384 // Pointer chase to the method holder to find out if the method is concurrently unloading.
385 Label method_live;
386 __ load_method_holder_cld(rscratch1, rmethod);
387
388 // Is it a strong CLD?
389 __ ldrw(rscratch2, Address(rscratch1, ClassLoaderData::keep_alive_ref_count_offset()));
390 __ cbnz(rscratch2, method_live);
391
392 // Is it a weak but alive CLD?
393 __ push(RegSet::of(r10), sp);
394 __ ldr(r10, Address(rscratch1, ClassLoaderData::holder_offset()));
395
396 __ resolve_weak_handle(r10, rscratch1, rscratch2);
397 __ mov(rscratch1, r10);
398 __ pop(RegSet::of(r10), sp);
399 __ cbnz(rscratch1, method_live);
400
401 __ bind(bad_call);
402
403 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
404 __ bind(method_live);
405 }
406
407 void BarrierSetAssembler::check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error) {
408 // Check if the oop is in the right area of memory
409 __ mov(tmp2, (intptr_t) Universe::verify_oop_mask());
410 __ andr(tmp1, obj, tmp2);
411 __ mov(tmp2, (intptr_t) Universe::verify_oop_bits());
412
413 // Compare tmp1 and tmp2. We don't use a compare
414 // instruction here because the flags register is live.
415 __ eor(tmp1, tmp1, tmp2);
416 __ cbnz(tmp1, error);
417
418 // make sure klass is 'reasonable', which is not zero.
419 __ load_klass(obj, obj); // get klass
420 __ cbz(obj, error); // if klass is null it is broken
421 }
422
423 void BarrierSetAssembler::try_peek_weak_handle_in_nmethod(MacroAssembler* masm, Register weak_handle, Register obj, Register tmp, Label& slow_path) {
424 // Load the oop from the weak handle without barriers.
425 __ ldr(obj, Address(weak_handle));
426 }
427
428 #ifdef COMPILER2
429
430 OptoReg::Name BarrierSetAssembler::encode_float_vector_register_size(const Node* node, OptoReg::Name opto_reg) {
431 switch (node->ideal_reg()) {
432 case Op_RegF:
433 case Op_RegI: // RA may place scalar values (Op_RegI/N/L/P) in FP registers when UseFPUForSpilling is enabled
434 case Op_RegN:
435 // No need to refine. The original encoding is already fine to distinguish.
436 assert(opto_reg % 4 == 0, "32-bit register should only occupy a single slot");
437 break;
438 // Use different encoding values of the same fp/vector register to help distinguish different sizes.
439 // Such as V16. The OptoReg::name and its corresponding slot value are
440 // "V16": 64, "V16_H": 65, "V16_J": 66, "V16_K": 67.
441 case Op_RegD:
442 case Op_VecD:
443 case Op_RegL:
444 case Op_RegP:
445 opto_reg &= ~3;
446 opto_reg |= 1;
447 break;
448 case Op_VecX:
449 opto_reg &= ~3;
450 opto_reg |= 2;
451 break;
452 case Op_VecA:
453 opto_reg &= ~3;
454 opto_reg |= 3;
455 break;
456 default:
457 assert(false, "unexpected ideal register");
458 ShouldNotReachHere();
459 }
460 return opto_reg;
461 }
462
463 OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
464 if (!OptoReg::is_reg(opto_reg)) {
465 return OptoReg::Bad;
466 }
467
468 const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
469 if (vm_reg->is_FloatRegister()) {
470 opto_reg = encode_float_vector_register_size(node, opto_reg);
471 }
472
473 return opto_reg;
474 }
// From here on "__ insn(...)" expands to "_masm->insn(...)", i.e. it emits
// through the _masm member used by the SaveLiveRegisters code below.
#undef __
#define __ _masm->
477
478 void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
479 int index = -1;
480 GrowableArray<RegisterData> registers;
481 VMReg prev_vm_reg = VMRegImpl::Bad();
482
483 RegMaskIterator rmi(stub->preserve_set());
484 while (rmi.has_next()) {
485 OptoReg::Name opto_reg = rmi.next();
486 VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
487
488 if (vm_reg->is_Register()) {
489 // GPR may have one or two slots in regmask
490 // Determine whether the current vm_reg is the same physical register as the previous one
491 if (is_same_register(vm_reg, prev_vm_reg)) {
492 registers.at(index)._slots++;
493 } else {
494 RegisterData reg_data = { vm_reg, 1 };
495 index = registers.append(reg_data);
496 }
497 } else if (vm_reg->is_FloatRegister()) {
498 // We have size encoding in OptoReg of stub->preserve_set()
499 // After encoding, float/neon/sve register has only one slot in regmask
500 // Decode it to get the actual size
501 VMReg vm_reg_base = vm_reg->as_FloatRegister()->as_VMReg();
502 int slots = decode_float_vector_register_size(opto_reg);
503 RegisterData reg_data = { vm_reg_base, slots };
504 index = registers.append(reg_data);
505 } else if (vm_reg->is_PRegister()) {
506 // PRegister has only one slot in regmask
507 RegisterData reg_data = { vm_reg, 1 };
508 index = registers.append(reg_data);
509 } else {
510 assert(false, "Unknown register type");
511 ShouldNotReachHere();
512 }
513 prev_vm_reg = vm_reg;
514 }
515
516 // Record registers that needs to be saved/restored
517 for (GrowableArrayIterator<RegisterData> it = registers.begin(); it != registers.end(); ++it) {
518 RegisterData reg_data = *it;
519 VMReg vm_reg = reg_data._reg;
520 int slots = reg_data._slots;
521 if (vm_reg->is_Register()) {
522 assert(slots == 1 || slots == 2, "Unexpected register save size");
523 _gp_regs += RegSet::of(vm_reg->as_Register());
524 } else if (vm_reg->is_FloatRegister()) {
525 if (slots == 1 || slots == 2) {
526 _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
527 } else if (slots == 4) {
528 _neon_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
529 } else {
530 assert(slots == Matcher::scalable_vector_reg_size(T_FLOAT), "Unexpected register save size");
531 _sve_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
532 }
533 } else {
534 assert(vm_reg->is_PRegister() && slots == 1, "Unknown register type");
535 _p_regs += PRegSet::of(vm_reg->as_PRegister());
536 }
537 }
538
539 // Remove C-ABI SOE registers and scratch regs
540 _gp_regs -= RegSet::range(r19, r30) + RegSet::of(r8, r9);
541
542 // Remove C-ABI SOE fp registers
543 _fp_regs -= FloatRegSet::range(v8, v15);
544 }
545
546 enum RC SaveLiveRegisters::rc_class(VMReg reg) {
547 if (reg->is_reg()) {
548 if (reg->is_Register()) {
549 return rc_int;
550 } else if (reg->is_FloatRegister()) {
551 return rc_float;
552 } else if (reg->is_PRegister()) {
553 return rc_predicate;
554 }
555 }
556 if (reg->is_stack()) {
557 return rc_stack;
558 }
559 return rc_bad;
560 }
561
562 bool SaveLiveRegisters::is_same_register(VMReg reg1, VMReg reg2) {
563 if (reg1 == reg2) {
564 return true;
565 }
566 if (rc_class(reg1) == rc_class(reg2)) {
567 if (reg1->is_Register()) {
568 return reg1->as_Register() == reg2->as_Register();
569 } else if (reg1->is_FloatRegister()) {
570 return reg1->as_FloatRegister() == reg2->as_FloatRegister();
571 } else if (reg1->is_PRegister()) {
572 return reg1->as_PRegister() == reg2->as_PRegister();
573 }
574 }
575 return false;
576 }
577
578 int SaveLiveRegisters::decode_float_vector_register_size(OptoReg::Name opto_reg) {
579 switch (opto_reg & 3) {
580 case 0:
581 return 1;
582 case 1:
583 return 2;
584 case 2:
585 return 4;
586 case 3:
587 return Matcher::scalable_vector_reg_size(T_FLOAT);
588 default:
589 ShouldNotReachHere();
590 return 0;
591 }
592 }
593
// Determines which registers the stub needs preserved (see initialize()) and
// emits the code that pushes them onto the stack. The destructor emits the
// matching pops in the reverse order.
SaveLiveRegisters::SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub)
  : _masm(masm),
    _gp_regs(),
    _fp_regs(),
    _neon_regs(),
    _sve_regs(),
    _p_regs() {

  // Figure out what registers to save/restore
  initialize(stub);

  // Save registers: general-purpose first, then FP, NEON, SVE and finally
  // the predicate registers.
  __ push(_gp_regs, sp);
  __ push_fp(_fp_regs, sp, MacroAssembler::PushPopFp);
  __ push_fp(_neon_regs, sp, MacroAssembler::PushPopNeon);
  __ push_fp(_sve_regs, sp, MacroAssembler::PushPopSVE);
  __ push_p(_p_regs, sp);
}
612
// Emits the code that restores the registers pushed by the constructor,
// popping the register sets in the reverse of the order they were pushed.
SaveLiveRegisters::~SaveLiveRegisters() {
  // Restore registers
  __ pop_p(_p_regs, sp);
  __ pop_fp(_sve_regs, sp, MacroAssembler::PushPopSVE);
  __ pop_fp(_neon_regs, sp, MacroAssembler::PushPopNeon);
  __ pop_fp(_fp_regs, sp, MacroAssembler::PushPopFp);

  // External runtime call may clobber ptrue reg
  __ reinitialize_ptrue();

  // The general-purpose registers were pushed first, so they are popped last.
  __ pop(_gp_regs, sp);
}
625
626 #endif // COMPILER2