/*
 * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2024 SAP SE. All rights reserved.
 * Copyright 2024, 2026 IBM Corporation. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "asm/codeBuffer.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/compiledIC.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "oops/accessDecorators.hpp"
#include "oops/compressedKlass.inline.hpp"
#include "oops/compressedOops.inline.hpp"
#include "oops/klass.inline.hpp"
#include "prims/methodHandles.hpp"
#include "registerSaver_s390.hpp"
#include "runtime/icache.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/objectMonitorTable.hpp"
#include "runtime/os.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/events.hpp"
#include "utilities/macros.hpp"
#include "utilities/powerOfTwo.hpp"

#include <ucontext.h>

#define BLOCK_COMMENT(str) block_comment(str)
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Move 32-bit register if destination and source are different.
void MacroAssembler::lr_if_needed(Register rd, Register rs) {
  if (rs != rd) { z_lr(rd, rs); }
}

// Move register if destination and source are different.
void MacroAssembler::lgr_if_needed(Register rd, Register rs) {
  if (rs != rd) { z_lgr(rd, rs); }
}

// Zero-extend 32-bit register into 64-bit register if destination and source are different.
void MacroAssembler::llgfr_if_needed(Register rd, Register rs) {
  if (rs != rd) { z_llgfr(rd, rs); }
}

// Move float register if destination and source are different.
void MacroAssembler::ldr_if_needed(FloatRegister rd, FloatRegister rs) {
  if (rs != rd) { z_ldr(rd, rs); }
}

// Move integer register if destination and source are different.
// It is assumed that shorter-than-int types are already
// appropriately sign-extended.
void MacroAssembler::move_reg_if_needed(Register dst, BasicType dst_type, Register src,
                                        BasicType src_type) {
  assert((dst_type != T_FLOAT) && (dst_type != T_DOUBLE), "use move_freg for float types");
  assert((src_type != T_FLOAT) && (src_type != T_DOUBLE), "use move_freg for float types");

  if (dst_type == src_type) {
    lgr_if_needed(dst, src); // Just move all 64 bits.
    return;
  }

  switch (dst_type) {
    // Do not support these types for now.
    //  case T_BOOLEAN:
    case T_BYTE:  // signed byte
      switch (src_type) {
        case T_INT:
          z_lgbr(dst, src);
          break;
        default:
          ShouldNotReachHere();
      }
      return;

    case T_CHAR:
    case T_SHORT:
      switch (src_type) {
        case T_INT:
          if (dst_type == T_CHAR) {
            z_llghr(dst, src);
          } else {
            z_lghr(dst, src);
          }
          break;
        default:
          ShouldNotReachHere();
      }
      return;

    case T_INT:
      switch (src_type) {
        case T_BOOLEAN:
        case T_BYTE:
        case T_CHAR:
        case T_SHORT:
        case T_INT:
        case T_LONG:
        case T_OBJECT:
        case T_ARRAY:
        case T_VOID:
        case T_ADDRESS:
          lr_if_needed(dst, src);
          // llgfr_if_needed(dst, src);  // zero-extend (in case we need to find a bug).
          return;

        default:
          assert(false, "non-integer src type");
          return;
      }
    case T_LONG:
      switch (src_type) {
        case T_BOOLEAN:
        case T_BYTE:
        case T_CHAR:
        case T_SHORT:
        case T_INT:
          z_lgfr(dst, src); // sign extension
          return;

        case T_LONG:
        case T_OBJECT:
        case T_ARRAY:
        case T_VOID:
        case T_ADDRESS:
          lgr_if_needed(dst, src);
          return;

        default:
          assert(false, "non-integer src type");
          return;
      }
      return;
    case T_OBJECT:
    case T_ARRAY:
    case T_VOID:
    case T_ADDRESS:
      switch (src_type) {
        // These types don't make sense to be converted to pointers:
        //   case T_BOOLEAN:
        //   case T_BYTE:
        //   case T_CHAR:
        //   case T_SHORT:

        case T_INT:
          z_llgfr(dst, src); // zero extension
          return;

        case T_LONG:
        case T_OBJECT:
        case T_ARRAY:
        case T_VOID:
        case T_ADDRESS:
          lgr_if_needed(dst, src);
          return;

        default:
          assert(false, "non-integer src type");
          return;
      }
      return;
    default:
      assert(false, "non-integer dst type");
      return;
  }
}
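
// Illustrative example (a sketch, not emitted anywhere in particular):
// converting an int in Z_R2 to a long in Z_R3 sign-extends,
//   move_reg_if_needed(Z_R3, T_LONG, Z_R2, T_INT);    // emits LGFR Z_R3,Z_R2
// while converting to an address-sized value zero-extends,
//   move_reg_if_needed(Z_R3, T_ADDRESS, Z_R2, T_INT); // emits LLGFR Z_R3,Z_R2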

// Move float register if destination and source are different.
void MacroAssembler::move_freg_if_needed(FloatRegister dst, BasicType dst_type,
                                         FloatRegister src, BasicType src_type) {
  assert((dst_type == T_FLOAT) || (dst_type == T_DOUBLE), "use move_reg for int types");
  assert((src_type == T_FLOAT) || (src_type == T_DOUBLE), "use move_reg for int types");
  if (dst_type == src_type) {
    ldr_if_needed(dst, src); // Just move all 64 bits.
  } else {
    switch (dst_type) {
      case T_FLOAT:
        assert(src_type == T_DOUBLE, "invalid float type combination");
        z_ledbr(dst, src);
        return;
      case T_DOUBLE:
        assert(src_type == T_FLOAT, "invalid float type combination");
        z_ldebr(dst, src);
        return;
      default:
        assert(false, "non-float dst type");
        return;
    }
  }
}

// Optimized emitter for reg to mem operations.
// Uses modern instructions if running on modern hardware, classic instructions
// otherwise. Prefers (usually shorter) classic instructions if applicable.
// Data register (reg) cannot be used as work register.
//
// Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
void MacroAssembler::freg2mem_opt(FloatRegister reg,
                                  int64_t disp,
                                  Register index,
                                  Register base,
                                  void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
                                  void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
                                  Register scratch) {
  index = (index == noreg) ? Z_R0 : index;
  if (Displacement::is_shortDisp(disp)) {
    (this->*classic)(reg, disp, index, base);
  } else {
    if (Displacement::is_validDisp(disp)) {
      (this->*modern)(reg, disp, index, base);
    } else {
      if (scratch != Z_R0 && scratch != Z_R1) {
        (this->*modern)(reg, disp, index, base);     // Will fail with disp out of range.
      } else {
        if (scratch != Z_R0) { // scratch == Z_R1
          if ((scratch == index) || (index == base)) {
            (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
          } else {
            add2reg(scratch, disp, base);
            (this->*classic)(reg, 0, index, scratch);
            if (base == scratch) {
              add2reg(base, -disp); // Restore base.
            }
          }
        } else { // scratch == Z_R0
          z_lgr(scratch, base);
          add2reg(base, disp);
          (this->*classic)(reg, 0, index, base);
          z_lgr(base, scratch); // Restore base.
        }
      }
    }
  }
}

void MacroAssembler::freg2mem_opt(FloatRegister reg, const Address &a, bool is_double) {
  if (is_double) {
    freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stdy), CLASSIC_FFUN(z_std));
  } else {
    freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stey), CLASSIC_FFUN(z_ste));
  }
}

// Optimized emitter for mem to reg operations.
// Uses modern instructions if running on modern hardware, classic instructions
// otherwise. Prefers (usually shorter) classic instructions if applicable.
// Data register (reg) cannot be used as work register.
//
// Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
void MacroAssembler::mem2freg_opt(FloatRegister reg,
                                  int64_t disp,
                                  Register index,
                                  Register base,
                                  void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
                                  void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
                                  Register scratch) {
  index = (index == noreg) ? Z_R0 : index;
  if (Displacement::is_shortDisp(disp)) {
    (this->*classic)(reg, disp, index, base);
  } else {
    if (Displacement::is_validDisp(disp)) {
      (this->*modern)(reg, disp, index, base);
    } else {
      if (scratch != Z_R0 && scratch != Z_R1) {
        (this->*modern)(reg, disp, index, base);     // Will fail with disp out of range.
      } else {
        if (scratch != Z_R0) { // scratch == Z_R1
          if ((scratch == index) || (index == base)) {
            (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
          } else {
            add2reg(scratch, disp, base);
            (this->*classic)(reg, 0, index, scratch);
            if (base == scratch) {
              add2reg(base, -disp); // Restore base.
            }
          }
        } else { // scratch == Z_R0
          z_lgr(scratch, base);
          add2reg(base, disp);
          (this->*classic)(reg, 0, index, base);
          z_lgr(base, scratch); // Restore base.
        }
      }
    }
  }
}

void MacroAssembler::mem2freg_opt(FloatRegister reg, const Address &a, bool is_double) {
  if (is_double) {
    mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ldy), CLASSIC_FFUN(z_ld));
  } else {
    mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ley), CLASSIC_FFUN(z_le));
  }
}

// Optimized emitter for reg to mem operations.
// Uses modern instructions if running on modern hardware, classic instructions
// otherwise. Prefers (usually shorter) classic instructions if applicable.
// Data register (reg) cannot be used as work register.
//
// Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
void MacroAssembler::reg2mem_opt(Register reg,
                                 int64_t disp,
                                 Register index,
                                 Register base,
                                 void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
                                 void (MacroAssembler::*classic)(Register, int64_t, Register, Register),
                                 Register scratch) {
  index = (index == noreg) ? Z_R0 : index;
  if (Displacement::is_shortDisp(disp)) {
    (this->*classic)(reg, disp, index, base);
  } else {
    if (Displacement::is_validDisp(disp)) {
      (this->*modern)(reg, disp, index, base);
    } else {
      if (scratch != Z_R0 && scratch != Z_R1) {
        (this->*modern)(reg, disp, index, base);     // Will fail with disp out of range.
      } else {
        if (scratch != Z_R0) { // scratch == Z_R1
          if ((scratch == index) || (index == base)) {
            (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
          } else {
            add2reg(scratch, disp, base);
            (this->*classic)(reg, 0, index, scratch);
            if (base == scratch) {
              add2reg(base, -disp); // Restore base.
            }
          }
        } else { // scratch == Z_R0
          if ((scratch == reg) || (scratch == base) || (reg == base)) {
            (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
          } else {
            z_lgr(scratch, base);
            add2reg(base, disp);
            (this->*classic)(reg, 0, index, base);
            z_lgr(base, scratch); // Restore base.
          }
        }
      }
    }
  }
}

int MacroAssembler::reg2mem_opt(Register reg, const Address &a, bool is_double) {
  int store_offset = offset();
  if (is_double) {
    reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_stg), CLASSIC_IFUN(z_stg));
  } else {
    reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_sty), CLASSIC_IFUN(z_st));
  }
  return store_offset;
}

// Optimized emitter for mem to reg operations.
// Uses modern instructions if running on modern hardware, classic instructions
// otherwise. Prefers (usually shorter) classic instructions if applicable.
// Data register (reg) will be used as work register where possible.
void MacroAssembler::mem2reg_opt(Register reg,
                                 int64_t disp,
                                 Register index,
                                 Register base,
                                 void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
                                 void (MacroAssembler::*classic)(Register, int64_t, Register, Register)) {
  index = (index == noreg) ? Z_R0 : index;
  if (Displacement::is_shortDisp(disp)) {
    (this->*classic)(reg, disp, index, base);
  } else {
    if (Displacement::is_validDisp(disp)) {
      (this->*modern)(reg, disp, index, base);
    } else {
      if ((reg == index) && (reg == base)) {
        z_sllg(reg, reg, 1); // reg = index + base = 2*reg
        add2reg(reg, disp);
        (this->*classic)(reg, 0, noreg, reg);
      } else if ((reg == index) && (reg != Z_R0)) {
        add2reg(reg, disp);
        (this->*classic)(reg, 0, reg, base);
      } else if (reg == base) {
        add2reg(reg, disp);
        (this->*classic)(reg, 0, index, reg);
      } else if (reg != Z_R0) {
        add2reg(reg, disp, base);
        (this->*classic)(reg, 0, index, reg);
      } else { // reg == Z_R0 && reg != base here
        add2reg(base, disp);
        (this->*classic)(reg, 0, index, base);
        add2reg(base, -disp);
      }
    }
  }
}

void MacroAssembler::mem2reg_opt(Register reg, const Address &a, bool is_double) {
  if (is_double) {
    z_lg(reg, a);
  } else {
    mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_ly), CLASSIC_IFUN(z_l));
  }
}

void MacroAssembler::mem2reg_signed_opt(Register reg, const Address &a) {
  mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_lgf), CLASSIC_IFUN(z_lgf));
}

void MacroAssembler::and_imm(Register r, long mask,
                             Register tmp /* = Z_R0 */,
                             bool wide    /* = false */) {
  assert(wide || Immediate::is_simm32(mask), "mask value too large");

  if (!wide) {
    z_nilf(r, mask);
    return;
  }

  assert(r != tmp, "need a different temporary register!");
  load_const_optimized(tmp, mask);
  z_ngr(r, tmp);
}

// Calculate the 1's complement.
// Note: The condition code is neither preserved nor correctly set by this code!!!
// Note: (wide == false) does not protect the high order half of the target register
//       from alteration. It only serves as optimization hint for 32-bit results.
void MacroAssembler::not_(Register r1, Register r2, bool wide) {

  if ((r2 == noreg) || (r2 == r1)) { // Calc 1's complement in place.
    z_xilf(r1, -1);
    if (wide) {
      z_xihf(r1, -1);
    }
  } else { // Distinct src and dst registers.
    load_const_optimized(r1, -1);
    z_xgr(r1, r2);
  }
}

unsigned long MacroAssembler::create_mask(int lBitPos, int rBitPos) {
  assert(lBitPos >= 0,       "zero is leftmost bit position");
  assert(rBitPos <= 63,      "63 is rightmost bit position");
  assert(lBitPos <= rBitPos, "inverted selection interval");
  return (lBitPos == 0 ? (unsigned long)(-1L) : ((1UL<<(63-lBitPos+1))-1)) & (~((1UL<<(63-rBitPos))-1));
}
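
// Illustrative examples (IBM bit numbering: bit 0 is the leftmost, most
// significant bit of the 64-bit register):
//   create_mask( 0, 63) == 0xffffffffffffffffUL
//   create_mask( 8, 15) == 0x00ff000000000000UL
//   create_mask(48, 63) == 0x000000000000ffffUL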

// Helper function for the "Rotate_then_<logicalOP>" emitters.
// Rotate src, then mask register contents such that only bits in range survive.
// For oneBits == false, all bits not in range are set to 0. Useful for deleting all bits outside range.
// For oneBits == true,  all bits not in range are set to 1. Useful for preserving all bits outside range.
// The caller must ensure that the selected range only contains bits with defined value.
void MacroAssembler::rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos,
                                      int nRotate, bool src32bit, bool dst32bit, bool oneBits) {
  assert(!(dst32bit && lBitPos < 32), "selection interval out of range for int destination");
  bool sll4rll = (nRotate >= 0) && (nRotate <= (63-rBitPos)); // Substitute SLL(G) for RLL(G).
  bool srl4rll = (nRotate <  0) && (-nRotate <= lBitPos);     // Substitute SRL(G) for RLL(G).
  // Pre-determine which parts of dst will be zero after shift/rotate.
  bool llZero  =  sll4rll && (nRotate >= 16);
  bool lhZero  = (sll4rll && (nRotate >= 32)) || (srl4rll && (nRotate <= -48));
  bool lfZero  = llZero && lhZero;
  bool hlZero  = (sll4rll && (nRotate >= 48)) || (srl4rll && (nRotate <= -32));
  bool hhZero  =                                 (srl4rll && (nRotate <= -16));
  bool hfZero  = hlZero && hhZero;

  // Rotate, then mask src operand.
  // If oneBits == true,  all bits outside selected range are 1s.
  // If oneBits == false, all bits outside selected range are 0s.
  if (src32bit) {   // There might be garbage in the upper 32 bits which will get masked away.
    if (dst32bit) {
      z_rll(dst, src, nRotate);   // Copy and rotate, upper half of reg remains undisturbed.
    } else {
      if      (sll4rll) { z_sllg(dst, src,  nRotate); }
      else if (srl4rll) { z_srlg(dst, src, -nRotate); }
      else              { z_rllg(dst, src,  nRotate); }
    }
  } else {
    if      (sll4rll) { z_sllg(dst, src,  nRotate); }
    else if (srl4rll) { z_srlg(dst, src, -nRotate); }
    else              { z_rllg(dst, src,  nRotate); }
  }

  unsigned long  range_mask    = create_mask(lBitPos, rBitPos);
  unsigned int   range_mask_h  = (unsigned int)(range_mask >> 32);
  unsigned int   range_mask_l  = (unsigned int)range_mask;
  unsigned short range_mask_hh = (unsigned short)(range_mask >> 48);
  unsigned short range_mask_hl = (unsigned short)(range_mask >> 32);
  unsigned short range_mask_lh = (unsigned short)(range_mask >> 16);
  unsigned short range_mask_ll = (unsigned short)range_mask;
  // Works for z9 and newer H/W.
  if (oneBits) {
    if ((~range_mask_l) != 0)                { z_oilf(dst, ~range_mask_l); } // All bits outside range become 1s.
    if (((~range_mask_h) != 0) && !dst32bit) { z_oihf(dst, ~range_mask_h); }
  } else {
    // All bits outside range become 0s.
    if (((~range_mask_l) != 0) &&              !lfZero) {
      z_nilf(dst, range_mask_l);
    }
    if (((~range_mask_h) != 0) && !dst32bit && !hfZero) {
      z_nihf(dst, range_mask_h);
    }
  }
}

// Rotate src, then insert selected range from rotated src into dst.
// Clear dst before, if requested.
void MacroAssembler::rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos,
                                        int nRotate, bool clear_dst) {
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
  z_risbg(dst, src, lBitPos, rBitPos, nRotate, clear_dst); // Rotate, then insert selected, clear the rest.
}
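
// Illustrative example: extract the low halfword of src into dst and zero
// all other dst bits (no rotation required):
//   rotate_then_insert(dst, src, 48, 63, 0, true); // dst = src & 0xffff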

// Rotate src, then and selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos,
                                     int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
  z_rnsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then and selected.
}

// Rotate src, then or selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_or(Register dst, Register src, int lBitPos, int rBitPos,
                                    int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
  z_rosbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then or selected.
}

// Rotate src, then xor selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_xor(Register dst, Register src, int lBitPos, int rBitPos,
                                     int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
  z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected.
}

void MacroAssembler::add64(Register r1, RegisterOrConstant inc) {
  if (inc.is_register()) {
    z_agr(r1, inc.as_register());
  } else { // constant
    intptr_t imm = inc.as_constant();
    add2reg(r1, imm);
  }
}

// Helper function to multiply the 64bit contents of a register by a 16bit constant.
// The optimization tries to avoid the mghi instruction, since it uses the FPU for
// calculation and is thus rather slow.
//
// There is no handling for special cases, e.g. cval==0 or cval==1.
//
// Returns len of generated code block.
unsigned int MacroAssembler::mul_reg64_const16(Register rval, Register work, int cval) {
  int block_start = offset();

  bool sign_flip = cval < 0;
  cval = sign_flip ? -cval : cval;

  BLOCK_COMMENT("Reg64*Con16 {");

  int bit1 = cval & -cval;
  if (bit1 == cval) {
    z_sllg(rval, rval, exact_log2(bit1));
    if (sign_flip) { z_lcgr(rval, rval); }
  } else {
    int bit2 = (cval-bit1) & -(cval-bit1);
    if ((bit1+bit2) == cval) {
      z_sllg(work, rval, exact_log2(bit1));
      z_sllg(rval, rval, exact_log2(bit2));
      z_agr(rval, work);
      if (sign_flip) { z_lcgr(rval, rval); }
    } else {
      if (sign_flip) { z_mghi(rval, -cval); }
      else           { z_mghi(rval,  cval); }
    }
  }
  BLOCK_COMMENT("} Reg64*Con16");

  int block_end = offset();
  return block_end - block_start;
}
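
// Illustrative example: cval == 10 decomposes into 2 + 8, so the multiply
// is replaced by two shifts and an add:
//   z_sllg(work, rval, 1); // work = rval * 2
//   z_sllg(rval, rval, 3); // rval = rval * 8
//   z_agr(rval, work);     // rval = rval * 10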

// Generic operation r1 := r2 + imm.
//
// Should produce the best code for each supported CPU version.
// r2 == noreg yields r1 := r1 + imm
// imm == 0 emits either no instruction or r1 := r2!
// NOTES: 1) Don't use this function where fixed sized
//           instruction sequences are required!!!
//        2) Don't use this function if condition code
//           setting is required!
//        3) Despite being declared as int64_t, the parameter imm
//           must be a simm_32 value (= signed 32-bit integer).
void MacroAssembler::add2reg(Register r1, int64_t imm, Register r2) {
  assert(Immediate::is_simm32(imm), "probably an implicit conversion went wrong");

  if (r2 == noreg) { r2 = r1; }

  // Handle special case imm == 0.
  if (imm == 0) {
    lgr_if_needed(r1, r2);
    // Nothing else to do.
    return;
  }

  if (!PreferLAoverADD || (r2 == Z_R0)) {
    bool distinctOpnds = VM_Version::has_DistinctOpnds();

    // Can we encode imm in 16 bits signed?
    if (Immediate::is_simm16(imm)) {
      if (r1 == r2) {
        z_aghi(r1, imm);
        return;
      }
      if (distinctOpnds) {
        z_aghik(r1, r2, imm);
        return;
      }
      lgr_if_needed(r1, r2);
      z_aghi(r1, imm);
      return;
    }
  } else {
    // Can we encode imm in 12 bits unsigned?
    if (Displacement::is_shortDisp(imm)) {
      z_la(r1, imm, r2);
      return;
    }
    // Can we encode imm in 20 bits signed?
    if (Displacement::is_validDisp(imm)) {
      // Always use LAY instruction, so we don't need the tmp register.
      z_lay(r1, imm, r2);
      return;
    }
  }

  // Can handle it (all possible values) with long immediates.
  lgr_if_needed(r1, r2);
  z_agfi(r1, imm);
}
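
// Illustrative examples of the encodings add2reg may select (assuming
// PreferLAoverADD is off and the distinct-operands facility is present):
//   add2reg(r1, 12, r2)  -> AGHIK r1,r2,12  (simm16 immediate)
//   add2reg(r1, 100000)  -> AGFI  r1,100000 (simm32 immediate)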
683
684 void MacroAssembler::add2reg_32(Register r1, int64_t imm, Register r2) {
685 assert(Immediate::is_simm32(imm), "probably an implicit conversion went wrong");
686
687 if (r2 == noreg) { r2 = r1; }
688
689 // Handle special case imm == 0.
690 if (imm == 0) {
691 lr_if_needed(r1, r2);
692 // Nothing else to do.
693 return;
694 }
695
696 if (Immediate::is_simm16(imm)) {
697 if (r1 == r2){
698 z_ahi(r1, imm);
699 return;
700 }
701 if (VM_Version::has_DistinctOpnds()) {
702 z_ahik(r1, r2, imm);
703 return;
704 }
705 lr_if_needed(r1, r2);
706 z_ahi(r1, imm);
707 return;
708 }
709
710 // imm is simm32
711 lr_if_needed(r1, r2);
712 z_afi(r1, imm);
713 }
714
715 // Generic operation r := b + x + d
716 //
717 // Addition of several operands with address generation semantics - sort of:
718 // - no restriction on the registers. Any register will do for any operand.
719 // - x == noreg: operand will be disregarded.
720 // - b == noreg: will use (contents of) result reg as operand (r := r + d).
721 // - x == Z_R0: just disregard
722 // - b == Z_R0: use as operand. This is not address generation semantics!!!
723 //
724 // The same restrictions as on add2reg() are valid!!!
725 void MacroAssembler::add2reg_with_index(Register r, int64_t d, Register x, Register b) {
726 assert(Immediate::is_simm32(d), "probably an implicit conversion went wrong");
727
728 if (x == noreg) { x = Z_R0; }
729 if (b == noreg) { b = r; }
730
731 // Handle special case x == R0.
732 if (x == Z_R0) {
733 // Can simply add the immediate value to the base register.
734 add2reg(r, d, b);
735 return;
736 }
737
738 if (!PreferLAoverADD || (b == Z_R0)) {
739 bool distinctOpnds = VM_Version::has_DistinctOpnds();
740 // Handle special case d == 0.
741 if (d == 0) {
742 if (b == x) { z_sllg(r, b, 1); return; }
743 if (r == x) { z_agr(r, b); return; }
744 if (r == b) { z_agr(r, x); return; }
745 if (distinctOpnds) { z_agrk(r, x, b); return; }
746 z_lgr(r, b);
747 z_agr(r, x);
748 } else {
749 if (x == b) { z_sllg(r, x, 1); }
750 else if (r == x) { z_agr(r, b); }
751 else if (r == b) { z_agr(r, x); }
752 else if (distinctOpnds) { z_agrk(r, x, b); }
753 else {
754 z_lgr(r, b);
755 z_agr(r, x);
756 }
757 add2reg(r, d);
758 }
759 } else {
760 // Can we encode imm in 12 bits unsigned?
761 if (Displacement::is_shortDisp(d)) {
762 z_la(r, d, x, b);
763 return;
764 }
765 // Can we encode imm in 20 bits signed?
766 if (Displacement::is_validDisp(d)) {
767 z_lay(r, d, x, b);
768 return;
769 }
770 z_la(r, 0, x, b);
771 add2reg(r, d);
772 }
773 }
774
775 // Generic emitter (32bit) for direct memory increment.
776 // For optimal code, do not specify Z_R0 as temp register.
777 void MacroAssembler::add2mem_32(const Address &a, int64_t imm, Register tmp) {
778 if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) {
779 z_asi(a, imm);
780 } else {
781 z_lgf(tmp, a);
782 add2reg(tmp, imm);
783 z_st(tmp, a);
784 }
785 }
786
787 void MacroAssembler::add2mem_64(const Address &a, int64_t imm, Register tmp) {
788 if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) {
789 z_agsi(a, imm);
790 } else {
791 z_lg(tmp, a);
792 add2reg(tmp, imm);
793 z_stg(tmp, a);
794 }
795 }
796
797 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
798 switch (size_in_bytes) {
799 case 8: z_lg(dst, src); break;
800 case 4: is_signed ? z_lgf(dst, src) : z_llgf(dst, src); break;
801 case 2: is_signed ? z_lgh(dst, src) : z_llgh(dst, src); break;
802 case 1: is_signed ? z_lgb(dst, src) : z_llgc(dst, src); break;
803 default: ShouldNotReachHere();
804 }
805 }
806
807 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
808 switch (size_in_bytes) {
809 case 8: z_stg(src, dst); break;
810 case 4: z_st(src, dst); break;
811 case 2: z_sth(src, dst); break;
812 case 1: z_stc(src, dst); break;
813 default: ShouldNotReachHere();
814 }
815 }
816
817 // Split a si20 offset (20bit, signed) into an ui12 offset (12bit, unsigned) and
818 // a high-order summand in register tmp.
819 //
820 // return value: < 0: No split required, si20 actually has property uimm12.
821 // >= 0: Split performed. Use return value as uimm12 displacement and
822 // tmp as index register.
823 int MacroAssembler::split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate) {
824 assert(Immediate::is_simm20(si20_offset), "sanity");
825 int lg_off = (int)si20_offset & 0x0fff; // Punch out low-order 12 bits, always positive.
826 int ll_off = (int)si20_offset & ~0x0fff; // Force low-order 12 bits to zero.
827 assert((Displacement::is_shortDisp(si20_offset) && (ll_off == 0)) ||
828 !Displacement::is_shortDisp(si20_offset), "unexpected offset values");
829 assert((lg_off+ll_off) == si20_offset, "offset splitup error");
830
831 Register work = accumulate? Z_R0 : tmp;
832
833 if (fixed_codelen) { // Len of code = 10 = 4 + 6.
834 z_lghi(work, ll_off>>12); // Implicit sign extension.
835 z_slag(work, work, 12);
836 } else { // Len of code = 0..10.
837 if (ll_off == 0) { return -1; }
838 // ll_off has 8 significant bits (at most) plus sign.
839 if ((ll_off & 0x0000f000) == 0) { // Non-zero bits only in upper halfbyte.
840 z_llilh(work, ll_off >> 16);
841 if (ll_off < 0) { // Sign-extension required.
842 z_lgfr(work, work);
843 }
844 } else {
845 if ((ll_off & 0x000f0000) == 0) { // Non-zero bits only in lower halfbyte.
846 z_llill(work, ll_off);
847 } else { // Non-zero bits in both halfbytes.
848 z_lghi(work, ll_off>>12); // Implicit sign extension.
849 z_slag(work, work, 12);
850 }
851 }
852 }
853 if (accumulate) { z_algr(tmp, work); } // len of code += 4
854 return lg_off;
855 }
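
// Illustrative example: si20_offset == 0x12345 does not fit into 12 bits.
// With fixed_codelen == true and accumulate == true, the split emits
//   LGHI Z_R0,0x12 ; SLAG Z_R0,Z_R0,12 ; ALGR tmp,Z_R0
// and returns 0x345, to be used as uimm12 displacement with tmp as base.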

void MacroAssembler::load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
  if (Displacement::is_validDisp(si20)) {
    z_ley(t, si20, a);
  } else {
    // Fixed_codelen = true is a simple way to ensure that the size of load_float_largeoffset
    // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
    // pool loads).
    bool accumulate    = true;
    bool fixed_codelen = true;
    Register work;

    if (fixed_codelen) {
      z_lgr(tmp, a);  // Lgr_if_needed not applicable due to fixed_codelen.
    } else {
      accumulate = (a == tmp);
    }
    work = tmp;

    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
    if (disp12 < 0) {
      z_le(t, si20, work);
    } else {
      if (accumulate) {
        z_le(t, disp12, work);
      } else {
        z_le(t, disp12, work, a);
      }
    }
  }
}

void MacroAssembler::load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
  if (Displacement::is_validDisp(si20)) {
    z_ldy(t, si20, a);
  } else {
    // Fixed_codelen = true is a simple way to ensure that the size of load_double_largeoffset
    // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
    // pool loads).
    bool accumulate    = true;
    bool fixed_codelen = true;
    Register work;

    if (fixed_codelen) {
      z_lgr(tmp, a);  // Lgr_if_needed not applicable due to fixed_codelen.
    } else {
      accumulate = (a == tmp);
    }
    work = tmp;

    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
    if (disp12 < 0) {
      z_ld(t, si20, work);
    } else {
      if (accumulate) {
        z_ld(t, disp12, work);
      } else {
        z_ld(t, disp12, work, a);
      }
    }
  }
}

// PCrelative TOC access.
// Returns distance (in bytes) from current position to start of consts section.
// Returns 0 (zero) if no consts section exists or if it has size zero.
long MacroAssembler::toc_distance() {
  CodeSection* cs = code()->consts();
  return (long)((cs != nullptr) ? cs->start()-pc() : 0);
}

// Implementation on x86/sparc assumes that constant and instruction section are
// adjacent, but this doesn't hold. Two special situations may occur, that we must
// be able to handle:
//   1. const section may be located apart from the inst section.
//   2. const section may be empty.
// In both cases, we use the const section's start address to compute the "TOC".
// This seems to occur only temporarily; in the final step we always seem to end up
// with the pc-relative variant.
//
// PC-relative offset could be +/-2**32 -> use long for disp.
// Furthermore, it makes no sense to have special code for
// adjacent const and inst sections.
void MacroAssembler::load_toc(Register Rtoc) {
  // Simply use distance from start of const section (should be patched in the end).
  long disp = toc_distance();

  RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp);
  relocate(rspec);
  z_larl(Rtoc, RelAddr::pcrel_off32(disp));  // Offset is in halfwords.
}

// PCrelative TOC access.
// Load from anywhere pcrelative (with relocation of load instr).
void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) {
  address          pc             = this->pc();
  ptrdiff_t        total_distance = dataLocation - pc;
  RelocationHolder rspec          = internal_word_Relocation::spec(dataLocation);

  assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
  assert(total_distance != 0, "sanity");

  // Some extra safety net.
  if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) {
    guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance);
  }

  (this)->relocate(rspec, relocInfo::pcrel_addr_format);
  z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));
}


// PCrelative TOC access.
// Load from anywhere pcrelative (with relocation of load instr).
// Loaded addr has to be relocated when added to constant pool.
void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) {
  address          pc             = this->pc();
  ptrdiff_t        total_distance = addrLocation - pc;
  RelocationHolder rspec          = internal_word_Relocation::spec(addrLocation);

  assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");

  // Some extra safety net.
  if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) {
    guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_addr_pcrelative can't handle distance " INTPTR_FORMAT, total_distance);
  }

  (this)->relocate(rspec, relocInfo::pcrel_addr_format);
  z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));
}

// Generic operation: load a value from memory and test.
// CondCode indicates the sign (<0, ==0, >0) of the loaded value.
void MacroAssembler::load_and_test_byte(Register dst, const Address &a) {
  z_lb(dst, a);
  z_ltr(dst, dst);
}

void MacroAssembler::load_and_test_short(Register dst, const Address &a) {
  int64_t disp = a.disp20();
  if (Displacement::is_shortDisp(disp)) {
    z_lh(dst, a);
  } else if (Displacement::is_longDisp(disp)) {
    z_lhy(dst, a);
  } else {
    guarantee(false, "displacement out of range");
  }
  z_ltr(dst, dst);
}

void MacroAssembler::load_and_test_int(Register dst, const Address &a) {
  z_lt(dst, a);
}

void MacroAssembler::load_and_test_int2long(Register dst, const Address &a) {
  z_ltgf(dst, a);
}

void MacroAssembler::load_and_test_long(Register dst, const Address &a) {
  z_ltg(dst, a);
}

// Test a bit in memory for 2 byte datatype.
void MacroAssembler::testbit_ushort(const Address &a, unsigned int bit) {
  assert(a.index() == noreg, "no index reg allowed in testbit");
  if (bit <= 7) {
    z_tm(a.disp() + 1, a.base(), 1 << bit);
  } else if (bit <= 15) {
    z_tm(a.disp() + 0, a.base(), 1 << (bit - 8));
  } else {
    ShouldNotReachHere();
  }
}

// Test a bit in memory.
void MacroAssembler::testbit(const Address &a, unsigned int bit) {
  assert(a.index() == noreg, "no index reg allowed in testbit");
  if (bit <= 7) {
    z_tm(a.disp() + 3, a.base(), 1 << bit);
  } else if (bit <= 15) {
    z_tm(a.disp() + 2, a.base(), 1 << (bit - 8));
  } else if (bit <= 23) {
    z_tm(a.disp() + 1, a.base(), 1 << (bit - 16));
  } else if (bit <= 31) {
    z_tm(a.disp() + 0, a.base(), 1 << (bit - 24));
  } else {
    ShouldNotReachHere();
  }
}
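
// Illustrative example: testing bit 10 of a 4-byte flag word at d(base)
// (bit 0 being the least significant bit of the word):
//   testbit(Address(base, d), 10); // emits TM d+2(base),0x04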

// Test a bit in a register. Result is reflected in CC.
void MacroAssembler::testbit(Register r, unsigned int bitPos) {
  if (bitPos < 16) {
    z_tmll(r, 1U<<bitPos);
  } else if (bitPos < 32) {
    z_tmlh(r, 1U<<(bitPos-16));
  } else if (bitPos < 48) {
    z_tmhl(r, 1U<<(bitPos-32));
  } else if (bitPos < 64) {
    z_tmhh(r, 1U<<(bitPos-48));
  } else {
    ShouldNotReachHere();
  }
}
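
// Illustrative example: testbit(r, 17) emits TMLH r,0x0002
// (bit 17 falls into the second halfword, mask 1 << (17-16)).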

void MacroAssembler::prefetch_read(Address a) {
  z_pfd(1, a.disp20(), a.indexOrR0(), a.base());
}
void MacroAssembler::prefetch_update(Address a) {
  z_pfd(2, a.disp20(), a.indexOrR0(), a.base());
}

// Clear a register, i.e. load const zero into reg.
// Return len (in bytes) of generated instruction(s).
// whole_reg: Clear 64 bits if true, 32 bits otherwise.
// set_cc:    Use instruction that sets the condition code, if true.
int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) {
  unsigned int start_off = offset();
  if (whole_reg) {
    set_cc ? z_xgr(r, r) : z_laz(r, 0, Z_R0);
  } else {  // Only 32bit register.
    set_cc ? z_xr(r, r) : z_lhi(r, 0);
  }
  return offset() - start_off;
}

#ifdef ASSERT
int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) {
  switch (pattern_len) {
    case 1:
      pattern = (pattern & 0x000000ff)  | ((pattern & 0x000000ff)<<8);
      // fallthru: replicate the byte to a halfword, then keep widening.
    case 2:
      pattern = (pattern & 0x0000ffff)  | ((pattern & 0x0000ffff)<<16);
      // fallthru
    case 4:
      pattern = (pattern & 0xffffffffL) | ((pattern & 0xffffffffL)<<32);
      // fallthru
    case 8:
      return load_const_optimized_rtn_len(r, pattern, true);
      break;
    default:
      guarantee(false, "preset_reg: bad len");
  }
  return 0;
}
#endif

// addr: Address descriptor of memory to clear. Index register will not be used!
// size: Number of bytes to clear.
// Condition code will not be preserved.
//    !!! DO NOT USE THEM FOR ATOMIC MEMORY CLEARING !!!
//    !!! Use store_const() instead                  !!!
void MacroAssembler::clear_mem(const Address& addr, unsigned int size) {
  guarantee((addr.disp() + size) <= 4096, "MacroAssembler::clear_mem: size too large");

  switch (size) {
    case 0:
      return;
    case 1:
      z_mvi(addr, 0);
      return;
    case 2:
      z_mvhhi(addr, 0);
      return;
    case 4:
      z_mvhi(addr, 0);
      return;
    case 8:
      z_mvghi(addr, 0);
      return;
    default: ; // Fallthru to xc.
  }

  // Caution: the emitter with Address operands does implicitly decrement the length.
  if (size <= 256) {
    z_xc(addr, size, addr);
  } else {
    unsigned int offset = addr.disp();
    unsigned int incr   = 256;
    for (unsigned int i = 0; i <= size-incr; i += incr) {
      z_xc(offset, incr - 1, addr.base(), offset, addr.base());
      offset += incr;
    }
    unsigned int rest = size - (offset - addr.disp());
    if (rest > 0) {
      z_xc(offset, rest-1, addr.base(), offset, addr.base());
    }
  }
}
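
// Illustrative example: clear_mem(Address(base, 0), 300) emits two XCs:
//   XC 0(256,base),0(base)    // clears bytes [0,255]
//   XC 256(44,base),256(base) // clears the remaining 44 bytes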

void MacroAssembler::align(int modulus) {
  align(modulus, offset());
}

void MacroAssembler::align(int modulus, int target) {
  assert(((modulus % 2 == 0) && (target % 2 == 0)), "needs to be even");
  int delta = target - offset();
  while ((offset() + delta) % modulus != 0) z_nop();
}

// Special version for non-relocateable code if required alignment
// is larger than CodeEntryAlignment.
void MacroAssembler::align_address(int modulus) {
  while ((uintptr_t)pc() % modulus != 0) z_nop();
}

Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         Register temp_reg,
                                         int64_t extra_slot_offset) {
  // On Z, we can have index and disp in an Address. So don't call argument_offset,
  // which issues an unnecessary add instruction.
  int stackElementSize = Interpreter::stackElementSize;
  int64_t offset = extra_slot_offset * stackElementSize;
  const Register argbase = Z_esp;
  if (arg_slot.is_constant()) {
    offset += arg_slot.as_constant() * stackElementSize;
    return Address(argbase, offset);
  }
  // else
  assert(temp_reg != noreg, "must specify");
  assert(temp_reg != Z_ARG1, "base and index are conflicting");
  z_sllg(temp_reg, arg_slot.as_register(), exact_log2(stackElementSize)); // tempreg = arg_slot << 3
  return Address(argbase, temp_reg, offset);
}


//===================================================================
//===   START   C O N S T A N T S   I N   C O D E   S T R E A M  ===
//===================================================================
//===           P A T C H A B L E   C O N S T A N T S            ===
//===================================================================


//---------------------------------------------------
//  Load (patchable) constant into register
//---------------------------------------------------


// Load absolute address (and try to optimize).
//   Note: This method is usable only for position-fixed code,
//         referring to a position-fixed target location.
//         If not so, relocations and patching must be used.
void MacroAssembler::load_absolute_address(Register d, address addr) {
  assert(addr != nullptr, "should not happen");
  BLOCK_COMMENT("load_absolute_address:");
  if (addr == nullptr) {
    z_larl(d, pc()); // Dummy emit for size calc.
    return;
  }

  if (RelAddr::is_in_range_of_RelAddr32(addr, pc())) {
    z_larl(d, addr);
    return;
  }

  load_const_optimized(d, (long)addr);
}

// Load a 64bit constant.
// Patchable code sequence, but not atomically patchable.
// Make sure to keep code size constant -> no value-dependent optimizations.
// Do not kill condition code.
void MacroAssembler::load_const(Register t, long x) {
  // Note: Right shift is only cleanly defined for unsigned types
  //       or for signed types with nonnegative values.
  Assembler::z_iihf(t, (long)((unsigned long)x >> 32));
  Assembler::z_iilf(t, (long)((unsigned long)x & 0xffffffffUL));
}
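
// Illustrative example: load_const(t, 0x1122334455667788L) always emits the
// fixed 12-byte sequence
//   IIHF t,0x11223344
//   IILF t,0x55667788
// whose two 32-bit immediates patch_const() can later overwrite in place.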

// Load a 32bit constant into a 64bit register, sign-extend or zero-extend.
// Patchable code sequence, but not atomically patchable.
// Make sure to keep code size constant -> no value-dependent optimizations.
// Do not kill condition code.
void MacroAssembler::load_const_32to64(Register t, int64_t x, bool sign_extend) {
  if (sign_extend) { Assembler::z_lgfi(t, x); }
  else             { Assembler::z_llilf(t, x); }
}

// Load narrow oop constant, no decompression.
void MacroAssembler::load_narrow_oop(Register t, narrowOop a) {
  assert(UseCompressedOops, "must be on to call this method");
  load_const_32to64(t, CompressedOops::narrow_oop_value(a), false /*sign_extend*/);
}

// Load narrow klass constant, compression required.
void MacroAssembler::load_narrow_klass(Register t, Klass* k) {
  narrowKlass encoded_k = CompressedKlassPointers::encode(k);
  load_const_32to64(t, encoded_k, false /*sign_extend*/);
}

//------------------------------------------------------
//  Compare (patchable) constant with register.
//------------------------------------------------------

// Compare narrow oop in reg with narrow oop constant, no decompression.
void MacroAssembler::compare_immediate_narrow_oop(Register oop1, narrowOop oop2) {
  assert(UseCompressedOops, "must be on to call this method");

  Assembler::z_clfi(oop1, CompressedOops::narrow_oop_value(oop2));
}

// Compare narrow klass in reg with narrow klass constant, compression required.
void MacroAssembler::compare_immediate_narrow_klass(Register klass1, Klass* klass2) {
  narrowKlass encoded_k = CompressedKlassPointers::encode(klass2);

  Assembler::z_clfi(klass1, encoded_k);
}

//----------------------------------------------------------
//  Check which kind of load_constant we have here.
//----------------------------------------------------------

// Detection of CPU version dependent load_const sequence.
// The detection is valid only for code sequences generated by load_const,
// not load_const_optimized.
bool MacroAssembler::is_load_const(address a) {
  unsigned long inst1, inst2;
  unsigned int  len1,  len2;

  len1 = get_instruction(a, &inst1);
  len2 = get_instruction(a + len1, &inst2);

  return is_z_iihf(inst1) && is_z_iilf(inst2);
}

// Detection of CPU version dependent load_const_32to64 sequence.
// Mostly used for narrow oops and narrow Klass pointers.
// The detection is valid only for code sequences generated by load_const_32to64.
bool MacroAssembler::is_load_const_32to64(address pos) {
  unsigned long inst1;
  unsigned int  len1;

  len1 = get_instruction(pos, &inst1);
  return is_z_llilf(inst1);
}

// Detection of compare_immediate_narrow sequence.
// The detection is valid only for code sequences generated by compare_immediate_narrow_oop.
bool MacroAssembler::is_compare_immediate32(address pos) {
  return is_equal(pos, CLFI_ZOPC, RIL_MASK);
}

// Detection of compare_immediate_narrow sequence.
// The detection is valid only for code sequences generated by compare_immediate_narrow_oop.
bool MacroAssembler::is_compare_immediate_narrow_oop(address pos) {
  return is_compare_immediate32(pos);
}

// Detection of compare_immediate_narrow sequence.
// The detection is valid only for code sequences generated by compare_immediate_narrow_klass.
bool MacroAssembler::is_compare_immediate_narrow_klass(address pos) {
  return is_compare_immediate32(pos);
}

//-----------------------------------
//  patch the load_constant
//-----------------------------------

// CPU-version dependent patching of load_const.
void MacroAssembler::patch_const(address a, long x) {
  assert(is_load_const(a), "not a load of a constant");
  // Note: Right shift is only cleanly defined for unsigned types
  //       or for signed types with nonnegative values.
  set_imm32((address)a, (long)((unsigned long)x >> 32));
  set_imm32((address)(a + 6), (long)((unsigned long)x & 0xffffffffUL));
}

// Patching the value of CPU version dependent load_const_32to64 sequence.
// The passed ptr MUST be in compressed format!
int MacroAssembler::patch_load_const_32to64(address pos, int64_t np) {
  assert(is_load_const_32to64(pos), "not a load of a narrow ptr (oop or klass)");

  set_imm32(pos, np);
  return 6;
}

// Patching the value of CPU version dependent compare_immediate_narrow sequence.
// The passed ptr MUST be in compressed format!
int MacroAssembler::patch_compare_immediate_32(address pos, int64_t np) {
  assert(is_compare_immediate32(pos), "not a compressed ptr compare");

  set_imm32(pos, np);
  return 6;
}

// Patching the immediate value of CPU version dependent load_narrow_oop sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_load_narrow_oop(address pos, oop o) {
  assert(UseCompressedOops, "Can only patch compressed oops");
  return patch_load_const_32to64(pos, CompressedOops::narrow_oop_value(o));
}

// Patching the immediate value of CPU version dependent load_narrow_klass sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_load_narrow_klass(address pos, Klass* k) {
  narrowKlass nk = CompressedKlassPointers::encode(k);
  return patch_load_const_32to64(pos, nk);
}

// Patching the immediate value of CPU version dependent compare_immediate_narrow_oop sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_compare_immediate_narrow_oop(address pos, oop o) {
  assert(UseCompressedOops, "Can only patch compressed oops");
  return patch_compare_immediate_32(pos, CompressedOops::narrow_oop_value(o));
}

// Patching the immediate value of CPU version dependent compare_immediate_narrow_klass sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_compare_immediate_narrow_klass(address pos, Klass* k) {
  narrowKlass nk = CompressedKlassPointers::encode(k);
  return patch_compare_immediate_32(pos, nk);
}

//------------------------------------------------------------------------
//  Extract the constant from a load_constant instruction stream.
//------------------------------------------------------------------------

// Get constant from a load_const sequence.
long MacroAssembler::get_const(address a) {
  assert(is_load_const(a), "not a load of a constant");
  unsigned long x;
  x =  (((unsigned long) (get_imm32(a,0) & 0xffffffff)) << 32);
  x |= (((unsigned long) (get_imm32(a,1) & 0xffffffff)));
  return (long) x;
}

//--------------------------------------
//  Store a constant in memory.
//--------------------------------------

// General emitter to move a constant to memory.
// The store is atomic.
//  o Address must be given in RS format (no index register).
//  o Displacement should be 12bit unsigned for efficiency. 20bit signed also supported.
//  o Constant can be 1, 2, 4, or 8 bytes, signed or unsigned.
//  o Memory slot can be 1, 2, 4, or 8 bytes, signed or unsigned.
//  o Memory slot must be at least as wide as constant, will assert otherwise.
//  o Signed constants will sign-extend, unsigned constants will zero-extend to slot width.
int MacroAssembler::store_const(const Address &dest, long imm,
                                unsigned int lm, unsigned int lc,
                                Register scratch) {
  int64_t  disp = dest.disp();
  Register base = dest.base();
  assert(!dest.has_index(), "not supported");
  assert((lm == 1) || (lm == 2) || (lm == 4) || (lm == 8), "memory   length not supported");
  assert((lc == 1) || (lc == 2) || (lc == 4) || (lc == 8), "constant length not supported");
  assert(lm >= lc, "memory slot too small");
  assert(lc == 8 || Immediate::is_simm(imm, lc*8), "const out of range");
  assert(Displacement::is_validDisp(disp), "displacement out of range");

  bool is_shortDisp = Displacement::is_shortDisp(disp);
  int store_offset = -1;

  // For target len == 1 it's easy.
  if (lm == 1) {
    store_offset = offset();
    if (is_shortDisp) {
      z_mvi(disp, base, imm);
      return store_offset;
    } else {
      z_mviy(disp, base, imm);
      return store_offset;
    }
  }

  // All the "good stuff" takes an unsigned displacement.
  if (is_shortDisp) {
    // NOTE: Cannot use clear_mem for imm==0, because it is not atomic.

    store_offset = offset();
    switch (lm) {
      case 2:  // Lc == 1 handled correctly here, even for unsigned. Instruction does no widening.
        z_mvhhi(disp, base, imm);
        return store_offset;
      case 4:
        if (Immediate::is_simm16(imm)) {
          z_mvhi(disp, base, imm);
          return store_offset;
        }
        break;
      case 8:
        if (Immediate::is_simm16(imm)) {
          z_mvghi(disp, base, imm);
          return store_offset;
        }
        break;
      default:
        ShouldNotReachHere();
        break;
    }
  }

  // Can't optimize, so load value and store it.
  guarantee(scratch != noreg, "need a scratch register here!");
  if (imm != 0) {
    load_const_optimized(scratch, imm);  // Preserves CC anyway.
  } else {
    // Leave CC alone!!
    (void) clear_reg(scratch, true, false);  // Indicate unused result.
  }

  store_offset = offset();
  if (is_shortDisp) {
    switch (lm) {
      case 2:
        z_sth(scratch, disp, Z_R0, base);
        return store_offset;
      case 4:
        z_st(scratch, disp, Z_R0, base);
        return store_offset;
      case 8:
        z_stg(scratch, disp, Z_R0, base);
        return store_offset;
      default:
        ShouldNotReachHere();
        break;
    }
  } else {
    switch (lm) {
      case 2:
        z_sthy(scratch, disp, Z_R0, base);
        return store_offset;
      case 4:
        z_sty(scratch, disp, Z_R0, base);
        return store_offset;
      case 8:
        z_stg(scratch, disp, Z_R0, base);
        return store_offset;
      default:
        ShouldNotReachHere();
        break;
    }
  }
  return -1; // Should not reach here.
}
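
// Illustrative example: store_const(Address(base, 40), 0, 8, 8, Z_R1) can use
// the 16-bit immediate form and emits a single
//   MVGHI 40(base),0
// without consuming the scratch register.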

//===================================================================
//===       N O T   P A T C H A B L E   C O N S T A N T S        ===
//===================================================================

// Load constant x into register t with a fast instruction sequence
// depending on the bits in x. Preserves CC under all circumstances.
int MacroAssembler::load_const_optimized_rtn_len(Register t, long x, bool emit) {
  if (x == 0) {
    int len;
    if (emit) {
      len = clear_reg(t, true, false);
    } else {
      len = 4;
    }
    return len;
  }

  if (Immediate::is_simm16(x)) {
    if (emit) { z_lghi(t, x); }
    return 4;
  }

  // 64 bit value: | part1 | part2 | part3 | part4 |
  // At least one part is not zero!
  // Note: Right shift is only cleanly defined for unsigned types
  //       or for signed types with nonnegative values.
  int part1 = (int)((unsigned long)x >> 48) & 0x0000ffff;
  int part2 = (int)((unsigned long)x >> 32) & 0x0000ffff;
  int part3 = (int)((unsigned long)x >> 16) & 0x0000ffff;
  int part4 = (int)x & 0x0000ffff;
  int part12 = (int)((unsigned long)x >> 32);
  int part34 = (int)x;

  // Lower word only (unsigned).
  if (part12 == 0) {
    if (part3 == 0) {
      if (emit) z_llill(t, part4);
      return 4;
    }
    if (part4 == 0) {
      if (emit) z_llilh(t, part3);
      return 4;
    }
    if (emit) z_llilf(t, part34);
    return 6;
  }

  // Upper word only.
  if (part34 == 0) {
    if (part1 == 0) {
      if (emit) z_llihl(t, part2);
      return 4;
    }
    if (part2 == 0) {
      if (emit) z_llihh(t, part1);
      return 4;
    }
    if (emit) z_llihf(t, part12);
    return 6;
  }

  // Lower word only (signed).
  if ((part1 == 0x0000ffff) && (part2 == 0x0000ffff) && ((part3 & 0x00008000) != 0)) {
    if (emit) z_lgfi(t, part34);
    return 6;
  }

  int len = 0;

  if ((part1 == 0) || (part2 == 0)) {
    if (part1 == 0) {
      if (emit) z_llihl(t, part2);
      len += 4;
    } else {
      if (emit) z_llihh(t, part1);
      len += 4;
    }
  } else {
    if (emit) z_llihf(t, part12);
    len += 6;
  }

  if ((part3 == 0) || (part4 == 0)) {
    if (part3 == 0) {
      if (emit) z_iill(t, part4);
      len += 4;
    } else {
      if (emit) z_iilh(t, part3);
      len += 4;
    }
  } else {
    if (emit) z_iilf(t, part34);
    len += 6;
  }
  return len;
}
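
// Illustrative examples of sequence selection:
//   x == 0x0000000000004711 -> LGHI  t,0x4711                  (4 bytes)
//   x == 0x0000471100000000 -> LLIHL t,0x4711                  (4 bytes)
//   x == 0x4711000000000815 -> LLIHH t,0x4711 ; IILL t,0x0815  (8 bytes)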
1586
1587 //=====================================================================
1588 //=== H I G H E R L E V E L B R A N C H E M I T T E R S ===
1589 //=====================================================================
1590
1591 // Note: In the worst case, one of the scratch registers is destroyed!!!
1592 void MacroAssembler::compare32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
1593 // Right operand is constant.
1594 if (x2.is_constant()) {
1595 jlong value = x2.as_constant();
1596 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/true);
1597 return;
1598 }
1599
1600 // Right operand is in register.
1601 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/true);
1602 }
1603
1604 // Note: In the worst case, one of the scratch registers is destroyed!!!
1605 void MacroAssembler::compareU32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
1606 // Right operand is constant.
1607 if (x2.is_constant()) {
1608 jlong value = x2.as_constant();
1609 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/false);
1610 return;
1611 }
1612
1613 // Right operand is in register.
1614 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/false);
1615 }
1616
1617 // Note: In the worst case, one of the scratch registers is destroyed!!!
1618 void MacroAssembler::compare64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
1619 // Right operand is constant.
1620 if (x2.is_constant()) {
1621 jlong value = x2.as_constant();
1622 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/true);
1623 return;
1624 }
1625
1626 // Right operand is in register.
1627 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/true);
1628 }
1629
1630 void MacroAssembler::compareU64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
1631 // Right operand is constant.
1632 if (x2.is_constant()) {
1633 jlong value = x2.as_constant();
1634 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/false);
1635 return;
1636 }
1637
1638 // Right operand is in register.
1639 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/false);
1640 }
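
// Usage sketch (illustrative): branch to label `done` when the unsigned
// 64-bit value in a register equals 16:
//   compareU64_and_branch(reg, (intptr_t)16, Assembler::bcondEqual, done);
// All four wrappers above delegate to compare_and_branch_optimized, which
// picks the shortest suitable compare/branch encoding.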
1641
1642 // Generate an optimal branch to the branch target.
1643 // Optimal means that a relative branch (brc or brcl) is used if the
1644 // branch distance is short enough. Loading the target address into a
1645 // register and branching via reg is used as fallback only.
1646 //
1647 // Used registers:
1648 // Z_R1 - work reg. Holds branch target address.
1649 // Used in fallback case only.
1650 //
1651 // This version of branch_optimized is good for cases where the target address is known
1652 // and constant, i.e. is never changed (no relocation, no patching).
1653 void MacroAssembler::branch_optimized(Assembler::branch_condition cond, address branch_addr) {
1654 address branch_origin = pc();
1655
1656 if (RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) {
1657 z_brc(cond, branch_addr);
1658 } else if (RelAddr::is_in_range_of_RelAddr32(branch_addr, branch_origin)) {
1659 z_brcl(cond, branch_addr);
1660 } else {
1661 load_const_optimized(Z_R1, branch_addr); // CC must not get killed by load_const_optimized.
1662 z_bcr(cond, Z_R1);
1663 }
1664 }
1665
1666 // This version of branch_optimized is good for cases where the target address
1667 // is potentially not yet known at the time the code is emitted.
1668 //
// One very common case is a branch to an unbound label, which is handled here.
// The caller might know (or hope) that the branch distance is short enough
// to be encoded in a 16-bit relative address. In that case, the caller passes
// a NearLabel branch_target.
1673 // Care must be taken with unbound labels. Each call to target(label) creates
1674 // an entry in the patch queue for that label to patch all references of the label
1675 // once it gets bound. Those recorded patch locations must be patchable. Otherwise,
1676 // an assertion fires at patch time.
1677 void MacroAssembler::branch_optimized(Assembler::branch_condition cond, Label& branch_target) {
1678 if (branch_target.is_bound()) {
1679 address branch_addr = target(branch_target);
1680 branch_optimized(cond, branch_addr);
1681 } else if (branch_target.is_near()) {
1682 z_brc(cond, branch_target); // Caller assures that the target will be in range for z_brc.
1683 } else {
1684 z_brcl(cond, branch_target); // Let's hope target is in range. Otherwise, we will abort at patch time.
1685 }
1686 }
1687
1688 // Generate an optimal compare and branch to the branch target.
1689 // Optimal means that a relative branch (clgrj, brc or brcl) is used if the
1690 // branch distance is short enough. Loading the target address into a
1691 // register and branching via reg is used as fallback only.
1692 //
1693 // Input:
1694 // r1 - left compare operand
1695 // r2 - right compare operand
1696 void MacroAssembler::compare_and_branch_optimized(Register r1,
1697 Register r2,
1698 Assembler::branch_condition cond,
1699 address branch_addr,
1700 bool len64,
1701 bool has_sign) {
1702 unsigned int casenum = (len64?2:0)+(has_sign?0:1);
1703
1704 address branch_origin = pc();
1705 if (VM_Version::has_CompareBranch() && RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) {
1706 switch (casenum) {
1707 case 0: z_crj( r1, r2, cond, branch_addr); break;
      case 1: z_clrj(r1, r2, cond, branch_addr); break;
1709 case 2: z_cgrj(r1, r2, cond, branch_addr); break;
1710 case 3: z_clgrj(r1, r2, cond, branch_addr); break;
1711 default: ShouldNotReachHere(); break;
1712 }
1713 } else {
1714 switch (casenum) {
1715 case 0: z_cr( r1, r2); break;
1716 case 1: z_clr(r1, r2); break;
1717 case 2: z_cgr(r1, r2); break;
1718 case 3: z_clgr(r1, r2); break;
1719 default: ShouldNotReachHere(); break;
1720 }
1721 branch_optimized(cond, branch_addr);
1722 }
1723 }
1724
1725 // Generate an optimal compare and branch to the branch target.
1726 // Optimal means that a relative branch (clgij, brc or brcl) is used if the
1727 // branch distance is short enough. Loading the target address into a
1728 // register and branching via reg is used as fallback only.
1729 //
1730 // Input:
1731 // r1 - left compare operand (in register)
1732 // x2 - right compare operand (immediate)
1733 void MacroAssembler::compare_and_branch_optimized(Register r1,
1734 jlong x2,
1735 Assembler::branch_condition cond,
1736 Label& branch_target,
1737 bool len64,
1738 bool has_sign) {
1739 address branch_origin = pc();
1740 bool x2_imm8 = (has_sign && Immediate::is_simm8(x2)) || (!has_sign && Immediate::is_uimm8(x2));
1741 bool is_RelAddr16 = branch_target.is_near() ||
1742 (branch_target.is_bound() &&
1743 RelAddr::is_in_range_of_RelAddr16(target(branch_target), branch_origin));
1744 unsigned int casenum = (len64?2:0)+(has_sign?0:1);
1745
1746 if (VM_Version::has_CompareBranch() && is_RelAddr16 && x2_imm8) {
1747 switch (casenum) {
1748 case 0: z_cij( r1, x2, cond, branch_target); break;
1749 case 1: z_clij(r1, x2, cond, branch_target); break;
1750 case 2: z_cgij(r1, x2, cond, branch_target); break;
1751 case 3: z_clgij(r1, x2, cond, branch_target); break;
1752 default: ShouldNotReachHere(); break;
1753 }
1754 return;
1755 }
1756
1757 if (x2 == 0) {
1758 switch (casenum) {
1759 case 0: z_ltr(r1, r1); break;
1760 case 1: z_ltr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication!
1761 case 2: z_ltgr(r1, r1); break;
1762 case 3: z_ltgr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication!
1763 default: ShouldNotReachHere(); break;
1764 }
1765 } else {
1766 if ((has_sign && Immediate::is_simm16(x2)) || (!has_sign && Immediate::is_uimm(x2, 15))) {
1767 switch (casenum) {
1768 case 0: z_chi(r1, x2); break;
1769 case 1: z_chi(r1, x2); break; // positive immediate < 2**15
1770 case 2: z_cghi(r1, x2); break;
1771 case 3: z_cghi(r1, x2); break; // positive immediate < 2**15
        default: ShouldNotReachHere(); break;
1773 }
1774 } else if ( (has_sign && Immediate::is_simm32(x2)) || (!has_sign && Immediate::is_uimm32(x2)) ) {
1775 switch (casenum) {
1776 case 0: z_cfi( r1, x2); break;
1777 case 1: z_clfi(r1, x2); break;
1778 case 2: z_cgfi(r1, x2); break;
1779 case 3: z_clgfi(r1, x2); break;
1780 default: ShouldNotReachHere(); break;
1781 }
1782 } else {
1783 // No instruction with immediate operand possible, so load into register.
1784 Register scratch = (r1 != Z_R0) ? Z_R0 : Z_R1;
1785 load_const_optimized(scratch, x2);
1786 switch (casenum) {
1787 case 0: z_cr( r1, scratch); break;
1788 case 1: z_clr(r1, scratch); break;
1789 case 2: z_cgr(r1, scratch); break;
1790 case 3: z_clgr(r1, scratch); break;
1791 default: ShouldNotReachHere(); break;
1792 }
1793 }
1794 }
1795 branch_optimized(cond, branch_target);
1796 }
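
// Immediate tiers used above (illustrative summary): x2 == 0 degenerates to a
// load-and-test; 16-bit immediates use CHI/CGHI; 32-bit immediates use
// CFI/CLFI/CGFI/CLGFI; anything wider is first materialized into Z_R0 or Z_R1
// by load_const_optimized and then compared register-register.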
1797
1798 // Generate an optimal compare and branch to the branch target.
1799 // Optimal means that a relative branch (clgrj, brc or brcl) is used if the
1800 // branch distance is short enough. Loading the target address into a
1801 // register and branching via reg is used as fallback only.
1802 //
1803 // Input:
1804 // r1 - left compare operand
1805 // r2 - right compare operand
1806 void MacroAssembler::compare_and_branch_optimized(Register r1,
1807 Register r2,
1808 Assembler::branch_condition cond,
1809 Label& branch_target,
1810 bool len64,
1811 bool has_sign) {
1812 unsigned int casenum = (len64 ? 2 : 0) + (has_sign ? 0 : 1);
1813
1814 if (branch_target.is_bound()) {
1815 address branch_addr = target(branch_target);
1816 compare_and_branch_optimized(r1, r2, cond, branch_addr, len64, has_sign);
1817 } else {
1818 if (VM_Version::has_CompareBranch() && branch_target.is_near()) {
1819 switch (casenum) {
1820 case 0: z_crj( r1, r2, cond, branch_target); break;
1821 case 1: z_clrj( r1, r2, cond, branch_target); break;
1822 case 2: z_cgrj( r1, r2, cond, branch_target); break;
1823 case 3: z_clgrj(r1, r2, cond, branch_target); break;
1824 default: ShouldNotReachHere(); break;
1825 }
1826 } else {
1827 switch (casenum) {
1828 case 0: z_cr( r1, r2); break;
1829 case 1: z_clr(r1, r2); break;
1830 case 2: z_cgr(r1, r2); break;
1831 case 3: z_clgr(r1, r2); break;
1832 default: ShouldNotReachHere(); break;
1833 }
1834 branch_optimized(cond, branch_target);
1835 }
1836 }
1837 }
1838
1839 //===========================================================================
//=== END   H I G H E R   L E V E L   B R A N C H   E M I T T E R S   ===
1841 //===========================================================================
1842
1843 AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) {
1844 assert(oop_recorder() != nullptr, "this assembler needs an OopRecorder");
1845 int index = oop_recorder()->allocate_metadata_index(obj);
1846 RelocationHolder rspec = metadata_Relocation::spec(index);
1847 return AddressLiteral((address)obj, rspec);
1848 }
1849
1850 AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) {
1851 assert(oop_recorder() != nullptr, "this assembler needs an OopRecorder");
1852 int index = oop_recorder()->find_index(obj);
1853 RelocationHolder rspec = metadata_Relocation::spec(index);
1854 return AddressLiteral((address)obj, rspec);
1855 }
1856
1857 AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) {
1858 assert(oop_recorder() != nullptr, "this assembler needs an OopRecorder");
1859 int oop_index = oop_recorder()->allocate_oop_index(obj);
1860 return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
1861 }
1862
1863 AddressLiteral MacroAssembler::constant_oop_address(jobject obj) {
1864 assert(oop_recorder() != nullptr, "this assembler needs an OopRecorder");
1865 int oop_index = oop_recorder()->find_index(obj);
1866 return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
1867 }
1868
1869 // NOTE: destroys r
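// Computes r = (r != 0) ? 1 : 0, exploiting that the sign bit of (r | -r)
// is set exactly when r != 0.
// Worked example (illustrative, 32-bit): r = 5 -> t = -5 = 0xfffffffb,
// r | t = 0xffffffff, and a logical shift right by 31 yields 1. For r = 0,
// r | t = 0 and the result is 0.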
1870 void MacroAssembler::c2bool(Register r, Register t) {
1871 z_lcr(t, r); // t = -r
1872 z_or(r, t); // r = -r OR r
1873 z_srl(r, 31); // Yields 0 if r was 0, 1 otherwise.
1874 }
1875
1876 // Patch instruction `inst' at offset `inst_pos' to refer to `dest_pos'
1877 // and return the resulting instruction.
1878 // Dest_pos and inst_pos are 32 bit only. These parms can only designate
1879 // relative positions.
1880 // Use correct argument types. Do not pre-calculate distance.
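// Worked example (illustrative): for the 6-byte BRASL/BRCL flavor (case 4
// below), the instruction sits right-aligned in `inst`, so its 32-bit offset
// field occupies bits 31..0. fmask(31, 0) masks out the old offset, and
// simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48) supplies the new
// halfword-scaled offset, which is then OR-ed in.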
1881 unsigned long MacroAssembler::patched_branch(address dest_pos, unsigned long inst, address inst_pos) {
1882 int c = 0;
1883 unsigned long patched_inst = 0;
1884 if (is_call_pcrelative_short(inst) ||
1885 is_branch_pcrelative_short(inst) ||
1886 is_branchoncount_pcrelative_short(inst) ||
1887 is_branchonindex32_pcrelative_short(inst)) {
1888 c = 1;
1889 int m = fmask(15, 0); // simm16(-1, 16, 32);
1890 int v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 32);
1891 patched_inst = (inst & ~m) | v;
1892 } else if (is_compareandbranch_pcrelative_short(inst)) {
1893 c = 2;
1894 long m = fmask(31, 16); // simm16(-1, 16, 48);
1895 long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48);
1896 patched_inst = (inst & ~m) | v;
1897 } else if (is_branchonindex64_pcrelative_short(inst)) {
1898 c = 3;
1899 long m = fmask(31, 16); // simm16(-1, 16, 48);
1900 long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48);
1901 patched_inst = (inst & ~m) | v;
1902 } else if (is_call_pcrelative_long(inst) || is_branch_pcrelative_long(inst)) {
1903 c = 4;
1904 long m = fmask(31, 0); // simm32(-1, 16, 48);
1905 long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48);
1906 patched_inst = (inst & ~m) | v;
1907 } else if (is_pcrelative_long(inst)) { // These are the non-branch pc-relative instructions.
1908 c = 5;
1909 long m = fmask(31, 0); // simm32(-1, 16, 48);
1910 long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48);
1911 patched_inst = (inst & ~m) | v;
1912 } else {
1913 print_dbg_msg(tty, inst, "not a relative branch", 0);
1914 dump_code_range(tty, inst_pos, 32, "not a pcrelative branch");
1915 ShouldNotReachHere();
1916 }
1917
1918 long new_off = get_pcrel_offset(patched_inst);
1919 if (new_off != (dest_pos-inst_pos)) {
1920 tty->print_cr("case %d: dest_pos = %p, inst_pos = %p, disp = %ld(%12.12lx)", c, dest_pos, inst_pos, new_off, new_off);
1921 print_dbg_msg(tty, inst, "<- original instruction: branch patching error", 0);
1922 print_dbg_msg(tty, patched_inst, "<- patched instruction: branch patching error", 0);
1923 #ifdef LUCY_DBG
1924 VM_Version::z_SIGSEGV();
1925 #endif
1926 ShouldNotReachHere();
1927 }
1928 return patched_inst;
1929 }
1930
1931 // Only called when binding labels (share/vm/asm/assembler.cpp)
1932 // Pass arguments as intended. Do not pre-calculate distance.
1933 void MacroAssembler::pd_patch_instruction(address branch, address target, const char* file, int line) {
1934 unsigned long stub_inst;
1935 int inst_len = get_instruction(branch, &stub_inst);
1936
1937 set_instruction(branch, patched_branch(target, stub_inst, branch), inst_len);
1938 }
1939
1940
1941 // Extract relative address (aka offset).
1942 // inv_simm16 works for 4-byte instructions only.
// Compare-and-branch instructions are 6 bytes long and have a 16-bit offset "in the middle".
1944 long MacroAssembler::get_pcrel_offset(unsigned long inst) {
1945
1946 if (MacroAssembler::is_pcrelative_short(inst)) {
1947 if (((inst&0xFFFFffff00000000UL) == 0) && ((inst&0x00000000FFFF0000UL) != 0)) {
1948 return RelAddr::inv_pcrel_off16(inv_simm16(inst));
1949 } else {
1950 return RelAddr::inv_pcrel_off16(inv_simm16_48(inst));
1951 }
1952 }
1953
1954 if (MacroAssembler::is_pcrelative_long(inst)) {
1955 return RelAddr::inv_pcrel_off32(inv_simm32(inst));
1956 }
1957
1958 print_dbg_msg(tty, inst, "not a pcrelative instruction", 6);
1959 #ifdef LUCY_DBG
1960 VM_Version::z_SIGSEGV();
1961 #else
1962 ShouldNotReachHere();
1963 #endif
1964 return -1;
1965 }
1966
1967 long MacroAssembler::get_pcrel_offset(address pc) {
1968 unsigned long inst;
1969 unsigned int len = get_instruction(pc, &inst);
1970
1971 #ifdef ASSERT
1972 long offset;
1973 if (MacroAssembler::is_pcrelative_short(inst) || MacroAssembler::is_pcrelative_long(inst)) {
1974 offset = get_pcrel_offset(inst);
1975 } else {
1976 offset = -1;
1977 }
1978
1979 if (offset == -1) {
1980 dump_code_range(tty, pc, 32, "not a pcrelative instruction");
1981 #ifdef LUCY_DBG
1982 VM_Version::z_SIGSEGV();
1983 #else
1984 ShouldNotReachHere();
1985 #endif
1986 }
1987 return offset;
1988 #else
1989 return get_pcrel_offset(inst);
1990 #endif // ASSERT
1991 }
1992
1993 // Get target address from pc-relative instructions.
1994 address MacroAssembler::get_target_addr_pcrel(address pc) {
1995 assert(is_pcrelative_long(pc), "not a pcrelative instruction");
1996 return pc + get_pcrel_offset(pc);
1997 }
1998
1999 // Patch pc relative load address.
2000 void MacroAssembler::patch_target_addr_pcrel(address pc, address con) {
2001 unsigned long inst;
2002 // Offset is +/- 2**32 -> use long.
2003 ptrdiff_t distance = con - pc;
2004
2005 get_instruction(pc, &inst);
2006
2007 if (is_pcrelative_short(inst)) {
2008 *(short *)(pc+2) = RelAddr::pcrel_off16(con, pc); // Instructions are at least 2-byte aligned, no test required.
2009
2010 // Some extra safety net.
2011 if (!RelAddr::is_in_range_of_RelAddr16(distance)) {
2012 print_dbg_msg(tty, inst, "distance out of range (16bit)", 4);
2013 dump_code_range(tty, pc, 32, "distance out of range (16bit)");
      guarantee(RelAddr::is_in_range_of_RelAddr16(distance), "too far away (more than +/- 2**16)");
2015 }
2016 return;
2017 }
2018
2019 if (is_pcrelative_long(inst)) {
2020 *(int *)(pc+2) = RelAddr::pcrel_off32(con, pc);
2021
    // Some extra safety net.
2023 if (!RelAddr::is_in_range_of_RelAddr32(distance)) {
2024 print_dbg_msg(tty, inst, "distance out of range (32bit)", 6);
2025 dump_code_range(tty, pc, 32, "distance out of range (32bit)");
      guarantee(RelAddr::is_in_range_of_RelAddr32(distance), "too far away (more than +/- 2**32)");
2027 }
2028 return;
2029 }
2030
2031 guarantee(false, "not a pcrelative instruction to patch!");
2032 }
2033
2034 // "Current PC" here means the address just behind the basr instruction.
2035 address MacroAssembler::get_PC(Register result) {
2036 z_basr(result, Z_R0); // Don't branch, just save next instruction address in result.
2037 return pc();
2038 }
2039
2040 // Get current PC + offset.
2041 // Offset given in bytes, must be even!
2042 // "Current PC" here means the address of the larl instruction plus the given offset.
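// Usage sketch (illustrative): get_PC(Z_R1, 8) makes Z_R1 point 8 bytes past
// the LARL instruction and returns that same address to the caller. The LARL
// immediate counts halfwords, hence the offset/2 scaling below.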
2043 address MacroAssembler::get_PC(Register result, int64_t offset) {
2044 address here = pc();
2045 z_larl(result, offset/2); // Save target instruction address in result.
2046 return here + offset;
2047 }
2048
2049 void MacroAssembler::instr_size(Register size, Register pc) {
2050 // Extract 2 most significant bits of current instruction.
2051 z_llgc(size, Address(pc));
2052 z_srl(size, 6);
2053 // Compute (x+3)&6 which translates 0->2, 1->4, 2->4, 3->6.
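  // Worked example (illustrative): first byte 0xE3 (a 6-byte RXY opcode such
  // as LG) has top bits 0b11 -> 3, and (3+3)&6 = 6. First byte 0x18 (LR) has
  // top bits 0b00 -> 0, and (0+3)&6 = 2.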
2054 z_ahi(size, 3);
2055 z_nill(size, 6);
2056 }
2057
2058 // Resize_frame with SP(new) = SP(old) - [offset].
void MacroAssembler::resize_frame_sub(Register offset, Register fp, bool load_fp) {
2061 assert_different_registers(offset, fp, Z_SP);
2062 if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); }
2063
2064 z_sgr(Z_SP, offset);
2065 z_stg(fp, _z_abi(callers_sp), Z_SP);
2066 }
2067
2068 // Resize_frame with SP(new) = [newSP] + offset.
2069 // This emitter is useful if we already have calculated a pointer
2070 // into the to-be-allocated stack space, e.g. with special alignment properties,
2071 // but need some additional space, e.g. for spilling.
2072 // newSP is the pre-calculated pointer. It must not be modified.
2073 // fp holds, or is filled with, the frame pointer.
2074 // offset is the additional increment which is added to addr to form the new SP.
2075 // Note: specify a negative value to reserve more space!
2076 // load_fp == true only indicates that fp is not pre-filled with the frame pointer.
2077 // It does not guarantee that fp contains the frame pointer at the end.
2078 void MacroAssembler::resize_frame_abs_with_offset(Register newSP, Register fp, int offset, bool load_fp) {
2079 assert_different_registers(newSP, fp, Z_SP);
2080
2081 if (load_fp) {
2082 z_lg(fp, _z_abi(callers_sp), Z_SP);
2083 }
2084
2085 add2reg(Z_SP, offset, newSP);
2086 z_stg(fp, _z_abi(callers_sp), Z_SP);
2087 }
2088
2089 // Resize_frame with SP(new) = [newSP].
2090 // load_fp == true only indicates that fp is not pre-filled with the frame pointer.
2091 // It does not guarantee that fp contains the frame pointer at the end.
2092 void MacroAssembler::resize_frame_absolute(Register newSP, Register fp, bool load_fp) {
2093 assert_different_registers(newSP, fp, Z_SP);
2094
2095 if (load_fp) {
2096 z_lg(fp, _z_abi(callers_sp), Z_SP); // need to use load/store.
2097 }
2098
2099 z_lgr(Z_SP, newSP);
2100 if (newSP != Z_R0) { // make sure we generate correct code, no matter what register newSP uses.
2101 z_stg(fp, _z_abi(callers_sp), newSP);
2102 } else {
2103 z_stg(fp, _z_abi(callers_sp), Z_SP);
2104 }
2105 }
2106
2107 // Resize_frame with SP(new) = SP(old) + offset.
2108 void MacroAssembler::resize_frame(RegisterOrConstant offset, Register fp, bool load_fp) {
2109 assert_different_registers(fp, Z_SP);
2110
2111 if (load_fp) {
2112 z_lg(fp, _z_abi(callers_sp), Z_SP);
2113 }
2114 add64(Z_SP, offset);
2115 z_stg(fp, _z_abi(callers_sp), Z_SP);
2116 }
2117
2118 void MacroAssembler::push_frame(Register bytes, Register old_sp, bool copy_sp, bool bytes_with_inverted_sign) {
2119 #ifdef ASSERT
2120 assert_different_registers(bytes, old_sp, Z_SP);
2121 if (!copy_sp) {
2122 z_cgr(old_sp, Z_SP);
2123 asm_assert(bcondEqual, "[old_sp]!=[Z_SP]", 0x211);
2124 }
2125 #endif
2126 if (copy_sp) { z_lgr(old_sp, Z_SP); }
2127 if (bytes_with_inverted_sign) {
2128 z_agr(Z_SP, bytes);
2129 } else {
2130 z_sgr(Z_SP, bytes); // Z_sgfr sufficient, but probably not faster.
2131 }
2132 z_stg(old_sp, _z_abi(callers_sp), Z_SP);
2133 }
2134
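// Push a frame of `bytes` size, rounded up to the frame alignment, and store
// the back chain. Usage sketch (illustrative): push_frame(32, Z_R1) lowers
// Z_SP by the aligned size, stores the old SP at the new frame's callers_sp
// slot, and returns the actual (aligned) frame size; Z_R1 then holds the old
// SP, i.e. the frame pointer.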
2135 unsigned int MacroAssembler::push_frame(unsigned int bytes, Register scratch) {
2136 long offset = Assembler::align(bytes, frame::alignment_in_bytes);
2137 assert(offset > 0, "should push a frame with positive size, size = %ld.", offset);
2138 assert(Displacement::is_validDisp(-offset), "frame size out of range, size = %ld", offset);
2139
2140 // We must not write outside the current stack bounds (given by Z_SP).
2141 // Thus, we have to first update Z_SP and then store the previous SP as stack linkage.
2142 // We rely on Z_R0 by default to be available as scratch.
2143 z_lgr(scratch, Z_SP);
2144 add2reg(Z_SP, -offset);
2145 z_stg(scratch, _z_abi(callers_sp), Z_SP);
2146 #ifdef ASSERT
2147 // Just make sure nobody uses the value in the default scratch register.
2148 // When another register is used, the caller might rely on it containing the frame pointer.
2149 if (scratch == Z_R0) {
2150 z_iihf(scratch, 0xbaadbabe);
2151 z_iilf(scratch, 0xdeadbeef);
2152 }
2153 #endif
2154 return offset;
2155 }
2156
2157 // Push a frame of size `bytes' plus abi160 on top.
2158 unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) {
2159 BLOCK_COMMENT("push_frame_abi160 {");
2160 unsigned int res = push_frame(bytes + frame::z_abi_160_size);
2161 BLOCK_COMMENT("} push_frame_abi160");
2162 return res;
2163 }
2164
2165 // Pop current C frame.
2166 void MacroAssembler::pop_frame() {
2167 BLOCK_COMMENT("pop_frame {");
2168 Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP);
2169 BLOCK_COMMENT("} pop_frame");
2170 }
2171
2172 // Pop current C frame and restore return PC register (Z_R14).
2173 void MacroAssembler::pop_frame_restore_retPC(int frame_size_in_bytes) {
2174 BLOCK_COMMENT("pop_frame_restore_retPC:");
2175 int retPC_offset = _z_common_abi(return_pc) + frame_size_in_bytes;
2176 // If possible, pop frame by add instead of load (a penny saved is a penny got :-).
2177 if (Displacement::is_validDisp(retPC_offset)) {
2178 z_lg(Z_R14, retPC_offset, Z_SP);
2179 add2reg(Z_SP, frame_size_in_bytes);
2180 } else {
2181 add2reg(Z_SP, frame_size_in_bytes);
2182 restore_return_pc();
2183 }
2184 }
2185
2186 void MacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) {
2187 if (allow_relocation) {
2188 call_c(entry_point);
2189 } else {
2190 call_c_static(entry_point);
2191 }
2192 }
2193
2194 void MacroAssembler::call_VM_leaf_base(address entry_point) {
2195 bool allow_relocation = true;
2196 call_VM_leaf_base(entry_point, allow_relocation);
2197 }
2198
2199 int MacroAssembler::ic_check_size() {
2200 int ic_size = 24;
2201 if (!ImplicitNullChecks) {
2202 ic_size += 6;
2203 }
2204 if (UseCompactObjectHeaders) {
2205 ic_size += 12;
2206 } else {
2207 ic_size += 6; // either z_llgf or z_lg
2208 }
2209 return ic_size;
2210 }
2211
2212 int MacroAssembler::ic_check(int end_alignment) {
2213 Register R2_receiver = Z_ARG1;
2214 Register R0_scratch = Z_R0_scratch;
2215 Register R1_scratch = Z_R1_scratch;
2216 Register R9_data = Z_inline_cache;
2217 Label success, failure;
2218
2219 // The UEP of a code blob ensures that the VEP is padded. However, the padding of the UEP is placed
2220 // before the inline cache check, so we don't have to execute any nop instructions when dispatching
2221 // through the UEP, yet we can ensure that the VEP is aligned appropriately. That's why we align
  // before the inline cache check here, and not after.
2223 align(end_alignment, offset() + ic_check_size());
2224
2225 int uep_offset = offset();
2226 if (!ImplicitNullChecks) {
2227 z_cgij(R2_receiver, 0, Assembler::bcondEqual, failure);
2228 }
2229
2230 if (UseCompactObjectHeaders) {
2231 load_narrow_klass_compact(R1_scratch, R2_receiver);
2232 } else {
2233 z_llgf(R1_scratch, Address(R2_receiver, oopDesc::klass_offset_in_bytes()));
2234 }
2235 z_cg(R1_scratch, Address(R9_data, in_bytes(CompiledICData::speculated_klass_offset())));
2236 z_bre(success);
2237
2238 bind(failure);
2239 load_const(R1_scratch, AddressLiteral(SharedRuntime::get_ic_miss_stub()));
2240 z_br(R1_scratch);
2241 bind(success);
2242
2243 assert((offset() % end_alignment) == 0, "Misaligned verified entry point, offset() = %d, end_alignment = %d", offset(), end_alignment);
2244 return uep_offset;
2245 }
2246
2247 void MacroAssembler::call_VM_base(Register oop_result,
2248 Register last_java_sp,
2249 address entry_point,
2250 bool allow_relocation,
2251 bool check_exceptions) { // Defaults to true.
  // If allow_relocation is true, the generated code must be fit for code
  // relocation or referenced data relocation. In other words: all addresses
  // must be considered variable. PC-relative addressing is not possible then.
  // If allow_relocation is false, addresses and offsets may be considered
  // stable, enabling us to take advantage of some PC-relative addressing
  // tweaks. These might improve performance and reduce code size.
2259
2260 // Determine last_java_sp register.
2261 if (!last_java_sp->is_valid()) {
2262 last_java_sp = Z_SP; // Load Z_SP as SP.
2263 }
2264
2265 set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, Z_R1, allow_relocation);
2266
2267 // ARG1 must hold thread address.
2268 z_lgr(Z_ARG1, Z_thread);
2269
2270 address return_pc = nullptr;
2271 if (allow_relocation) {
2272 return_pc = call_c(entry_point);
2273 } else {
2274 return_pc = call_c_static(entry_point);
2275 }
2276
2277 reset_last_Java_frame(allow_relocation);
2278
2279 // C++ interp handles this in the interpreter.
2280 check_and_handle_popframe(Z_thread);
2281 check_and_handle_earlyret(Z_thread);
2282
2283 // Check for pending exceptions.
2284 if (check_exceptions) {
2285 // Check for pending exceptions (java_thread is set upon return).
2286 load_and_test_long(Z_R0_scratch, Address(Z_thread, Thread::pending_exception_offset()));
2287
    // This used to conditionally jump to forward_exception. However, if the
    // code gets relocated, that branch might no longer reach. So we branch
    // around an absolute call instead, which always reaches.
2291
2292 Label ok;
2293 z_bre(ok); // Bcondequal is the same as bcondZero.
2294 call_stub(StubRoutines::forward_exception_entry());
2295 bind(ok);
2296 }
2297
2298 // Get oop result if there is one and reset the value in the thread.
2299 if (oop_result->is_valid()) {
2300 get_vm_result_oop(oop_result);
2301 }
2302
2303 _last_calls_return_pc = return_pc; // Wipe out other (error handling) calls.
2304 }
2305
2306 void MacroAssembler::call_VM_base(Register oop_result,
2307 Register last_java_sp,
2308 address entry_point,
2309 bool check_exceptions) { // Defaults to true.
2310 bool allow_relocation = true;
2311 call_VM_base(oop_result, last_java_sp, entry_point, allow_relocation, check_exceptions);
2312 }
2313
2314 // VM calls without explicit last_java_sp.
2315
2316 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
2317 // Call takes possible detour via InterpreterMacroAssembler.
2318 call_VM_base(oop_result, noreg, entry_point, true, check_exceptions);
2319 }
2320
2321 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
2322 // Z_ARG1 is reserved for the thread.
2323 lgr_if_needed(Z_ARG2, arg_1);
2324 call_VM(oop_result, entry_point, check_exceptions);
2325 }
2326
2327 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
2328 // Z_ARG1 is reserved for the thread.
2329 assert_different_registers(arg_2, Z_ARG2);
2330 lgr_if_needed(Z_ARG2, arg_1);
2331 lgr_if_needed(Z_ARG3, arg_2);
2332 call_VM(oop_result, entry_point, check_exceptions);
2333 }
2334
2335 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2,
2336 Register arg_3, bool check_exceptions) {
2337 // Z_ARG1 is reserved for the thread.
2338 assert_different_registers(arg_3, Z_ARG2, Z_ARG3);
2339 assert_different_registers(arg_2, Z_ARG2);
2340 lgr_if_needed(Z_ARG2, arg_1);
2341 lgr_if_needed(Z_ARG3, arg_2);
2342 lgr_if_needed(Z_ARG4, arg_3);
2343 call_VM(oop_result, entry_point, check_exceptions);
2344 }
2345
2346 // VM static calls without explicit last_java_sp.
2347
2348 void MacroAssembler::call_VM_static(Register oop_result, address entry_point, bool check_exceptions) {
2349 // Call takes possible detour via InterpreterMacroAssembler.
2350 call_VM_base(oop_result, noreg, entry_point, false, check_exceptions);
2351 }
2352
2353 void MacroAssembler::call_VM_static(Register oop_result, address entry_point, Register arg_1, Register arg_2,
2354 Register arg_3, bool check_exceptions) {
2355 // Z_ARG1 is reserved for the thread.
2356 assert_different_registers(arg_3, Z_ARG2, Z_ARG3);
2357 assert_different_registers(arg_2, Z_ARG2);
2358 lgr_if_needed(Z_ARG2, arg_1);
2359 lgr_if_needed(Z_ARG3, arg_2);
2360 lgr_if_needed(Z_ARG4, arg_3);
2361 call_VM_static(oop_result, entry_point, check_exceptions);
2362 }
2363
2364 // VM calls with explicit last_java_sp.
2365
2366 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions) {
2367 // Call takes possible detour via InterpreterMacroAssembler.
2368 call_VM_base(oop_result, last_java_sp, entry_point, true, check_exceptions);
2369 }
2370
2371 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
2372 // Z_ARG1 is reserved for the thread.
2373 lgr_if_needed(Z_ARG2, arg_1);
2374 call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
2375 }
2376
2377 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1,
2378 Register arg_2, bool check_exceptions) {
2379 // Z_ARG1 is reserved for the thread.
2380 assert_different_registers(arg_2, Z_ARG2);
2381 lgr_if_needed(Z_ARG2, arg_1);
2382 lgr_if_needed(Z_ARG3, arg_2);
2383 call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
2384 }
2385
2386 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1,
2387 Register arg_2, Register arg_3, bool check_exceptions) {
2388 // Z_ARG1 is reserved for the thread.
2389 assert_different_registers(arg_3, Z_ARG2, Z_ARG3);
2390 assert_different_registers(arg_2, Z_ARG2);
2391 lgr_if_needed(Z_ARG2, arg_1);
2392 lgr_if_needed(Z_ARG3, arg_2);
2393 lgr_if_needed(Z_ARG4, arg_3);
2394 call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
2395 }
2396
2397 // VM leaf calls.
2398
2399 void MacroAssembler::call_VM_leaf(address entry_point) {
2400 // Call takes possible detour via InterpreterMacroAssembler.
2401 call_VM_leaf_base(entry_point, true);
2402 }
2403
2404 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
2405 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
2406 call_VM_leaf(entry_point);
2407 }
2408
2409 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
2410 assert_different_registers(arg_2, Z_ARG1);
2411 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
2412 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
2413 call_VM_leaf(entry_point);
2414 }
2415
2416 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
2417 assert_different_registers(arg_3, Z_ARG1, Z_ARG2);
2418 assert_different_registers(arg_2, Z_ARG1);
2419 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
2420 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
2421 if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3);
2422 call_VM_leaf(entry_point);
2423 }
2424
2425 // Static VM leaf calls.
2426 // Really static VM leaf calls are never patched.
2427
2428 void MacroAssembler::call_VM_leaf_static(address entry_point) {
2429 // Call takes possible detour via InterpreterMacroAssembler.
2430 call_VM_leaf_base(entry_point, false);
2431 }
2432
2433 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1) {
2434 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
2435 call_VM_leaf_static(entry_point);
2436 }
2437
2438 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2) {
2439 assert_different_registers(arg_2, Z_ARG1);
2440 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
2441 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
2442 call_VM_leaf_static(entry_point);
2443 }
2444
2445 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
2446 assert_different_registers(arg_3, Z_ARG1, Z_ARG2);
2447 assert_different_registers(arg_2, Z_ARG1);
2448 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
2449 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
2450 if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3);
2451 call_VM_leaf_static(entry_point);
2452 }
2453
2454 // Don't use detour via call_c(reg).
2455 address MacroAssembler::call_c(address function_entry) {
2456 load_const(Z_R1, function_entry);
2457 return call(Z_R1);
2458 }
2459
2460 // Variant for really static (non-relocatable) calls which are never patched.
2461 address MacroAssembler::call_c_static(address function_entry) {
2462 load_absolute_address(Z_R1, function_entry);
2463 #if 0 // def ASSERT
2464 // Verify that call site did not move.
2465 load_const_optimized(Z_R0, function_entry);
2466 z_cgr(Z_R1, Z_R0);
2467 z_brc(bcondEqual, 3);
2468 z_illtrap(0xba);
2469 #endif
2470 return call(Z_R1);
2471 }
2472
2473 address MacroAssembler::call_c_opt(address function_entry) {
2474 bool success = call_far_patchable(function_entry, -2 /* emit relocation + constant */);
2475 _last_calls_return_pc = success ? pc() : nullptr;
2476 return _last_calls_return_pc;
2477 }
2478
2479 // Identify a call_far_patchable instruction: LARL + LG + BASR
2480 //
2481 // nop ; optionally, if required for alignment
2482 // lgrl rx,A(TOC entry) ; PC-relative access into constant pool
2483 // basr Z_R14,rx ; end of this instruction must be aligned to a word boundary
2484 //
2485 // Code pattern will eventually get patched into variant2 (see below for detection code).
2486 //
2487 bool MacroAssembler::is_call_far_patchable_variant0_at(address instruction_addr) {
2488 address iaddr = instruction_addr;
2489
2490 // Check for the actual load instruction.
2491 if (!is_load_const_from_toc(iaddr)) { return false; }
2492 iaddr += load_const_from_toc_size();
2493
2494 // Check for the call (BASR) instruction, finally.
2495 assert(iaddr-instruction_addr+call_byregister_size() == call_far_patchable_size(), "size mismatch");
2496 return is_call_byregister(iaddr);
2497 }
2498
2499 // Identify a call_far_patchable instruction: BRASL
2500 //
// Code pattern that suits atomic patching:
//   nop                    ; Optionally, if required for alignment.
//   nop ...                ; Multiple filler nops to compensate for size difference (variant0 is longer).
//   nop                    ; For code pattern detection: Prepend each BRASL with a nop.
//   brasl Z_R14,<reladdr>  ; End of code must be 4-byte aligned!
2506 bool MacroAssembler::is_call_far_patchable_variant2_at(address instruction_addr) {
2507 const address call_addr = (address)((intptr_t)instruction_addr + call_far_patchable_size() - call_far_pcrelative_size());
2508
2509 // Check for correct number of leading nops.
2510 address iaddr;
2511 for (iaddr = instruction_addr; iaddr < call_addr; iaddr += nop_size()) {
2512 if (!is_z_nop(iaddr)) { return false; }
2513 }
2514 assert(iaddr == call_addr, "sanity");
2515
2516 // --> Check for call instruction.
2517 if (is_call_far_pcrelative(call_addr)) {
2518 assert(call_addr-instruction_addr+call_far_pcrelative_size() == call_far_patchable_size(), "size mismatch");
2519 return true;
2520 }
2521
2522 return false;
2523 }
2524
// Emit a NOT mt-safely patchable 64-bit absolute call.
2526 // If toc_offset == -2, then the destination of the call (= target) is emitted
2527 // to the constant pool and a runtime_call relocation is added
2528 // to the code buffer.
2529 // If toc_offset != -2, target must already be in the constant pool at
2530 // _ctableStart+toc_offset (a caller can retrieve toc_offset
2531 // from the runtime_call relocation).
2532 // Special handling of emitting to scratch buffer when there is no constant pool.
2533 // Slightly changed code pattern. We emit an additional nop if we would
2534 // not end emitting at a word aligned address. This is to ensure
2535 // an atomically patchable displacement in brasl instructions.
2536 //
2537 // A call_far_patchable comes in different flavors:
2538 // - LARL(CP) / LG(CP) / BR (address in constant pool, access via CP register)
2539 // - LGRL(CP) / BR (address in constant pool, pc-relative access)
2540 // - BRASL (relative address of call target coded in instruction)
2541 // All flavors occupy the same amount of space. Length differences are compensated
2542 // by leading nops, such that the instruction sequence always ends at the same
2543 // byte offset. This is required to keep the return offset constant.
2544 // Furthermore, the return address (the end of the instruction sequence) is forced
2545 // to be on a 4-byte boundary. This is required for atomic patching, should we ever
2546 // need to patch the call target of the BRASL flavor.
2547 // RETURN value: false, if no constant pool entry could be allocated, true otherwise.
2548 bool MacroAssembler::call_far_patchable(address target, int64_t tocOffset) {
2549 // Get current pc and ensure word alignment for end of instr sequence.
2550 const address start_pc = pc();
2551 const intptr_t start_off = offset();
2552 assert(!call_far_patchable_requires_alignment_nop(start_pc), "call_far_patchable requires aligned address");
2553 const ptrdiff_t dist = (ptrdiff_t)(target - (start_pc + 2)); // Prepend each BRASL with a nop.
2554 const bool emit_target_to_pool = (tocOffset == -2) && !code_section()->scratch_emit();
2555 const bool emit_relative_call = !emit_target_to_pool &&
2556 RelAddr::is_in_range_of_RelAddr32(dist) &&
2557 ReoptimizeCallSequences &&
2558 !code_section()->scratch_emit();
2559
2560 if (emit_relative_call) {
2561 // Add padding to get the same size as below.
2562 const unsigned int padding = call_far_patchable_size() - call_far_pcrelative_size();
2563 unsigned int current_padding;
2564 for (current_padding = 0; current_padding < padding; current_padding += nop_size()) { z_nop(); }
2565 assert(current_padding == padding, "sanity");
2566
2567 // relative call: len = 2(nop) + 6 (brasl)
2568 // CodeBlob resize cannot occur in this case because
2569 // this call is emitted into pre-existing space.
2570 z_nop(); // Prepend each BRASL with a nop.
2571 z_brasl(Z_R14, target);
2572 } else {
2573 // absolute call: Get address from TOC.
2574 // len = (load TOC){6|0} + (load from TOC){6} + (basr){2} = {14|8}
2575 if (emit_target_to_pool) {
2576 // When emitting the call for the first time, we do not need to use
2577 // the pc-relative version. It will be patched anyway, when the code
2578 // buffer is copied.
2579 // Relocation is not needed when !ReoptimizeCallSequences.
2580 relocInfo::relocType rt = ReoptimizeCallSequences ? relocInfo::runtime_call_w_cp_type : relocInfo::none;
2581 AddressLiteral dest(target, rt);
2582 // Store_oop_in_toc() adds dest to the constant table. As side effect, this kills
2583 // inst_mark(). Reset if possible.
2584 bool reset_mark = (inst_mark() == pc());
2585 tocOffset = store_oop_in_toc(dest);
2586 if (reset_mark) { set_inst_mark(); }
2587 if (tocOffset == -1) {
2588 return false; // Couldn't create constant pool entry.
2589 }
2590 }
2591 assert(offset() == start_off, "emit no code before this point!");
2592
2593 address tocPos = pc() + tocOffset;
2594 if (emit_target_to_pool) {
2595 tocPos = code()->consts()->start() + tocOffset;
2596 }
2597 load_long_pcrelative(Z_R14, tocPos);
2598 z_basr(Z_R14, Z_R14);
2599 }
2600
2601 #ifdef ASSERT
2602 // Assert that we can identify the emitted call.
2603 assert(is_call_far_patchable_at(addr_at(start_off)), "can't identify emitted call");
2604 assert(offset() == start_off+call_far_patchable_size(), "wrong size");
2605
2606 if (emit_target_to_pool) {
2607 assert(get_dest_of_call_far_patchable_at(addr_at(start_off), code()->consts()->start()) == target,
2608 "wrong encoding of dest address");
2609 }
2610 #endif
2611 return true; // success
2612 }
2613
2614 // Identify a call_far_patchable instruction.
2615 // For more detailed information see header comment of call_far_patchable.
2616 bool MacroAssembler::is_call_far_patchable_at(address instruction_addr) {
2617 return is_call_far_patchable_variant2_at(instruction_addr) || // short version: BRASL
2618 is_call_far_patchable_variant0_at(instruction_addr); // long version LARL + LG + BASR
2619 }
2620
2621 // Does the call_far_patchable instruction use a pc-relative encoding
2622 // of the call destination?
2623 bool MacroAssembler::is_call_far_patchable_pcrelative_at(address instruction_addr) {
2624 // Variant 2 is pc-relative.
2625 return is_call_far_patchable_variant2_at(instruction_addr);
2626 }
2627
2628 bool MacroAssembler::is_call_far_pcrelative(address instruction_addr) {
2629 // Prepend each BRASL with a nop.
2630 return is_z_nop(instruction_addr) && is_z_brasl(instruction_addr + nop_size()); // Match at position after one nop required.
2631 }
2632
2633 // Set destination address of a call_far_patchable instruction.
2634 void MacroAssembler::set_dest_of_call_far_patchable_at(address instruction_addr, address dest, int64_t tocOffset) {
2635 ResourceMark rm;
2636
2637 // Now that CP entry is verified, patch call to a pc-relative call (if circumstances permit).
2638 int code_size = MacroAssembler::call_far_patchable_size();
2639 CodeBuffer buf(instruction_addr, code_size);
2640 MacroAssembler masm(&buf);
2641 masm.call_far_patchable(dest, tocOffset);
2642 ICache::invalidate_range(instruction_addr, code_size); // Empty on z.
2643 }
2644
2645 // Get dest address of a call_far_patchable instruction.
2646 address MacroAssembler::get_dest_of_call_far_patchable_at(address instruction_addr, address ctable) {
2647 // Dynamic TOC: absolute address in constant pool.
2648 // Check variant2 first, it is more frequent.
2649
2650 // Relative address encoded in call instruction.
2651 if (is_call_far_patchable_variant2_at(instruction_addr)) {
2652 return MacroAssembler::get_target_addr_pcrel(instruction_addr + nop_size()); // Prepend each BRASL with a nop.
2653
2654 // Absolute address in constant pool.
2655 } else if (is_call_far_patchable_variant0_at(instruction_addr)) {
2656 address iaddr = instruction_addr;
2657
2658 long tocOffset = get_load_const_from_toc_offset(iaddr);
2659 address tocLoc = iaddr + tocOffset;
2660 return *(address *)(tocLoc);
2661 } else {
2662 fprintf(stderr, "MacroAssembler::get_dest_of_call_far_patchable_at has a problem at %p:\n", instruction_addr);
2663 fprintf(stderr, "not a call_far_patchable: %16.16lx %16.16lx, len = %d\n",
2664 *(unsigned long*)instruction_addr,
2665 *(unsigned long*)(instruction_addr+8),
2666 call_far_patchable_size());
2667 Disassembler::decode(instruction_addr, instruction_addr+call_far_patchable_size());
2668 ShouldNotReachHere();
2669 return nullptr;
2670 }
2671 }
2672
2673 void MacroAssembler::align_call_far_patchable(address pc) {
2674 if (call_far_patchable_requires_alignment_nop(pc)) { z_nop(); }
2675 }
2676
2677 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
2678 }
2679
2680 void MacroAssembler::check_and_handle_popframe(Register java_thread) {
2681 }
2682
2683 // Read from the polling page.
2684 // Use TM or TMY instruction, depending on read offset.
2685 // offset = 0: Use TM, safepoint polling.
2686 // offset < 0: Use TMY, profiling safepoint polling.
2687 void MacroAssembler::load_from_polling_page(Register polling_page_address, int64_t offset) {
2688 if (Immediate::is_uimm12(offset)) {
2689 z_tm(offset, polling_page_address, mask_safepoint);
2690 } else {
2691 z_tmy(offset, polling_page_address, mask_profiling);
2692 }
2693 }
2694
2695 // Check whether z_instruction is a read access to the polling page
2696 // which was emitted by load_from_polling_page(..).
2697 bool MacroAssembler::is_load_from_polling_page(address instr_loc) {
2698 unsigned long z_instruction;
2699 unsigned int ilen = get_instruction(instr_loc, &z_instruction);
2700
2701 if (ilen == 2) { return false; } // It's none of the allowed instructions.
2702
2703 if (ilen == 4) {
2704 if (!is_z_tm(z_instruction)) { return false; } // It's len=4, but not a z_tm. fail.
2705
2706 int ms = inv_mask(z_instruction,8,32); // mask
2707 int ra = inv_reg(z_instruction,16,32); // base register
2708 int ds = inv_uimm12(z_instruction); // displacement
2709
2710 if (!(ds == 0 && ra != 0 && ms == mask_safepoint)) {
2711 return false; // It's not a z_tm(0, ra, mask_safepoint). Fail.
2712 }
2713
2714 } else { /* if (ilen == 6) */
2715
2716 assert(!is_z_lg(z_instruction), "old form (LG) polling page access. Please fix and use TM(Y).");
2717
2718 if (!is_z_tmy(z_instruction)) { return false; } // It's len=6, but not a z_tmy. fail.
2719
    int ms = inv_mask(z_instruction,8,48);   // mask
    int ra = inv_reg(z_instruction,16,48);   // base register
    int ds = inv_simm20(z_instruction);      // displacement

    // Mirror the 4-byte check above. load_from_polling_page() emits TMY only
    // when the offset does not fit into uimm12, and always with mask_profiling.
    if (!(!Immediate::is_uimm12(ds) && ra != 0 && ms == mask_profiling)) {
      return false; // It's not a z_tmy(ds, ra, mask_profiling). Fail.
    }
  }
2724
2725 return true;
2726 }
2727
2728 // Extract poll address from instruction and ucontext.
2729 address MacroAssembler::get_poll_address(address instr_loc, void* ucontext) {
2730 assert(ucontext != nullptr, "must have ucontext");
2731 ucontext_t* uc = (ucontext_t*) ucontext;
2732 unsigned long z_instruction;
2733 unsigned int ilen = get_instruction(instr_loc, &z_instruction);
2734
2735 if (ilen == 4 && is_z_tm(z_instruction)) {
2736 int ra = inv_reg(z_instruction, 16, 32); // base register
2737 int ds = inv_uimm12(z_instruction); // displacement
2738 address addr = (address)uc->uc_mcontext.gregs[ra];
2739 return addr + ds;
2740 } else if (ilen == 6 && is_z_tmy(z_instruction)) {
2741 int ra = inv_reg(z_instruction, 16, 48); // base register
2742 int ds = inv_simm20(z_instruction); // displacement
2743 address addr = (address)uc->uc_mcontext.gregs[ra];
2744 return addr + ds;
2745 }
2746
2747 ShouldNotReachHere();
2748 return nullptr;
2749 }
2750
2751 // Extract poll register from instruction.
2752 uint MacroAssembler::get_poll_register(address instr_loc) {
2753 unsigned long z_instruction;
2754 unsigned int ilen = get_instruction(instr_loc, &z_instruction);
2755
2756 if (ilen == 4 && is_z_tm(z_instruction)) {
2757 return (uint)inv_reg(z_instruction, 16, 32); // base register
2758 } else if (ilen == 6 && is_z_tmy(z_instruction)) {
2759 return (uint)inv_reg(z_instruction, 16, 48); // base register
2760 }
2761
2762 ShouldNotReachHere();
2763 return 0;
2764 }
2765
2766 void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) {
2767 const Address poll_byte_addr(Z_thread, in_bytes(JavaThread::polling_word_offset()) + 7 /* Big Endian */);
2768 // Armed page has poll_bit set.
2769 z_tm(poll_byte_addr, SafepointMechanism::poll_bit());
2770 z_brnaz(slow_path);
2771 }
2772
2773 // Don't rely on register locking, always use Z_R1 as scratch register instead.
2774 void MacroAssembler::bang_stack_with_offset(int offset) {
2775 // Stack grows down, caller passes positive offset.
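  // Usage sketch (illustrative): callers typically probe one page at a time,
  // e.g. bang_stack_with_offset(p * (int)os::vm_page_size()) for p = 1..n,
  // touching guard pages below Z_SP without modifying Z_SP itself.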
2776 assert(offset > 0, "must bang with positive offset");
2777 if (Displacement::is_validDisp(-offset)) {
2778 z_tmy(-offset, Z_SP, mask_stackbang);
2779 } else {
2780 add2reg(Z_R1, -offset, Z_SP); // Do not destroy Z_SP!!!
2781 z_tm(0, Z_R1, mask_stackbang); // Just banging.
2782 }
2783 }
2784
2785 void MacroAssembler::reserved_stack_check(Register return_pc) {
2786 // Test if reserved zone needs to be enabled.
2787 Label no_reserved_zone_enabling;
2788 assert(return_pc == Z_R14, "Return pc must be in R14 before z_br() to StackOverflow stub.");
2789 BLOCK_COMMENT("reserved_stack_check {");
2790
2791 z_clg(Z_SP, Address(Z_thread, JavaThread::reserved_stack_activation_offset()));
2792 z_brl(no_reserved_zone_enabling);
2793
2794 // Enable reserved zone again, throw stack overflow exception.
2795 save_return_pc();
2796 push_frame_abi160(0);
2797 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), Z_thread);
2798 pop_frame();
2799 restore_return_pc();
2800
2801 load_const_optimized(Z_R1, SharedRuntime::throw_delayed_StackOverflowError_entry());
2802 // Don't use call() or z_basr(), they will invalidate Z_R14 which contains the return pc.
2803 z_br(Z_R1);
2804
2805 should_not_reach_here();
2806
2807 bind(no_reserved_zone_enabling);
2808 BLOCK_COMMENT("} reserved_stack_check");
2809 }
2810
2811 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
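// Bump-pointer allocation: obj = tlab_top; end = obj + size; if end > tlab_end,
// branch to slow_case, else tlab_top = end. The size is taken from
// var_size_in_bytes if that register is valid, from con_size_in_bytes otherwise.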
2812 void MacroAssembler::tlab_allocate(Register obj,
2813 Register var_size_in_bytes,
2814 int con_size_in_bytes,
2815 Register t1,
2816 Label& slow_case) {
2817 assert_different_registers(obj, var_size_in_bytes, t1);
2818 Register end = t1;
2819 Register thread = Z_thread;
2820
2821 z_lg(obj, Address(thread, JavaThread::tlab_top_offset()));
2822 if (var_size_in_bytes == noreg) {
2823 z_lay(end, Address(obj, con_size_in_bytes));
2824 } else {
2825 z_lay(end, Address(obj, var_size_in_bytes));
2826 }
2827 z_cg(end, Address(thread, JavaThread::tlab_end_offset()));
2828 branch_optimized(bcondHigh, slow_case);
2829
2830 // Update the tlab top pointer.
2831 z_stg(end, Address(thread, JavaThread::tlab_top_offset()));
2832
2833 // Recover var_size_in_bytes if necessary.
2834 if (var_size_in_bytes == end) {
2835 z_sgr(var_size_in_bytes, obj);
2836 }
2837 }
2838
2839 // Emitter for interface method lookup.
2840 // input: recv_klass, intf_klass, itable_index
2841 // output: method_result
2842 // kills: itable_index, temp1_reg, Z_R0, Z_R1
// TODO: temp2_reg is unused. We may use this emitter also in the itable stubs.
//       If the register is still not needed by then, remove it.
2845 void MacroAssembler::lookup_interface_method(Register recv_klass,
2846 Register intf_klass,
2847 RegisterOrConstant itable_index,
2848 Register method_result,
2849 Register temp1_reg,
2850 Label& no_such_interface,
2851 bool return_method) {
2852
2853 const Register vtable_len = temp1_reg; // Used to compute itable_entry_addr.
2854 const Register itable_entry_addr = Z_R1_scratch;
2855 const Register itable_interface = Z_R0_scratch;
2856
2857 BLOCK_COMMENT("lookup_interface_method {");
2858
2859 // Load start of itable entries into itable_entry_addr.
2860 z_llgf(vtable_len, Address(recv_klass, Klass::vtable_length_offset()));
2861 z_sllg(vtable_len, vtable_len, exact_log2(vtableEntry::size_in_bytes()));
2862
2863 // Loop over all itable entries until desired interfaceOop(Rinterface) found.
2864 add2reg_with_index(itable_entry_addr,
2865 in_bytes(Klass::vtable_start_offset() + itableOffsetEntry::interface_offset()),
2866 recv_klass, vtable_len);
2867
2868 const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize;
2869 Label search;
2870
2871 bind(search);
2872
2873 // Handle IncompatibleClassChangeError.
2874 // If the entry is null then we've reached the end of the table
2875 // without finding the expected interface, so throw an exception.
2876 load_and_test_long(itable_interface, Address(itable_entry_addr));
2877 z_bre(no_such_interface);
2878
2879 add2reg(itable_entry_addr, itable_offset_search_inc);
2880 z_cgr(itable_interface, intf_klass);
2881 z_brne(search);
2882
2883 // Entry found and itable_entry_addr points to it, get offset of vtable for interface.
2884 if (return_method) {
2885 const int vtable_offset_offset = in_bytes(itableOffsetEntry::offset_offset() -
2886 itableOffsetEntry::interface_offset()) -
2887 itable_offset_search_inc;
2888
    // Compute itableMethodEntry and get method and entry point.
    // We use addressing with index and displacement, since the formula
    // for computing the entry's offset has a fixed and a dynamic part;
    // the latter depends on the matched interface entry and on whether
    // the itable index was passed as a register or as a constant value.
2894 int method_offset = in_bytes(itableMethodEntry::method_offset());
2895 // Fixed part (displacement), common operand.
2896 Register itable_offset = method_result; // Dynamic part (index register).
2897
2898 if (itable_index.is_register()) {
2899 // Compute the method's offset in that register, for the formula, see the
2900 // else-clause below.
2901 z_sllg(itable_offset, itable_index.as_register(), exact_log2(itableMethodEntry::size() * wordSize));
2902 z_agf(itable_offset, vtable_offset_offset, itable_entry_addr);
2903 } else {
2904 // Displacement increases.
2905 method_offset += itableMethodEntry::size() * wordSize * itable_index.as_constant();
2906
      // Load the vtable offset from the itable entry.
2908 z_llgf(itable_offset, vtable_offset_offset, itable_entry_addr);
2909 }
2910
2911 // Finally load the method's oop.
2912 z_lg(method_result, method_offset, itable_offset, recv_klass);
2913 }
2914 BLOCK_COMMENT("} lookup_interface_method");
2915 }
2916
2917 // Lookup for virtual method invocation.
2918 void MacroAssembler::lookup_virtual_method(Register recv_klass,
2919 RegisterOrConstant vtable_index,
2920 Register method_result) {
2921 assert_different_registers(recv_klass, vtable_index.register_or_noreg());
2922 assert(vtableEntry::size() * wordSize == wordSize,
2923 "else adjust the scaling in the code below");
2924
2925 BLOCK_COMMENT("lookup_virtual_method {");
2926
2927 const int base = in_bytes(Klass::vtable_start_offset());
2928
2929 if (vtable_index.is_constant()) {
2930 // Load with base + disp.
2931 Address vtable_entry_addr(recv_klass,
2932 vtable_index.as_constant() * wordSize +
2933 base +
2934 in_bytes(vtableEntry::method_offset()));
2935
2936 z_lg(method_result, vtable_entry_addr);
2937 } else {
2938 // Shift index properly and load with base + index + disp.
2939 Register vindex = vtable_index.as_register();
2940 Address vtable_entry_addr(recv_klass, vindex,
2941 base + in_bytes(vtableEntry::method_offset()));
2942
2943 z_sllg(vindex, vindex, exact_log2(wordSize));
2944 z_lg(method_result, vtable_entry_addr);
2945 }
2946 BLOCK_COMMENT("} lookup_virtual_method");
2947 }
2948
2949 // Factor out code to call ic_miss_handler.
2950 // Generate code to call the inline cache miss handler.
2951 //
2952 // In most cases, this code will be generated out-of-line.
2953 // The method parameters are intended to provide some variability.
2954 // ICM - Label which has to be bound to the start of useful code (past any traps).
2955 // trapMarker - Marking byte for the generated illtrap instructions (if any).
2956 // Any value except 0x00 is supported.
2957 // = 0x00 - do not generate illtrap instructions.
2958 // use nops to fill unused space.
2959 // requiredSize - required size of the generated code. If the actually
2960 // generated code is smaller, use padding instructions to fill up.
2961 // = 0 - no size requirement, no padding.
2962 // scratch - scratch register to hold branch target address.
2963 //
2964 // The method returns the code offset of the bound label.
2965 unsigned int MacroAssembler::call_ic_miss_handler(Label& ICM, int trapMarker, int requiredSize, Register scratch) {
2966 intptr_t startOffset = offset();
2967
2968 // Prevent entry at content_begin().
2969 if (trapMarker != 0) {
2970 z_illtrap(trapMarker);
2971 }
2972
2973 // Load address of inline cache miss code into scratch register
2974 // and branch to cache miss handler.
2975 BLOCK_COMMENT("IC miss handler {");
2976 BIND(ICM);
2977 unsigned int labelOffset = offset();
2978 AddressLiteral icmiss(SharedRuntime::get_ic_miss_stub());
2979
2980 load_const_optimized(scratch, icmiss);
2981 z_br(scratch);
2982
2983 // Fill unused space.
2984 if (requiredSize > 0) {
2985 while ((offset() - startOffset) < requiredSize) {
2986 if (trapMarker == 0) {
2987 z_nop();
2988 } else {
2989 z_illtrap(trapMarker);
2990 }
2991 }
2992 }
2993 BLOCK_COMMENT("} IC miss handler");
2994 return labelOffset;
2995 }
2996
2997 void MacroAssembler::nmethod_UEP(Label& ic_miss) {
2998 Register ic_reg = Z_inline_cache;
2999 int klass_offset = oopDesc::klass_offset_in_bytes();
3000 if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
3001 if (VM_Version::has_CompareBranch()) {
3002 z_cgij(Z_ARG1, 0, Assembler::bcondEqual, ic_miss);
3003 } else {
3004 z_ltgr(Z_ARG1, Z_ARG1);
3005 z_bre(ic_miss);
3006 }
3007 }
3008 // Compare cached class against klass from receiver.
3009 compare_klass_ptr(ic_reg, klass_offset, Z_ARG1, false);
3010 z_brne(ic_miss);
3011 }
3012
3013 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
3014 Register super_klass,
3015 Register temp1_reg,
3016 Label* L_success,
3017 Label* L_failure,
3018 Label* L_slow_path,
3019 Register super_check_offset) {
3020 // Input registers must not overlap.
3021 assert_different_registers(sub_klass, super_klass, temp1_reg, super_check_offset);
3022
3023 const int sco_offset = in_bytes(Klass::super_check_offset_offset());
  bool must_load_sco = !super_check_offset->is_valid();

  // If no super_check_offset register was provided, we need a temp register to load the value.
  if (must_load_sco) {
    assert(temp1_reg != noreg, "supply either a temp or a register offset");
  }
3030
3031 const Register Rsuper_check_offset = temp1_reg;
3032
3033 NearLabel L_fallthrough;
3034 int label_nulls = 0;
3035 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; }
3036 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; }
3037 if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; }
3038 assert(label_nulls <= 1 || (L_slow_path == &L_fallthrough && label_nulls <= 2), "at most one null in the batch, usually");
3039
3040 BLOCK_COMMENT("check_klass_subtype_fast_path {");
3041 // If the pointers are equal, we are done (e.g., String[] elements).
3042 // This self-check enables sharing of secondary supertype arrays among
3043 // non-primary types such as array-of-interface. Otherwise, each such
  // type would need its own customized secondary supertype array (SSA).
3045 // We move this check to the front of the fast path because many
3046 // type checks are in fact trivially successful in this manner,
3047 // so we get a nicely predicted branch right at the start of the check.
3048 compare64_and_branch(sub_klass, super_klass, bcondEqual, *L_success);
3049
3050 // Check the supertype display, which is uint.
3051 if (must_load_sco) {
3052 z_llgf(Rsuper_check_offset, sco_offset, super_klass);
3053 super_check_offset = Rsuper_check_offset;
3054 }
3055
3056 Address super_check_addr(sub_klass, super_check_offset, 0);
3057 z_cg(super_klass, super_check_addr); // compare w/ displayed supertype
3058 branch_optimized(Assembler::bcondEqual, *L_success);
3059
3060 // This check has worked decisively for primary supers.
3061 // Secondary supers are sought in the super_cache ('super_cache_addr').
3062 // (Secondary supers are interfaces and very deeply nested subtypes.)
  // The check above also covers this case, thanks to a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
3066 // Note that the cache is updated below if it does not help us find
3067 // what we need immediately.
3068 // So if it was a primary super, we can just fail immediately.
3069 // Otherwise, it's the slow path for us (no success at this point).
3070
3071 // Hacked jmp, which may only be used just before L_fallthrough.
3072 #define final_jmp(label) \
3073 if (&(label) == &L_fallthrough) { /*do nothing*/ } \
3074 else { branch_optimized(Assembler::bcondAlways, label); } /*omit semicolon*/
3075
3076 z_cfi(super_check_offset, in_bytes(Klass::secondary_super_cache_offset()));
3077 if (L_failure == &L_fallthrough) {
3078 branch_optimized(Assembler::bcondEqual, *L_slow_path);
3079 } else {
3080 branch_optimized(Assembler::bcondNotEqual, *L_failure);
3081 final_jmp(*L_slow_path);
3082 }
3083
3084 bind(L_fallthrough);
3085 #undef final_jmp
3086 BLOCK_COMMENT("} check_klass_subtype_fast_path");
3087 // fallthru (to slow path)
3088 }
3089
3090 void MacroAssembler::check_klass_subtype_slow_path_linear(Register Rsubklass,
3091 Register Rsuperklass,
3092 Register Rarray_ptr, // tmp
3093 Register Rlength, // tmp
3094 Label* L_success,
3095 Label* L_failure,
3096 bool set_cond_codes /* unused */) {
3097 // Input registers must not overlap.
3098 // Also check for R1 which is explicitly used here.
3099 assert_different_registers(Z_R1, Rsubklass, Rsuperklass, Rarray_ptr, Rlength);
3100 NearLabel L_fallthrough;
3101 int label_nulls = 0;
3102 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; }
3103 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; }
3104 assert(label_nulls <= 1, "at most one null in the batch");
3105
3106 const int ss_offset = in_bytes(Klass::secondary_supers_offset());
3107 const int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3108
3109 const int length_offset = Array<Klass*>::length_offset_in_bytes();
3110 const int base_offset = Array<Klass*>::base_offset_in_bytes();
3111
3112 // Hacked jmp, which may only be used just before L_fallthrough.
3113 #define final_jmp(label) \
3114 if (&(label) == &L_fallthrough) { /*do nothing*/ } \
3115 else branch_optimized(Assembler::bcondAlways, label) /*omit semicolon*/
3116
3117 NearLabel loop_iterate, loop_count, match;
3118
3119 BLOCK_COMMENT("check_klass_subtype_slow_path_linear {");
3120 z_lg(Rarray_ptr, ss_offset, Rsubklass);
3121
3122 load_and_test_int(Rlength, Address(Rarray_ptr, length_offset));
3123 branch_optimized(Assembler::bcondZero, *L_failure);
3124
  // Klass pointers in the table are no longer compressed.
3126 z_cg(Rsuperklass, base_offset, Rarray_ptr); // Check array element for match.
3127 z_bre(match); // Shortcut for array length = 1.
3128
3129 // No match yet, so we must walk the array's elements.
3130 z_lngfr(Rlength, Rlength);
3131 z_sllg(Rlength, Rlength, LogBytesPerWord); // -#bytes of cache array
3132 z_llill(Z_R1, BytesPerWord); // Set increment/end index.
3133 add2reg(Rlength, 2 * BytesPerWord); // start index = -(n-2)*BytesPerWord
3134 z_slgr(Rarray_ptr, Rlength); // start addr: += (n-2)*BytesPerWord
3135 z_bru(loop_count);
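  // Worked example for n = 4 elements: element 0 was already checked above.
  // Rlength starts at -(n-2)*8 = -16; BRXLG adds 8 per iteration (Z_R1 serves
  // as both increment and compare value) and branches while the index is <= 8,
  // so the loop probes at index offsets -8, 0, +8 from the adjusted start
  // address, i.e. elements 1, 2 and 3.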
3136
3137 BIND(loop_iterate);
3138 z_cg(Rsuperklass, base_offset, Rlength, Rarray_ptr); // Check array element for match.
3139 z_bre(match);
3140 BIND(loop_count);
3141 z_brxlg(Rlength, Z_R1, loop_iterate);
3142
3143 // Rsuperklass not found among secondary super classes -> failure.
3144 branch_optimized(Assembler::bcondAlways, *L_failure);
3145
3146 // Got a hit. Return success (zero result). Set cache.
3147 // Cache load doesn't happen here. For speed, it is directly emitted by the compiler.
3148
3149 BIND(match);
3150
3151 if (UseSecondarySupersCache) {
3152 z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache.
3153 }
3154 final_jmp(*L_success);
3155
3156 // Exit to the surrounding code.
3157 BIND(L_fallthrough);
3158 #undef final_jmp
3159 BLOCK_COMMENT("} check_klass_subtype_slow_path_linear");
3160 }
3161
// If Register r is invalid (noreg), take the next register from
// available_regs and add it to regs_to_push.
3164 Register MacroAssembler::allocate_if_noreg(Register r,
3165 RegSetIterator<Register> &available_regs,
3166 RegSet ®s_to_push) {
3167 if (!r->is_valid()) {
3168 r = *available_regs++;
3169 regs_to_push += r;
3170 }
3171 return r;
3172 }
3173
// check_klass_subtype_slow_path_table() looks for super_klass in the
// hash table belonging to sub_klass, branching to L_success or
// L_failure as appropriate. This is essentially a shim which
// allocates registers as necessary and then calls
// lookup_secondary_supers_table() to do the work. Any of the temp
// regs may be noreg, in which case this logic will choose some
// registers and push and pop them around the call.
3181 void MacroAssembler::check_klass_subtype_slow_path_table(Register sub_klass,
3182 Register super_klass,
3183 Register temp_reg,
3184 Register temp2_reg,
3185 Register temp3_reg,
3186 Register temp4_reg,
3187 Register result_reg,
3188 Label* L_success,
3189 Label* L_failure,
3190 bool set_cond_codes) {
3191 BLOCK_COMMENT("check_klass_subtype_slow_path_table {");
3192
3193 RegSet temps = RegSet::of(temp_reg, temp2_reg, temp3_reg, temp4_reg);
3194
3195 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp4_reg);
3196
3197 Label L_fallthrough;
3198 int label_nulls = 0;
3199 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; }
3200 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; }
3201 assert(label_nulls <= 1, "at most one null in the batch");
3202
3203 RegSetIterator<Register> available_regs
      // Z_R0 will be used to hold Z_R15 (Z_SP) while pushing a new frame, so don't use it here.
      // Z_R1 will be used to hold r_bitmap in lookup_secondary_supers_table_var, so it can't be used either.
      // Z_R2, Z_R3 and Z_R4 will be used in verify_secondary_supers_table for failure reporting.
3207 = (RegSet::range(Z_R0, Z_R15) - temps - sub_klass - super_klass - Z_R1_scratch - Z_R0_scratch - Z_R2 - Z_R3 - Z_R4).begin();
3208
3209 RegSet pushed_regs;
3210
3211 temp_reg = allocate_if_noreg(temp_reg, available_regs, pushed_regs);
3212 temp2_reg = allocate_if_noreg(temp2_reg, available_regs, pushed_regs);
  temp3_reg  = allocate_if_noreg(temp3_reg, available_regs, pushed_regs);
3214 temp4_reg = allocate_if_noreg(temp4_reg, available_regs, pushed_regs);
3215 result_reg = allocate_if_noreg(result_reg, available_regs, pushed_regs);
3216
3217 const int frame_size = pushed_regs.size() * BytesPerWord + frame::z_abi_160_size;
3218
3219 // Push & save registers
3220 {
3221 int i = 0;
3222 save_return_pc();
3223 push_frame(frame_size);
3224
3225 for (auto it = pushed_regs.begin(); *it != noreg; i++) {
3226 z_stg(*it++, i * BytesPerWord + frame::z_abi_160_size, Z_SP);
3227 }
3228 assert(i * BytesPerWord + frame::z_abi_160_size == frame_size, "sanity");
3229 }
3230
3231 lookup_secondary_supers_table_var(sub_klass,
3232 super_klass,
3233 temp_reg, temp2_reg, temp3_reg, temp4_reg, result_reg);
3234
  // NOTE: The condition code must not be altered between here and the conditional branch below!
3236 z_cghi(result_reg, 0);
3237
3238 {
3239 int i = 0;
3240 for (auto it = pushed_regs.begin(); *it != noreg; ++i) {
3241 z_lg(*it++, i * BytesPerWord + frame::z_abi_160_size, Z_SP);
3242 }
3243 assert(i * BytesPerWord + frame::z_abi_160_size == frame_size, "sanity");
3244 pop_frame();
3245 restore_return_pc();
3246 }
3247
3248 // NB! Callers may assume that, when set_cond_codes is true, this
3249 // code sets temp2_reg to a nonzero value.
3250 if (set_cond_codes) {
3251 z_lghi(temp2_reg, 1);
3252 }
3253
3254 branch_optimized(bcondNotEqual, *L_failure);
3255
  if (L_success != &L_fallthrough) {
3257 z_bru(*L_success);
3258 }
3259
3260 bind(L_fallthrough);
3261 BLOCK_COMMENT("} check_klass_subtype_slow_path_table");
3262 }
3263
3264 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
3265 Register super_klass,
3266 Register temp_reg,
3267 Register temp2_reg,
3268 Label* L_success,
3269 Label* L_failure,
3270 bool set_cond_codes) {
3271 BLOCK_COMMENT("check_klass_subtype_slow_path {");
3272 if (UseSecondarySupersTable) {
3273 check_klass_subtype_slow_path_table(sub_klass,
3274 super_klass,
3275 temp_reg,
3276 temp2_reg,
3277 /*temp3*/noreg,
3278 /*temp4*/noreg,
3279 /*result*/noreg,
3280 L_success,
3281 L_failure,
3282 set_cond_codes);
3283 } else {
3284 check_klass_subtype_slow_path_linear(sub_klass,
3285 super_klass,
3286 temp_reg,
3287 temp2_reg,
3288 L_success,
3289 L_failure,
3290 set_cond_codes);
3291 }
3292 BLOCK_COMMENT("} check_klass_subtype_slow_path");
3293 }
3294
3295 // Emitter for combining fast and slow path.
3296 void MacroAssembler::check_klass_subtype(Register sub_klass,
3297 Register super_klass,
3298 Register temp1_reg,
3299 Register temp2_reg,
3300 Label& L_success) {
3301 NearLabel failure;
3302 BLOCK_COMMENT(err_msg("check_klass_subtype(%s subclass of %s) {", sub_klass->name(), super_klass->name()));
3303 check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg,
3304 &L_success, &failure, nullptr);
3305 check_klass_subtype_slow_path(sub_klass, super_klass,
3306 temp1_reg, temp2_reg, &L_success, nullptr);
3307 BIND(failure);
3308 BLOCK_COMMENT("} check_klass_subtype");
3309 }
3310
// Scans r_count pointer-sized words at [r_addr] for an occurrence of r_value,
// generic (r_count must be > 0).
// iff found: CC eq, r_result == 0.
3314 void MacroAssembler::repne_scan(Register r_addr, Register r_value, Register r_count, Register r_result) {
3315 NearLabel L_loop, L_exit;
3316
3317 BLOCK_COMMENT("repne_scan {");
3318 #ifdef ASSERT
3319 z_chi(r_count, 0);
3320 asm_assert(bcondHigh, "count must be positive", 11);
3321 #endif
3322
  clear_reg(r_result, true /* whole_reg */, false /* set_cc */); // Set r_result = 0, anticipating a successful search.
3324
3325 bind(L_loop);
3326 z_cg(r_value, Address(r_addr));
3327 z_bre(L_exit); // branch on success
3328 z_la(r_addr, wordSize, r_addr);
3329 z_brct(r_count, L_loop);
3330
3331 // z_brct above doesn't change CC.
3332 // If we reach here, then the value in r_value is not present. Set r_result to 1.
3333 z_lghi(r_result, 1);
3334
3335 bind(L_exit);
3336 BLOCK_COMMENT("} repne_scan");
3337 }
3338
3339 // Ensure that the inline code and the stub are using the same registers.
3340 #define LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS \
3341 do { \
3342 assert(r_super_klass == Z_ARG1 && \
3343 r_array_base == Z_ARG5 && \
3344 r_array_length == Z_ARG4 && \
3345 (r_array_index == Z_ARG3 || r_array_index == noreg) && \
3346 (r_sub_klass == Z_ARG2 || r_sub_klass == noreg) && \
3347 (r_bitmap == Z_R10 || r_bitmap == noreg) && \
3348 (r_result == Z_R11 || r_result == noreg), "registers must match s390.ad"); \
  } while (0)
3350
3351 // Note: this method also kills Z_R1_scratch register on machines older than z15
3352 void MacroAssembler::lookup_secondary_supers_table_const(Register r_sub_klass,
3353 Register r_super_klass,
3354 Register r_temp1,
3355 Register r_temp2,
3356 Register r_temp3,
3357 Register r_temp4,
3358 Register r_result,
3359 u1 super_klass_slot) {
3360 NearLabel L_done, L_failure;
3361
3362 BLOCK_COMMENT("lookup_secondary_supers_table_const {");
3363
3364 const Register
3365 r_array_base = r_temp1,
3366 r_array_length = r_temp2,
3367 r_array_index = r_temp3,
3368 r_bitmap = r_temp4;
3369
3370 LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
3371
3372 z_lg(r_bitmap, Address(r_sub_klass, Klass::secondary_supers_bitmap_offset()));
3373
3374 // First check the bitmap to see if super_klass might be present. If
3375 // the bit is zero, we are certain that super_klass is not one of
3376 // the secondary supers.
3377 u1 bit = super_klass_slot;
3378 int shift_count = Klass::SECONDARY_SUPERS_TABLE_MASK - bit;
3379
  z_sllg(r_array_index, r_bitmap, shift_count); // Shift the selected bit into bit position 63 (the MSB).
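  // Example: for super_klass_slot == 5, shift_count == 58; bit 5 of the
  // bitmap now sits in bit 63, where the sign-bit test below can see it.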
3381
3382 // Initialize r_result with 0 (indicating success). If searching fails, r_result will be loaded
3383 // with 1 (failure) at the end of this method.
3384 clear_reg(r_result, true /* whole_reg */, false /* set_cc */); // r_result = 0
3385
  // Test the MSB of r_array_index, i.e., its sign bit.
  testbit(r_array_index, 63);
  z_bfalse(L_failure); // Not set -> super_klass cannot be a secondary super.
3389
3390 // We will consult the secondary-super array.
3391 z_lg(r_array_base, Address(r_sub_klass, Klass::secondary_supers_offset()));
3392
3393 // The value i in r_array_index is >= 1, so even though r_array_base
3394 // points to the length, we don't need to adjust it to point to the
3395 // data.
3396 assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "Adjust this code");
3397
3398 // Get the first array index that can contain super_klass.
3399 if (bit != 0) {
3400 pop_count_long(r_array_index, r_array_index, Z_R1_scratch); // kills Z_R1_scratch on machines older than z15
3401
3402 // NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word.
3403 z_sllg(r_array_index, r_array_index, LogBytesPerWord); // scale
3404 } else {
3405 // Actually use index 0, but r_array_base and r_array_index are off by 1 word
3406 // such that the sum is precise.
3407 z_lghi(r_array_index, BytesPerWord); // for slow path (scaled)
3408 }
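  // Example for the bit != 0 case: if bits 0..bit of the bitmap contain three
  // 1s, popcount yields 3 and the load below probes secondary_supers[2], the
  // third packed entry (the off-by-one is absorbed by r_array_base still
  // pointing at the length word).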
3409
3410 z_cg(r_super_klass, Address(r_array_base, r_array_index));
3411 branch_optimized(bcondEqual, L_done); // found a match; success
3412
3413 // Is there another entry to check? Consult the bitmap.
3414 testbit(r_bitmap, (bit + 1) & Klass::SECONDARY_SUPERS_TABLE_MASK);
3415 z_bfalse(L_failure);
3416
3417 // Linear probe. Rotate the bitmap so that the next bit to test is
3418 // in Bit 2 for the look-ahead check in the slow path.
3419 if (bit != 0) {
3420 z_rllg(r_bitmap, r_bitmap, 64-bit); // rotate right
3421 }
3422
3423 // Calls into the stub generated by lookup_secondary_supers_table_slow_path.
3424 // Arguments: r_super_klass, r_array_base, r_array_index, r_bitmap.
3425 // Kills: r_array_length.
3426 // Returns: r_result
3427
3428 call_stub(StubRoutines::lookup_secondary_supers_table_slow_path_stub());
3429
3430 z_bru(L_done); // pass whatever result we got from a slow path
3431
3432 bind(L_failure);
3433
3434 z_lghi(r_result, 1);
3435
3436 bind(L_done);
3437 BLOCK_COMMENT("} lookup_secondary_supers_table_const");
3438
3439 if (VerifySecondarySupers) {
3440 verify_secondary_supers_table(r_sub_klass, r_super_klass, r_result,
3441 r_temp1, r_temp2, r_temp3);
3442 }
3443 }
3444
3445 // At runtime, return 0 in result if r_super_klass is a superclass of
3446 // r_sub_klass, otherwise return nonzero. Use this version of
3447 // lookup_secondary_supers_table() if you don't know ahead of time
3448 // which superclass will be searched for. Used by interpreter and
3449 // runtime stubs. It is larger and has somewhat greater latency than
3450 // the version above, which takes a constant super_klass_slot.
3451 void MacroAssembler::lookup_secondary_supers_table_var(Register r_sub_klass,
3452 Register r_super_klass,
3453 Register temp1,
3454 Register temp2,
3455 Register temp3,
3456 Register temp4,
3457 Register result) {
3458 assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, temp3, temp4, result, Z_R1_scratch);
3459
3460 Label L_done, L_failure;
3461
3462 BLOCK_COMMENT("lookup_secondary_supers_table_var {");
3463
3464 const Register
3465 r_array_index = temp3,
    slot = temp4,  // NOTE: "slot" must not be Z_R0: for z_sllg/z_rllg below, base register R0 means
                   //       "no base", so the shift/rotate amount taken from "slot" would be ignored.
3467 r_bitmap = Z_R1_scratch;
3468
3469 z_llgc(slot, Address(r_super_klass, Klass::hash_slot_offset()));
3470
3471 // Initialize r_result with 0 (indicating success). If searching fails, r_result will be loaded
3472 // with 1 (failure) at the end of this method.
3473 clear_reg(result, true /* whole_reg */, false /* set_cc */); // result = 0
3474
3475 z_lg(r_bitmap, Address(r_sub_klass, Klass::secondary_supers_bitmap_offset()));
3476
3477 // First check the bitmap to see if super_klass might be present. If
3478 // the bit is zero, we are certain that super_klass is not one of
3479 // the secondary supers.
3480 z_xilf(slot, (u1)(Klass::SECONDARY_SUPERS_TABLE_SIZE - 1)); // slot ^ 63 === 63 - slot (mod 64)
3481 z_sllg(r_array_index, r_bitmap, /*d2 = */ 0, /* b2 = */ slot);
3482
3483 testbit(r_array_index, Klass::SECONDARY_SUPERS_TABLE_SIZE - 1);
3484 branch_optimized(bcondAllZero, L_failure);
3485
3486 const Register
3487 r_array_base = temp1,
3488 r_array_length = temp2;
3489
  // Get the first array index that can contain super_klass into r_array_index.
  // NOTE: Z_R1_scratch holds the bitmap (see r_bitmap above), so it must be preserved.
  // r_array_base/temp1, on the other hand, is free at this point (it is only loaded below).
  pop_count_long(r_array_index, r_array_index, temp1); // kills r_array_base/temp1 on machines older than z15
3494
3495 // The value i in r_array_index is >= 1, so even though r_array_base
3496 // points to the length, we don't need to adjust it to point to the data.
3497 assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "Adjust this code");
3498 assert(Array<Klass*>::length_offset_in_bytes() == 0, "Adjust this code");
3499
3500 // We will consult the secondary-super array.
3501 z_lg(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset())));
3502
3503 // NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word.
3504 z_sllg(r_array_index, r_array_index, LogBytesPerWord); // scale, r_array_index is loaded by popcnt above
3505
3506 z_cg(r_super_klass, Address(r_array_base, r_array_index));
3507 branch_optimized(bcondEqual, L_done); // found a match
3508
  // Note: a small trick is used here:
  //
  // The operation "slot ^ 63 === 63 - slot (mod 64)" has already been performed above.
  // Since we lack a rotate-right instruction, we achieve the same effect by rotating left
  // by "64 - slot" positions, which is equivalent to a right rotation by "slot" positions.
  //
  //   initial value:      slot
  //   after the XILF:     slot' = 63 - slot
  //   needed for rotate:  64 - slot = (63 - slot) + 1 = slot' + 1
  //
  // So instead of rotating left by (64 - slot), we simply rotate left by slot' + 1.
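  // (Numeric check: for an initial slot of 10, the register holds 53 after the
  // XILF; adding 1 gives 54, and rotating left by 54 equals rotating right by 10.)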
3523
3524 // Linear probe. Rotate the bitmap so that the next bit to test is
3525 // in Bit 1.
3526 z_aghi(slot, 1); // slot = slot + 1
3527
3528 z_rllg(r_bitmap, r_bitmap, /*d2=*/ 0, /*b2=*/ slot);
3529 testbit(r_bitmap, 1);
3530 branch_optimized(bcondAllZero, L_failure);
3531
3532 // The slot we just inspected is at secondary_supers[r_array_index - 1].
3533 // The next slot to be inspected, by the logic we're about to call,
3534 // is secondary_supers[r_array_index]. Bits 0 and 1 in the bitmap
3535 // have been checked.
3536 lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base, r_array_index,
3537 r_bitmap, /*temp=*/ r_array_length, result, /*is_stub*/false);
3538
3539 // pass whatever we got from slow path
3540 z_bru(L_done);
3541
3542 bind(L_failure);
3543 z_lghi(result, 1); // load 1 to represent failure
3544
3545 bind(L_done);
3546
3547 BLOCK_COMMENT("} lookup_secondary_supers_table_var");
3548
3549 if (VerifySecondarySupers) {
3550 verify_secondary_supers_table(r_sub_klass, r_super_klass, result,
3551 temp1, temp2, temp3);
3552 }
3553 }
3554
3555 // Called by code generated by check_klass_subtype_slow_path
3556 // above. This is called when there is a collision in the hashed
3557 // lookup in the secondary supers array.
3558 void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_klass,
3559 Register r_array_base,
3560 Register r_array_index,
3561 Register r_bitmap,
3562 Register r_temp,
3563 Register r_result,
3564 bool is_stub) {
3565 assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, r_result, r_temp);
3566
3567 const Register
3568 r_array_length = r_temp,
3569 r_sub_klass = noreg;
3570
  if (is_stub) {
3572 LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
3573 }
3574
3575 BLOCK_COMMENT("lookup_secondary_supers_table_slow_path {");
3576 NearLabel L_done, L_failure;
3577
3578 // Load the array length.
3579 z_llgf(r_array_length, Address(r_array_base, Array<Klass*>::length_offset_in_bytes()));
3580
3581 // And adjust the array base to point to the data.
3582 // NB!
3583 // Effectively increments the current slot index by 1.
3584 assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "");
3585 add2reg(r_array_base, Array<Klass*>::base_offset_in_bytes());
3586
3587 // Linear probe
3588 NearLabel L_huge;
3589
3590 // The bitmap is full to bursting.
3591 z_chi(r_array_length, Klass::SECONDARY_SUPERS_BITMAP_FULL - 2);
3592 z_brh(L_huge);
3593
3594 // NB! Our caller has checked bits 0 and 1 in the bitmap. The
3595 // current slot (at secondary_supers[r_array_index]) has not yet
3596 // been inspected, and r_array_index may be out of bounds if we
3597 // wrapped around the end of the array.
3598
3599 { // This is conventional linear probing, but instead of terminating
3600 // when a null entry is found in the table, we maintain a bitmap
3601 // in which a 0 indicates missing entries.
3602 // As long as the bitmap is not completely full,
3603 // array_length == popcount(bitmap). The array_length check above
3604 // guarantees there are 0s in the bitmap, so the loop eventually
3605 // terminates.
3606
3607 #ifdef ASSERT
3608 // r_result is set to 0 by lookup_secondary_supers_table.
3609 // clear_reg(r_result, true /* whole_reg */, false /* set_cc */);
3610 z_cghi(r_result, 0);
3611 asm_assert(bcondEqual, "r_result required to be 0, used by z_locgr", 44);
3612
3613 // We should only reach here after having found a bit in the bitmap.
3614 z_ltgr(r_array_length, r_array_length);
3615 asm_assert(bcondHigh, "array_length > 0, should hold", 22);
3616 #endif // ASSERT
3617
3618 // Compute limit in r_array_length
3619 add2reg(r_array_length, -1);
3620 z_sllg(r_array_length, r_array_length, LogBytesPerWord);
3621
3622 NearLabel L_loop;
3623 bind(L_loop);
3624
3625 // Check for wraparound.
3626 z_cgr(r_array_index, r_array_length);
    z_locgr(r_array_index, r_result, bcondHigh); // r_result contains 0: wrap the index around to the array start.
3628
3629 z_cg(r_super_klass, Address(r_array_base, r_array_index));
3630 z_bre(L_done); // success
3631
3632 // look-ahead check: if Bit 2 is 0, we're done
3633 testbit(r_bitmap, 2);
3634 z_bfalse(L_failure);
3635
3636 z_rllg(r_bitmap, r_bitmap, 64-1); // rotate right
3637 add2reg(r_array_index, BytesPerWord);
3638
3639 z_bru(L_loop);
3640 }
3641
3642 { // Degenerate case: more than 64 secondary supers.
3643 // FIXME: We could do something smarter here, maybe a vectorized
3644 // comparison or a binary search, but is that worth any added
3645 // complexity?
3646
3647 bind(L_huge);
3648 repne_scan(r_array_base, r_super_klass, r_array_length, r_result);
3649
3650 z_bru(L_done); // forward the result we got from repne_scan
3651 }
3652
3653 bind(L_failure);
3654 z_lghi(r_result, 1);
3655
3656 bind(L_done);
3657 BLOCK_COMMENT("} lookup_secondary_supers_table_slow_path");
3658 }
3659
3660 // Make sure that the hashed lookup and a linear scan agree.
3661 void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass,
3662 Register r_super_klass,
3663 Register r_result /* expected */,
3664 Register r_temp1,
3665 Register r_temp2,
3666 Register r_temp3) {
3667 assert_different_registers(r_sub_klass, r_super_klass, r_result, r_temp1, r_temp2, r_temp3);
3668
3669 const Register
3670 r_array_base = r_temp1,
3671 r_array_length = r_temp2,
3672 r_array_index = r_temp3,
3673 r_bitmap = noreg; // unused
3674
3675 BLOCK_COMMENT("verify_secondary_supers_table {");
3676
3677 Label L_passed, L_failure;
3678
3679 // We will consult the secondary-super array.
3680 z_lg(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset())));
3681
3682 // Load the array length.
3683 z_llgf(r_array_length, Address(r_array_base, Array<Klass*>::length_offset_in_bytes()));
3684
3685 // And adjust the array base to point to the data.
3686 z_aghi(r_array_base, Array<Klass*>::base_offset_in_bytes());
3687
3688 const Register r_linear_result = r_array_index; // reuse
3689 z_chi(r_array_length, 0);
3690 load_on_condition_imm_32(r_linear_result, 1, bcondNotHigh); // load failure if array_length <= 0
3691 z_brc(bcondNotHigh, L_failure);
3692 repne_scan(r_array_base, r_super_klass, r_array_length, r_linear_result);
3693 bind(L_failure);
3694
3695 z_cr(r_result, r_linear_result);
3696 z_bre(L_passed);
3697
3698 // report fatal error and terminate VM
3699
  // Argument shuffle.
  // Z_F1, Z_F3 and Z_F5 are volatile registers; they are used as scratch so the
  // incoming values survive while the Z_ARGn registers are being assembled.
3702 z_ldgr(Z_F1, r_super_klass);
3703 z_ldgr(Z_F3, r_sub_klass);
3704 z_ldgr(Z_F5, r_linear_result);
3705
3706 z_lgr(Z_ARG4, r_result);
3707
3708 z_lgdr(Z_ARG1, Z_F1); // r_super_klass
3709 z_lgdr(Z_ARG2, Z_F3); // r_sub_klass
3710 z_lgdr(Z_ARG3, Z_F5); // r_linear_result
3711
3712 const char* msg = "mismatch";
3713 load_const_optimized(Z_ARG5, (address)msg);
3714
3715 call_VM_leaf(CAST_FROM_FN_PTR(address, Klass::on_secondary_supers_verification_failure));
3716 should_not_reach_here();
3717
3718 bind(L_passed);
3719
3720 BLOCK_COMMENT("} verify_secondary_supers_table");
3721 }
3722
3723 void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) {
3724 assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required");
3725
3726 Label L_fallthrough;
3727 if (L_fast_path == nullptr) {
3728 L_fast_path = &L_fallthrough;
3729 } else if (L_slow_path == nullptr) {
3730 L_slow_path = &L_fallthrough;
3731 }
3732
3733 // Fast path check: class is fully initialized.
3734 // init_state needs acquire, but S390 is TSO, and so we are already good.
3735 z_cli(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized);
3736 z_bre(*L_fast_path);
3737
3738 // Fast path check: current thread is initializer thread
3739 z_cg(thread, Address(klass, InstanceKlass::init_thread_offset()));
3740 if (L_slow_path == &L_fallthrough) {
3741 z_bre(*L_fast_path);
3742 } else if (L_fast_path == &L_fallthrough) {
3743 z_brne(*L_slow_path);
3744 } else {
3745 Unimplemented();
3746 }
3747
3748 bind(L_fallthrough);
3749 }
3750
3751 // Increment a counter at counter_address when the eq condition code is
3752 // set. Kills registers tmp1_reg and tmp2_reg and preserves the condition code.
3753 void MacroAssembler::increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg) {
3754 Label l;
3755 z_brne(l);
3756 load_const(tmp1_reg, counter_address);
3757 add2mem_32(Address(tmp1_reg), 1, tmp2_reg);
3758 z_cr(tmp1_reg, tmp1_reg); // Set cc to eq.
3759 bind(l);
3760 }
3761
3762 void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
3763 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3764 bs->resolve_jobject(this, value, tmp1, tmp2);
3765 }
3766
3767 void MacroAssembler::resolve_global_jobject(Register value, Register tmp1, Register tmp2) {
3768 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3769 bs->resolve_global_jobject(this, value, tmp1, tmp2);
3770 }
3771
// Last_Java_sp must comply with the rules in frame_s390.hpp.
3773 void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation) {
3774 BLOCK_COMMENT("set_last_Java_frame {");
3775
  // Always set last_Java_pc and flags first because once last_Java_sp
  // is visible, has_last_Java_frame is true and users will look at the
  // rest of the fields. (Note: flags should always be zero before we
  // get here, so they don't need to be set.)
3780
3781 // Verify that last_Java_pc was zeroed on return to Java.
3782 if (allow_relocation) {
3783 asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()),
3784 Z_thread,
3785 "last_Java_pc not zeroed before leaving Java",
3786 0x200);
3787 } else {
3788 asm_assert_mem8_is_zero_static(in_bytes(JavaThread::last_Java_pc_offset()),
3789 Z_thread,
3790 "last_Java_pc not zeroed before leaving Java",
3791 0x200);
3792 }
3793
  // When returning from a call out of Java mode, the frame anchor's
  // last_Java_pc will always be set to null. It is set here so that,
  // if we are doing a call to native (not VM) code, we capture the
  // known pc and don't have to rely on the native call having a
  // standard frame linkage where we can find the pc.
  if (last_Java_pc != noreg) {
    z_stg(last_Java_pc, Address(Z_thread, JavaThread::last_Java_pc_offset()));
  }
3802
  // This membar release is not required on z/Architecture, since the sequence of stores
  // is maintained. Nevertheless, we leave it in to document the required ordering.
3805 // The implementation of z_release() should be empty.
3806 // z_release();
3807
3808 z_stg(last_Java_sp, Address(Z_thread, JavaThread::last_Java_sp_offset()));
3809 BLOCK_COMMENT("} set_last_Java_frame");
3810 }
3811
3812 void MacroAssembler::reset_last_Java_frame(bool allow_relocation) {
3813 BLOCK_COMMENT("reset_last_Java_frame {");
3814
3815 if (allow_relocation) {
3816 asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()),
3817 Z_thread,
3818 "SP was not set, still zero",
3819 0x202);
3820 } else {
3821 asm_assert_mem8_isnot_zero_static(in_bytes(JavaThread::last_Java_sp_offset()),
3822 Z_thread,
3823 "SP was not set, still zero",
3824 0x202);
3825 }
3826
3827 // _last_Java_sp = 0
3828 // Clearing storage must be atomic here, so don't use clear_mem()!
3829 store_const(Address(Z_thread, JavaThread::last_Java_sp_offset()), 0);
3830
3831 // _last_Java_pc = 0
3832 store_const(Address(Z_thread, JavaThread::last_Java_pc_offset()), 0);
3833
3834 BLOCK_COMMENT("} reset_last_Java_frame");
3835 return;
3836 }
3837
3838 void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, bool allow_relocation) {
3839 assert_different_registers(sp, tmp1);
3840
3841 // We cannot trust that code generated by the C++ compiler saves R14
3842 // to z_abi_160.return_pc, because sometimes it spills R14 using stmg at
3843 // z_abi_160.gpr14 (e.g. InterpreterRuntime::_new()).
3844 // Therefore we load the PC into tmp1 and let set_last_Java_frame() save
3845 // it into the frame anchor.
3846 get_PC(tmp1);
3847 set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1, allow_relocation);
3848 }
3849
3850 void MacroAssembler::set_thread_state(JavaThreadState new_state) {
3851 z_release();
3852
3853 assert(Immediate::is_uimm16(_thread_max_state), "enum value out of range for instruction");
3854 assert(sizeof(JavaThreadState) == sizeof(int), "enum value must have base type int");
3855 store_const(Address(Z_thread, JavaThread::thread_state_offset()), new_state, Z_R0, false);
3856 }
3857
3858 void MacroAssembler::get_vm_result_oop(Register oop_result) {
3859 z_lg(oop_result, Address(Z_thread, JavaThread::vm_result_oop_offset()));
3860 clear_mem(Address(Z_thread, JavaThread::vm_result_oop_offset()), sizeof(void*));
3861
3862 verify_oop(oop_result, FILE_AND_LINE);
3863 }
3864
3865 void MacroAssembler::get_vm_result_metadata(Register result) {
3866 z_lg(result, Address(Z_thread, JavaThread::vm_result_metadata_offset()));
3867 clear_mem(Address(Z_thread, JavaThread::vm_result_metadata_offset()), sizeof(void*));
3868 }
3869
3870 // We require that C code which does not return a value in vm_result will
3871 // leave it undisturbed.
3872 void MacroAssembler::set_vm_result(Register oop_result) {
3873 z_stg(oop_result, Address(Z_thread, JavaThread::vm_result_oop_offset()));
3874 }
3875
3876 // Explicit null checks (used for method handle code).
3877 void MacroAssembler::null_check(Register reg, Register tmp, int64_t offset) {
3878 if (!ImplicitNullChecks) {
3879 NearLabel ok;
3880
3881 compare64_and_branch(reg, (intptr_t) 0, Assembler::bcondNotEqual, ok);
3882
    // reg was null, so we can reuse it to hold the exception entry address
    // (tmp == Z_R0 is allowed, so tmp can't be used for the address).
3884 address exception_entry = Interpreter::throw_NullPointerException_entry();
3885 load_absolute_address(reg, exception_entry);
3886 z_br(reg);
3887
3888 bind(ok);
3889 } else {
3890 if (needs_explicit_null_check((intptr_t)offset)) {
3891 // Provoke OS null exception if reg is null by
3892 // accessing M[reg] w/o changing any registers.
3893 z_lg(tmp, 0, reg);
3894 }
3895 // else
3896 // Nothing to do, (later) access of M[reg + offset]
3897 // will provoke OS null exception if reg is null.
3898 }
3899 }
3900
3901 //-------------------------------------
3902 // Compressed Klass Pointers
3903 //-------------------------------------
3904
// Klass pointer manipulations if compressed.
3906 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
3907 Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided. (dst == src) also possible.
3908 address base = CompressedKlassPointers::base();
3909 int shift = CompressedKlassPointers::shift();
3910 bool need_zero_extend = base != nullptr;
3911
3912 BLOCK_COMMENT("cKlass encoder {");
3913
3914 #ifdef ASSERT
3915 Label ok;
3916 z_tmll(current, CompressedKlassPointers::klass_alignment_in_bytes() - 1); // Check alignment.
3917 z_brc(Assembler::bcondAllZero, ok);
3918 // The plain disassembler does not recognize illtrap. It instead displays
3919 // a 32-bit value. Issuing two illtraps assures the disassembler finds
3920 // the proper beginning of the next instruction.
3921 z_illtrap(0xee);
3922 z_illtrap(0xee);
3923 bind(ok);
3924 #endif
3925
  // Scale down the incoming klass pointer first.
  // We can then be sure we calculate an offset that fits into 32 bits.
  // More generally speaking: all subsequent calculations are purely 32-bit.
3929 if (shift != 0) {
3930 z_srlg(dst, current, shift);
3931 current = dst;
3932 }
3933
3934 if (base != nullptr) {
3935 // Use scaled-down base address parts to match scaled-down klass pointer.
3936 unsigned int base_h = ((unsigned long)base)>>(32+shift);
3937 unsigned int base_l = (unsigned int)(((unsigned long)base)>>shift);
3938
    // General considerations:
    // - When calculating (current_h - base_h), all digits must cancel (become 0).
    //   Otherwise, we would end up with a compressed klass pointer which doesn't
    //   fit into 32 bits.
    // - Only bit #33 of the difference could potentially be non-zero. For that
    //   to happen, (current_l < base_l) must hold. In this case, the subtraction
    //   will create a borrow out of bit #32, nicely killing bit #33.
    // - With the above, we only need to consider current_l and base_l to
    //   calculate the result.
    // - Both values are treated as unsigned. The unsigned subtraction is
    //   replaced by adding (unsigned) the 2's complement of the subtrahend.
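    // Worked example (hypothetical values): base = 0x0000000840000000, shift = 3.
    // The scaled base is 0x108000000, i.e. base_h = 0x1 and base_l = 0x08000000.
    // For a scaled klass pointer current = 0x108000010, current_l - base_l is
    // computed as 0x08000010 + (2^32 - 0x08000000) = 0x00000010 (mod 2^32),
    // which is exactly the expected narrow klass pointer.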
3950
3951 if (base_l == 0) {
      // - In theory, the calculation to be performed here (current_h - base_h) MUST
      //   cancel all high-word bits. Otherwise, we would end up with an offset
      //   (i.e. compressed klass pointer) that does not fit into 32 bits.
      // - current_l remains unchanged.
      // - Therefore, we can replace the entire calculation with just a
      //   zero-extending 32-to-64-bit load.
      // - Even that can be replaced with a conditional load if dst != current.
      //   (This is a local view. The shift step may have requested zero-extension.)
3960 } else {
      if ((base_h == 0) && is_uimm(base_l, 31)) {
        // If we happen to find that (base_h == 0), and that base_l is within the range
        // which can be represented by a signed int, then we can use a 64-bit signed add with
        // (-base_l) as a 32-bit signed immediate operand. The add will take care of the
        // upper 32 bits of the result, saving us the need for an extra zero extension.
        // For base_l to be in the required range, it must not have the most significant
        // bit (aka sign bit) set.
3968 lgr_if_needed(dst, current); // no zero/sign extension in this case!
3969 z_agfi(dst, -(int)base_l); // base_l must be passed as signed.
3970 need_zero_extend = false;
3971 current = dst;
3972 } else {
        // To begin with, we may need to copy and/or zero-extend the register operand.
        // We have to calculate (current_l - base_l). Because there is no unsigned
        // subtract instruction with an immediate operand, we add the 2's complement of base_l.
3976 if (need_zero_extend) {
3977 z_llgfr(dst, current);
3978 need_zero_extend = false;
3979 } else {
3980 llgfr_if_needed(dst, current);
3981 }
3982 current = dst;
3983 z_alfi(dst, -base_l);
3984 }
3985 }
3986 }
3987
3988 if (need_zero_extend) {
3989 // We must zero-extend the calculated result. It may have some leftover bits in
3990 // the hi-word because we only did optimized calculations.
3991 z_llgfr(dst, current);
3992 } else {
3993 llgfr_if_needed(dst, current); // zero-extension while copying comes at no extra cost.
3994 }
3995
3996 BLOCK_COMMENT("} cKlass encoder");
3997 }
3998
3999 // This function calculates the size of the code generated by
4000 // decode_klass_not_null(register dst, Register src)
4001 // when Universe::heap() isn't null. Hence, if the instructions
4002 // it generates change, then this method needs to be updated.
4003 int MacroAssembler::instr_size_for_decode_klass_not_null() {
4004 address base = CompressedKlassPointers::base();
4005 int shift_size = CompressedKlassPointers::shift() == 0 ? 0 : 6; /* sllg */
4006 int addbase_size = 0;
4007
4008 if (base != nullptr) {
4009 unsigned int base_h = ((unsigned long)base)>>32;
4010 unsigned int base_l = (unsigned int)((unsigned long)base);
4011 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
4012 addbase_size += 6; /* aih */
4013 } else if ((base_h == 0) && (base_l != 0)) {
4014 addbase_size += 6; /* algfi */
4015 } else {
4016 addbase_size += load_const_size();
4017 addbase_size += 4; /* algr */
4018 }
4019 }
4020 #ifdef ASSERT
4021 addbase_size += 10;
4022 addbase_size += 2; // Extra sigill.
4023 #endif
4024 return addbase_size + shift_size;
4025 }
4026
4027 // !!! If the instructions that get generated here change
4028 // then function instr_size_for_decode_klass_not_null()
4029 // needs to get updated.
4030 // This variant of decode_klass_not_null() must generate predictable code!
4031 // The code must only depend on globally known parameters.
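// In essence (assuming a non-null base and a nonzero shift), the decode is:
//   klass = (narrow_klass << shift) + base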
4032 void MacroAssembler::decode_klass_not_null(Register dst) {
4033 address base = CompressedKlassPointers::base();
4034 int shift = CompressedKlassPointers::shift();
4035 int beg_off = offset();
4036
4037 BLOCK_COMMENT("cKlass decoder (const size) {");
4038
4039 if (shift != 0) { // Shift required?
4040 z_sllg(dst, dst, shift);
4041 }
4042 if (base != nullptr) {
4043 unsigned int base_h = ((unsigned long)base)>>32;
4044 unsigned int base_l = (unsigned int)((unsigned long)base);
4045 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
4046 z_aih(dst, base_h); // Base has no set bits in lower half.
4047 } else if ((base_h == 0) && (base_l != 0)) {
4048 z_algfi(dst, base_l); // Base has no set bits in upper half.
4049 } else {
4050 load_const(Z_R0, base); // Base has set bits everywhere.
4051 z_algr(dst, Z_R0);
4052 }
4053 }
4054
4055 #ifdef ASSERT
4056 Label ok;
4057 z_tmll(dst, CompressedKlassPointers::klass_alignment_in_bytes() - 1); // Check alignment.
4058 z_brc(Assembler::bcondAllZero, ok);
4059 // The plain disassembler does not recognize illtrap. It instead displays
4060 // a 32-bit value. Issuing two illtraps assures the disassembler finds
4061 // the proper beginning of the next instruction.
4062 z_illtrap(0xd1);
4063 z_illtrap(0xd1);
4064 bind(ok);
4065 #endif
4066 assert(offset() == beg_off + instr_size_for_decode_klass_not_null(), "Code gen mismatch.");
4067
4068 BLOCK_COMMENT("} cKlass decoder (const size)");
4069 }
4070
4071 // This variant of decode_klass_not_null() is for cases where
4072 // 1) the size of the generated instructions may vary
4073 // 2) the result is (potentially) stored in a register different from the source.
4074 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
4075 address base = CompressedKlassPointers::base();
4076 int shift = CompressedKlassPointers::shift();
4077
4078 BLOCK_COMMENT("cKlass decoder {");
4079
4080 if (src == noreg) src = dst;
4081
4082 if (shift != 0) { // Shift or at least move required?
4083 z_sllg(dst, src, shift);
4084 } else {
4085 lgr_if_needed(dst, src);
4086 }
4087
4088 if (base != nullptr) {
4089 unsigned int base_h = ((unsigned long)base)>>32;
4090 unsigned int base_l = (unsigned int)((unsigned long)base);
4091 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
      z_aih(dst, base_h);     // Base has no set bits in lower half.
4093 } else if ((base_h == 0) && (base_l != 0)) {
4094 z_algfi(dst, base_l); // Base has no set bits in upper half.
4095 } else {
4096 load_const_optimized(Z_R0, base); // Base has set bits everywhere.
4097 z_algr(dst, Z_R0);
4098 }
4099 }
4100
4101 #ifdef ASSERT
4102 Label ok;
4103 z_tmll(dst, CompressedKlassPointers::klass_alignment_in_bytes() - 1); // Check alignment.
4104 z_brc(Assembler::bcondAllZero, ok);
4105 // The plain disassembler does not recognize illtrap. It instead displays
4106 // a 32-bit value. Issuing two illtraps assures the disassembler finds
4107 // the proper beginning of the next instruction.
4108 z_illtrap(0xd2);
4109 z_illtrap(0xd2);
4110 bind(ok);
4111 #endif
4112 BLOCK_COMMENT("} cKlass decoder");
4113 }
4114
4115 void MacroAssembler::load_klass(Register klass, Address mem) {
4116 z_llgf(klass, mem);
4117 // Attention: no null check here!
4118 decode_klass_not_null(klass);
4119 }
4120
// Loads the obj's narrow Klass into dst.
// Input:
//  src - the oop we want to load the klass from.
//  dst - output narrow Klass (nKlass).
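// With compact headers, the narrow Klass is stored in the upper bits of the
// object's mark word; a single load plus a logical right shift by
// markWord::klass_shift recovers it, as no separate klass word exists.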
4125 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
4126 BLOCK_COMMENT("load_narrow_klass_compact {");
4127 assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
4128 z_lg(dst, Address(src, oopDesc::mark_offset_in_bytes()));
4129 z_srlg(dst, dst, markWord::klass_shift);
4130 BLOCK_COMMENT("} load_narrow_klass_compact");
4131 }
4132
4133 void MacroAssembler::cmp_klass(Register klass, Register obj, Register tmp) {
4134 BLOCK_COMMENT("cmp_klass {");
4135 assert_different_registers(obj, klass, tmp);
4136 if (UseCompactObjectHeaders) {
4137 assert(tmp != noreg, "required");
4138 assert_different_registers(klass, obj, tmp);
4139 load_narrow_klass_compact(tmp, obj);
4140 z_cr(klass, tmp);
4141 } else {
4142 z_c(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
4143 }
4144 BLOCK_COMMENT("} cmp_klass");
4145 }
4146
4147 void MacroAssembler::cmp_klasses_from_objects(Register obj1, Register obj2, Register tmp1, Register tmp2) {
4148 BLOCK_COMMENT("cmp_klasses_from_objects {");
4149 if (UseCompactObjectHeaders) {
4150 assert(tmp1 != noreg && tmp2 != noreg, "required");
4151 assert_different_registers(obj1, obj2, tmp1, tmp2);
4152 load_narrow_klass_compact(tmp1, obj1);
4153 load_narrow_klass_compact(tmp2, obj2);
4154 z_cr(tmp1, tmp2);
4155 } else {
4156 z_l(tmp1, Address(obj1, oopDesc::klass_offset_in_bytes()));
4157 z_c(tmp1, Address(obj2, oopDesc::klass_offset_in_bytes()));
4158 }
4159 BLOCK_COMMENT("} cmp_klasses_from_objects");
4160 }
4161
4162 void MacroAssembler::load_klass(Register klass, Register src_oop) {
4163 if (UseCompactObjectHeaders) {
4164 load_narrow_klass_compact(klass, src_oop);
4165 decode_klass_not_null(klass);
4166 } else {
4167 z_llgf(klass, oopDesc::klass_offset_in_bytes(), src_oop);
4168 decode_klass_not_null(klass);
4169 }
4170 }
4171
4172 void MacroAssembler::store_klass(Register klass, Register dst_oop, Register ck) {
4173 assert(!UseCompactObjectHeaders, "Don't use with compact headers");
4174 assert_different_registers(dst_oop, klass, Z_R0);
4175 if (ck == noreg) ck = klass;
4176 encode_klass_not_null(ck, klass);
4177 z_st(ck, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
4178 }
4179
4180 void MacroAssembler::store_klass_gap(Register s, Register d) {
4181 assert(!UseCompactObjectHeaders, "Don't use with compact headers");
4182 assert(s != d, "not enough registers");
4183 // Support s = noreg.
4184 if (s != noreg) {
4185 z_st(s, Address(d, oopDesc::klass_gap_offset_in_bytes()));
4186 } else {
4187 z_mvhi(Address(d, oopDesc::klass_gap_offset_in_bytes()), 0);
4188 }
4189 }
4190
4191 // Compare klass ptr in memory against klass ptr in register.
4192 //
4193 // Rop1 - klass in register, always uncompressed.
4194 // disp - Offset of klass in memory, compressed/uncompressed, depending on runtime flag.
4195 // Rbase - Base address of cKlass in memory.
// maybenull - True if Rop1 possibly is null.
4197 void MacroAssembler::compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybenull) {
4198
4199 BLOCK_COMMENT("compare klass ptr {");
4200
4201 const int shift = CompressedKlassPointers::shift();
4202 address base = CompressedKlassPointers::base();
4203
4204 if (UseCompactObjectHeaders) {
4205 assert(shift >= 3, "cKlass encoder detected bad shift");
4206 } else {
4207 assert((shift == 0) || (shift == 3), "cKlass encoder detected bad shift");
4208 }
4209 assert_different_registers(Rop1, Z_R0);
4210 assert_different_registers(Rop1, Rbase, Z_R1);
4211
  // First encode the klass pointer in the register, then compare it with the cKlass in memory.
  // This sequence saves an unnecessary cKlass load and decode.
4214 if (base == nullptr) {
4215 if (shift == 0) {
4216 z_cl(Rop1, disp, Rbase); // Unscaled
4217 } else {
4218 z_srlg(Z_R0, Rop1, shift); // ZeroBased
4219 z_cl(Z_R0, disp, Rbase);
4220 }
4221 } else { // HeapBased
4222 #ifdef ASSERT
4223 bool used_R0 = true;
4224 bool used_R1 = true;
4225 #endif
4226 Register current = Rop1;
4227 Label done;
4228
4229 if (maybenull) { // null pointer must be preserved!
4230 z_ltgr(Z_R0, current);
4231 z_bre(done);
4232 current = Z_R0;
4233 }
4234
4235 unsigned int base_h = ((unsigned long)base)>>32;
4236 unsigned int base_l = (unsigned int)((unsigned long)base);
4237 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
4238 lgr_if_needed(Z_R0, current);
4239 z_aih(Z_R0, -((int)base_h)); // Base has no set bits in lower half.
4240 } else if ((base_h == 0) && (base_l != 0)) {
4241 lgr_if_needed(Z_R0, current);
4242 z_agfi(Z_R0, -(int)base_l);
4243 } else {
4244 int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base));
4245 add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); // Subtract base by adding complement.
4246 }
4247
4248 if (shift != 0) {
4249 z_srlg(Z_R0, Z_R0, shift);
4250 }
4251 bind(done);
4252 z_cl(Z_R0, disp, Rbase);
4253 #ifdef ASSERT
4254 if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2);
4255 if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2);
4256 #endif
4257 }
4258
4259 BLOCK_COMMENT("} compare klass ptr");
4260 }
4261
4262 //---------------------------
4263 // Compressed oops
4264 //---------------------------
4265
4266 void MacroAssembler::encode_heap_oop(Register oop) {
4267 oop_encoder(oop, oop, true /*maybe null*/);
4268 }
4269
4270 void MacroAssembler::encode_heap_oop_not_null(Register oop) {
4271 oop_encoder(oop, oop, false /*not null*/);
4272 }
4273
// Called with something derived from the oop base, e.g. oop_base >> 3.
4275 int MacroAssembler::get_oop_base_pow2_offset(uint64_t oop_base) {
4276 unsigned int oop_base_ll = ((unsigned int)(oop_base >> 0)) & 0xffff;
4277 unsigned int oop_base_lh = ((unsigned int)(oop_base >> 16)) & 0xffff;
4278 unsigned int oop_base_hl = ((unsigned int)(oop_base >> 32)) & 0xffff;
4279 unsigned int oop_base_hh = ((unsigned int)(oop_base >> 48)) & 0xffff;
4280 unsigned int n_notzero_parts = (oop_base_ll == 0 ? 0:1)
4281 + (oop_base_lh == 0 ? 0:1)
4282 + (oop_base_hl == 0 ? 0:1)
4283 + (oop_base_hh == 0 ? 0:1);
4284
4285 assert(oop_base != 0, "This is for HeapBased cOops only");
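  // Example (hypothetical value): oop_base = 0x00000003ffffc000 has three
  // non-zero 16-bit parts, but is only 0x4000 shy of 0x400000000. Since
  // 0x10000 - 0xc000 = 0x4000 < 0x8000, the adjusted base has a single
  // non-zero part, and -0x4000 is returned as the composite-add offset.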
4286
4287 if (n_notzero_parts != 1) { // Check if oop_base is just a few pages shy of a power of 2.
4288 uint64_t pow2_offset = 0x10000 - oop_base_ll;
4289 if (pow2_offset < 0x8000) { // This might not be necessary.
4290 uint64_t oop_base2 = oop_base + pow2_offset;
4291
4292 oop_base_ll = ((unsigned int)(oop_base2 >> 0)) & 0xffff;
4293 oop_base_lh = ((unsigned int)(oop_base2 >> 16)) & 0xffff;
4294 oop_base_hl = ((unsigned int)(oop_base2 >> 32)) & 0xffff;
4295 oop_base_hh = ((unsigned int)(oop_base2 >> 48)) & 0xffff;
4296 n_notzero_parts = (oop_base_ll == 0 ? 0:1) +
4297 (oop_base_lh == 0 ? 0:1) +
4298 (oop_base_hl == 0 ? 0:1) +
4299 (oop_base_hh == 0 ? 0:1);
4300 if (n_notzero_parts == 1) {
4301 assert(-(int64_t)pow2_offset != (int64_t)-1, "We use -1 to signal uninitialized base register");
4302 return -pow2_offset;
4303 }
4304 }
4305 }
4306 return 0;
4307 }
4308
4309 // If base address is offset from a straight power of two by just a few pages,
4310 // return this offset to the caller for a possible later composite add.
4311 // TODO/FIX: will only work correctly for 4k pages.
4312 int MacroAssembler::get_oop_base(Register Rbase, uint64_t oop_base) {
4313 int pow2_offset = get_oop_base_pow2_offset(oop_base);
4314
4315 load_const_optimized(Rbase, oop_base - pow2_offset); // Best job possible.
4316
4317 return pow2_offset;
4318 }
4319
4320 int MacroAssembler::get_oop_base_complement(Register Rbase, uint64_t oop_base) {
4321 int offset = get_oop_base(Rbase, oop_base);
4322 z_lcgr(Rbase, Rbase);
4323 return -offset;
4324 }
4325
4326 // Compare compressed oop in memory against oop in register.
4327 // Rop1 - Oop in register.
4328 // disp - Offset of cOop in memory.
4329 // Rbase - Base address of cOop in memory.
// maybenull - True if Rop1 possibly is null.
4332 void MacroAssembler::compare_heap_oop(Register Rop1, Address mem, bool maybenull) {
4333 Register Rbase = mem.baseOrR0();
4334 Register Rindex = mem.indexOrR0();
4335 int64_t disp = mem.disp();
4336
4337 const int shift = CompressedOops::shift();
4338 address base = CompressedOops::base();
4339
4340 assert(UseCompressedOops, "must be on to call this method");
4341 assert(Universe::heap() != nullptr, "java heap must be initialized to call this method");
4342 assert((shift == 0) || (shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift");
4343 assert_different_registers(Rop1, Z_R0);
4344 assert_different_registers(Rop1, Rbase, Z_R1);
4345 assert_different_registers(Rop1, Rindex, Z_R1);
4346
4347 BLOCK_COMMENT("compare heap oop {");
4348
4349 // First encode register oop and then compare with cOop in memory.
4350 // This sequence saves an unnecessary cOop load and decode.
4351 if (base == nullptr) {
4352 if (shift == 0) {
4353 z_cl(Rop1, disp, Rindex, Rbase); // Unscaled
4354 } else {
4355 z_srlg(Z_R0, Rop1, shift); // ZeroBased
4356 z_cl(Z_R0, disp, Rindex, Rbase);
4357 }
4358 } else { // HeapBased
4359 #ifdef ASSERT
4360 bool used_R0 = true;
4361 bool used_R1 = true;
4362 #endif
4363 Label done;
4364 int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base));
4365
4366 if (maybenull) { // null pointer must be preserved!
4367 z_ltgr(Z_R0, Rop1);
4368 z_bre(done);
4369 }
4370
4371 add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1);
4372 z_srlg(Z_R0, Z_R0, shift);
4373
4374 bind(done);
4375 z_cl(Z_R0, disp, Rindex, Rbase);
4376 #ifdef ASSERT
4377 if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2);
4378 if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2);
4379 #endif
4380 }
4381 BLOCK_COMMENT("} compare heap oop");
4382 }
4383
4384 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
4385 const Address& addr, Register val,
4386 Register tmp1, Register tmp2, Register tmp3) {
4387 assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL |
4388 ON_UNKNOWN_OOP_REF)) == 0, "unsupported decorator");
4389 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
4390 decorators = AccessInternal::decorator_fixup(decorators, type);
4391 bool as_raw = (decorators & AS_RAW) != 0;
4392 if (as_raw) {
4393 bs->BarrierSetAssembler::store_at(this, decorators, type,
4394 addr, val,
4395 tmp1, tmp2, tmp3);
4396 } else {
4397 bs->store_at(this, decorators, type,
4398 addr, val,
4399 tmp1, tmp2, tmp3);
4400 }
4401 }
4402
4403 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
4404 const Address& addr, Register dst,
4405 Register tmp1, Register tmp2, Label *is_null) {
4406 assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL |
4407 ON_PHANTOM_OOP_REF | ON_WEAK_OOP_REF)) == 0, "unsupported decorator");
4408 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
4409 decorators = AccessInternal::decorator_fixup(decorators, type);
4410 bool as_raw = (decorators & AS_RAW) != 0;
4411 if (as_raw) {
4412 bs->BarrierSetAssembler::load_at(this, decorators, type,
4413 addr, dst,
4414 tmp1, tmp2, is_null);
4415 } else {
4416 bs->load_at(this, decorators, type,
4417 addr, dst,
4418 tmp1, tmp2, is_null);
4419 }
4420 }
4421
4422 void MacroAssembler::load_heap_oop(Register dest, const Address &a,
4423 Register tmp1, Register tmp2,
4424 DecoratorSet decorators, Label *is_null) {
4425 access_load_at(T_OBJECT, IN_HEAP | decorators, a, dest, tmp1, tmp2, is_null);
4426 }
4427
4428 void MacroAssembler::store_heap_oop(Register Roop, const Address &a,
4429 Register tmp1, Register tmp2, Register tmp3,
4430 DecoratorSet decorators) {
4431 access_store_at(T_OBJECT, IN_HEAP | decorators, a, Roop, tmp1, tmp2, tmp3);
4432 }
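// Hypothetical usage sketch (register, decorator, and field_offset choices are
// illustrative placeholders, not taken from real callers):
//   __ load_heap_oop(Z_R2, Address(Z_R3, field_offset), Z_R0_scratch, Z_R1_scratch, IS_NOT_NULL);
//   __ store_heap_oop(Z_R2, Address(Z_R4, field_offset), Z_R0_scratch, Z_R1_scratch, noreg, IS_NOT_NULL);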
4433
4434 //-------------------------------------------------
4435 // Encode compressed oop. Generally usable encoder.
4436 //-------------------------------------------------
4437 // Rsrc - contains regular oop on entry. It remains unchanged.
4438 // Rdst - contains compressed oop on exit.
// Rdst and Rsrc may indicate the same register, in which case Rsrc is clobbered.
4440 //
4441 // Rdst must not indicate scratch register Z_R1 (Z_R1_scratch) for functionality.
4442 // Rdst should not indicate scratch register Z_R0 (Z_R0_scratch) for performance.
4443 //
// only32bitValid is set if subsequent code uses only the lower 32 bits. In that
// case we need not clear the upper 32 bits.
4446 void MacroAssembler::oop_encoder(Register Rdst, Register Rsrc, bool maybenull,
4447 Register Rbase, int pow2_offset, bool only32bitValid) {
4448
4449 const address oop_base = CompressedOops::base();
4450 const int oop_shift = CompressedOops::shift();
4451 const bool disjoint = CompressedOops::base_disjoint();
4452
4453 assert(UseCompressedOops, "must be on to call this method");
4454 assert(Universe::heap() != nullptr, "java heap must be initialized to call this encoder");
4455 assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift");
4456
4457 if (disjoint || (oop_base == nullptr)) {
4458 BLOCK_COMMENT("cOop encoder zeroBase {");
4459 if (oop_shift == 0) {
4460 if (oop_base != nullptr && !only32bitValid) {
4461 z_llgfr(Rdst, Rsrc); // Clear upper bits in case the register will be decoded again.
4462 } else {
4463 lgr_if_needed(Rdst, Rsrc);
4464 }
4465 } else {
4466 z_srlg(Rdst, Rsrc, oop_shift);
4467 if (oop_base != nullptr && !only32bitValid) {
4468 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
4469 }
4470 }
4471 BLOCK_COMMENT("} cOop encoder zeroBase");
4472 return;
4473 }
4474
4475 bool used_R0 = false;
4476 bool used_R1 = false;
4477
4478 BLOCK_COMMENT("cOop encoder general {");
4479 assert_different_registers(Rdst, Z_R1);
4480 assert_different_registers(Rsrc, Rbase);
4481 if (maybenull) {
4482 Label done;
4483 // We reorder shifting and subtracting, so that we can compare
4484 // and shift in parallel:
4485 //
    // cycle 0: potential LoadN, base = <const>
    // cycle 1: base = -base, dst = src >> shift, cmp cr = (src != 0)
    // cycle 2: if (cr) br,   dst = dst + base + offset
4489
4490 // Get oop_base components.
4491 if (pow2_offset == -1) {
4492 if (Rdst == Rbase) {
4493 if (Rdst == Z_R1 || Rsrc == Z_R1) {
4494 Rbase = Z_R0;
4495 used_R0 = true;
4496 } else {
4497 Rdst = Z_R1;
4498 used_R1 = true;
4499 }
4500 }
4501 if (Rbase == Z_R1) {
4502 used_R1 = true;
4503 }
4504 pow2_offset = get_oop_base_complement(Rbase, ((uint64_t)(intptr_t)oop_base) >> oop_shift);
4505 }
4506 assert_different_registers(Rdst, Rbase);
4507
4508 // Check for null oop (must be left alone) and shift.
4509 if (oop_shift != 0) { // Shift out alignment bits
      if (((intptr_t)oop_base & 0xc000000000000000L) == 0L) { // We are sure: no address will have the two leftmost bits set.
        z_srag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code.
      } else {
        z_srlg(Rdst, Rsrc, oop_shift);
        z_ltgr(Rsrc, Rsrc);            // Recommended way of testing for zero.
        // Note: z_cghi(Rsrc, 0) would avoid writing a register, but it is not faster.
      }
4518 } else {
4519 z_ltgr(Rdst, Rsrc); // Move null to result register.
4520 }
4521 z_bre(done);
4522
4523 // Subtract oop_base components.
4524 if ((Rdst == Z_R0) || (Rbase == Z_R0)) {
4525 z_algr(Rdst, Rbase);
4526 if (pow2_offset != 0) { add2reg(Rdst, pow2_offset); }
4527 } else {
4528 add2reg_with_index(Rdst, pow2_offset, Rbase, Rdst);
4529 }
4530 if (!only32bitValid) {
4531 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
4532 }
4533 bind(done);
4534
4535 } else { // not null
4536 // Get oop_base components.
4537 if (pow2_offset == -1) {
4538 pow2_offset = get_oop_base_complement(Rbase, (uint64_t)(intptr_t)oop_base);
4539 }
4540
4541 // Subtract oop_base components and shift.
4542 if (Rdst == Z_R0 || Rsrc == Z_R0 || Rbase == Z_R0) {
4543 // Don't use lay instruction.
4544 if (Rdst == Rsrc) {
4545 z_algr(Rdst, Rbase);
4546 } else {
4547 lgr_if_needed(Rdst, Rbase);
4548 z_algr(Rdst, Rsrc);
4549 }
4550 if (pow2_offset != 0) add2reg(Rdst, pow2_offset);
4551 } else {
4552 add2reg_with_index(Rdst, pow2_offset, Rbase, Rsrc);
4553 }
4554 if (oop_shift != 0) { // Shift out alignment bits.
4555 z_srlg(Rdst, Rdst, oop_shift);
4556 }
4557 if (!only32bitValid) {
4558 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
4559 }
4560 }
4561 #ifdef ASSERT
4562 if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb01bUL, 2); }
4563 if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb02bUL, 2); }
4564 #endif
4565 BLOCK_COMMENT("} cOop encoder general");
4566 }
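// Net effect of the encoder (sketch), with B = CompressedOops::base() and s = CompressedOops::shift():
//   Rdst = (maybenull && Rsrc == 0) ? 0 : (Rsrc - B) >> s;   // B == 0 or disjoint base: just Rsrc >> s
// The upper 32 bits of Rdst are cleared unless only32bitValid is set.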
4567
4568 //-------------------------------------------------
// Decode compressed oop. Generally usable decoder.
4570 //-------------------------------------------------
4571 // Rsrc - contains compressed oop on entry.
4572 // Rdst - contains regular oop on exit.
4573 // Rdst and Rsrc may indicate same register.
// Rsrc must not be the same register as Rbase if Rbase was preloaded before the call (see pow2_offset).
// Rdst may be the same register as Rbase; then either Z_R0 or Z_R1 must be available as scratch.
4576 // Rbase - register to use for the base
4577 // pow2_offset - offset of base to nice value. If -1, base must be loaded.
4578 // For performance, it is good to
4579 // - avoid Z_R0 for any of the argument registers.
4580 // - keep Rdst and Rsrc distinct from Rbase. Rdst == Rsrc is ok for performance.
4581 // - avoid Z_R1 for Rdst if Rdst == Rbase.
4582 void MacroAssembler::oop_decoder(Register Rdst, Register Rsrc, bool maybenull, Register Rbase, int pow2_offset) {
4583
4584 const address oop_base = CompressedOops::base();
4585 const int oop_shift = CompressedOops::shift();
4586 const bool disjoint = CompressedOops::base_disjoint();
4587
4588 assert(UseCompressedOops, "must be on to call this method");
4589 assert(Universe::heap() != nullptr, "java heap must be initialized to call this decoder");
4590 assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes),
4591 "cOop encoder detected bad shift");
4592
4593 // cOops are always loaded zero-extended from memory. No explicit zero-extension necessary.
4594
4595 if (oop_base != nullptr) {
4596 unsigned int oop_base_hl = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xffff;
4597 unsigned int oop_base_hh = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 48)) & 0xffff;
4598 unsigned int oop_base_hf = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xFFFFffff;
4599 if (disjoint && (oop_base_hl == 0 || oop_base_hh == 0)) {
4600 BLOCK_COMMENT("cOop decoder disjointBase {");
4601 // We do not need to load the base. Instead, we can install the upper bits
4602 // with an OR instead of an ADD.
4603 Label done;
4604
4605 // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set.
4606 if (maybenull) { // null pointer must be preserved!
4607 z_slag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code.
4608 z_bre(done);
4609 } else {
4610 z_sllg(Rdst, Rsrc, oop_shift); // Logical shift leaves condition code alone.
4611 }
4612 if ((oop_base_hl != 0) && (oop_base_hh != 0)) {
4613 z_oihf(Rdst, oop_base_hf);
4614 } else if (oop_base_hl != 0) {
4615 z_oihl(Rdst, oop_base_hl);
4616 } else {
4617 assert(oop_base_hh != 0, "not heapbased mode");
4618 z_oihh(Rdst, oop_base_hh);
4619 }
4620 bind(done);
4621 BLOCK_COMMENT("} cOop decoder disjointBase");
4622 } else {
4623 BLOCK_COMMENT("cOop decoder general {");
4624 // There are three decode steps:
4625 // scale oop offset (shift left)
4626 // get base (in reg) and pow2_offset (constant)
4627 // add base, pow2_offset, and oop offset
4628 // The following register overlap situations may exist:
4629 // Rdst == Rsrc, Rbase any other
4630 // not a problem. Scaling in-place leaves Rbase undisturbed.
4631 // Loading Rbase does not impact the scaled offset.
4632 // Rdst == Rbase, Rsrc any other
4633 // scaling would destroy a possibly preloaded Rbase. Loading Rbase
4634 // would destroy the scaled offset.
4635 // Remedy: use Rdst_tmp if Rbase has been preloaded.
4636 // use Rbase_tmp if base has to be loaded.
4637 // Rsrc == Rbase, Rdst any other
4638 // Only possible without preloaded Rbase.
4639 // Loading Rbase does not destroy compressed oop because it was scaled into Rdst before.
4640 // Rsrc == Rbase, Rdst == Rbase
4641 // Only possible without preloaded Rbase.
4642 // Loading Rbase would destroy compressed oop. Scaling in-place is ok.
4643 // Remedy: use Rbase_tmp.
4644 //
4645 Label done;
4646 Register Rdst_tmp = Rdst;
4647 Register Rbase_tmp = Rbase;
4648 bool used_R0 = false;
4649 bool used_R1 = false;
4650 bool base_preloaded = pow2_offset >= 0;
4651 guarantee(!(base_preloaded && (Rsrc == Rbase)), "Register clash, check caller");
4652 assert(oop_shift != 0, "room for optimization");
4653
4654 // Check if we need to use scratch registers.
      if (Rdst == Rbase) {
        assert(!(((Rdst == Z_R0) && (Rsrc == Z_R1)) || ((Rdst == Z_R1) && (Rsrc == Z_R0))), "need a scratch reg");
        if (Rdst != Rsrc) {
          if (base_preloaded) { Rdst_tmp  = (Rdst == Z_R1) ? Z_R0 : Z_R1; }
          else                { Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; }
        } else {
          Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1;
        }
        // Remember which scratch register was taken, so it gets poisoned in debug builds (see ASSERT code below).
        if (Rdst == Z_R1) { used_R0 = true; } else { used_R1 = true; }
      }
4664 if (base_preloaded) lgr_if_needed(Rbase_tmp, Rbase);
4665
4666 // Scale oop and check for null.
4667 // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set.
4668 if (maybenull) { // null pointer must be preserved!
4669 z_slag(Rdst_tmp, Rsrc, oop_shift); // Arithmetic shift sets the condition code.
4670 z_bre(done);
4671 } else {
4672 z_sllg(Rdst_tmp, Rsrc, oop_shift); // Logical shift leaves condition code alone.
4673 }
4674
4675 // Get oop_base components.
4676 if (!base_preloaded) {
4677 pow2_offset = get_oop_base(Rbase_tmp, (uint64_t)(intptr_t)oop_base);
4678 }
4679
4680 // Add up all components.
4681 if ((Rbase_tmp == Z_R0) || (Rdst_tmp == Z_R0)) {
4682 z_algr(Rdst_tmp, Rbase_tmp);
4683 if (pow2_offset != 0) { add2reg(Rdst_tmp, pow2_offset); }
4684 } else {
4685 add2reg_with_index(Rdst_tmp, pow2_offset, Rbase_tmp, Rdst_tmp);
4686 }
4687
4688 bind(done);
4689 lgr_if_needed(Rdst, Rdst_tmp);
4690 #ifdef ASSERT
4691 if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb03bUL, 2); }
4692 if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb04bUL, 2); }
4693 #endif
4694 BLOCK_COMMENT("} cOop decoder general");
4695 }
4696 } else {
4697 BLOCK_COMMENT("cOop decoder zeroBase {");
4698 if (oop_shift == 0) {
4699 lgr_if_needed(Rdst, Rsrc);
4700 } else {
4701 z_sllg(Rdst, Rsrc, oop_shift);
4702 }
4703 BLOCK_COMMENT("} cOop decoder zeroBase");
4704 }
4705 }
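// Net effect of the decoder (sketch), with B = CompressedOops::base() and s = CompressedOops::shift():
//   Rdst = (maybenull && Rsrc == 0) ? 0 : ((uintptr_t)Rsrc << s) + B;   // B == 0: just Rsrc << s
// For a disjoint base, the "+ B" degenerates to OR-ing in the high base bits (see above).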
4706
4707 // ((OopHandle)result).resolve();
4708 void MacroAssembler::resolve_oop_handle(Register result, Register tmp1, Register tmp2) {
4709 access_load_at(T_OBJECT, IN_NATIVE, Address(result, 0), result, tmp1, tmp2);
4710 }
4711
4712 void MacroAssembler::load_method_holder(Register holder, Register method) {
4713 mem2reg_opt(holder, Address(method, Method::const_offset()));
4714 mem2reg_opt(holder, Address(holder, ConstMethod::constants_offset()));
4715 mem2reg_opt(holder, Address(holder, ConstantPool::pool_holder_offset()));
4716 }
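// Equivalent pointer chase in C++ (sketch):
//   holder = method->constMethod()->constants()->pool_holder();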
4717
4718 //---------------------------------------------------------------
4719 //--- Operations on arrays.
4720 //---------------------------------------------------------------
4721
4722 // Compiler ensures base is doubleword aligned and cnt is #doublewords.
4723 // Emitter does not KILL cnt and base arguments, since they need to be copied to
4724 // work registers anyway.
// Actually, only Z_R0, Z_R1 (which are work registers), and odd_tmp_reg are killed.
4726 unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register odd_tmp_reg) {
4727
4728 int block_start = offset();
4729 Register dst_len = Z_R1; // Holds dst len for MVCLE.
4730 Register dst_addr = Z_R0; // Holds dst addr for MVCLE.
4731
4732 Label doXC, doMVCLE, done;
4733
4734 BLOCK_COMMENT("Clear_Array {");
4735
4736 // Check for zero len and convert to long.
4737 z_ltgfr(odd_tmp_reg, cnt_arg);
4738 z_bre(done); // Nothing to do if len == 0.
4739
4740 // Prefetch data to be cleared.
4741 if (VM_Version::has_Prefetch()) {
4742 z_pfd(0x02, 0, Z_R0, base_pointer_arg);
4743 z_pfd(0x02, 256, Z_R0, base_pointer_arg);
4744 }
4745
4746 z_sllg(dst_len, odd_tmp_reg, 3); // #bytes to clear.
4747 z_cghi(odd_tmp_reg, 32); // Check for len <= 256 bytes (<=32 DW).
4748 z_brnh(doXC); // If so, use executed XC to clear.
4749
4750 // MVCLE: initialize long arrays (general case).
4751 bind(doMVCLE);
4752 z_lgr(dst_addr, base_pointer_arg);
4753 // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0.
4754 // The even register of the register pair is not killed.
4755 clear_reg(odd_tmp_reg, true, false);
4756 MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding()-1), 0);
4757 z_bru(done);
4758
4759 // XC: initialize short arrays.
4760 Label XC_template; // Instr template, never exec directly!
4761 bind(XC_template);
4762 z_xc(0,0,base_pointer_arg,0,base_pointer_arg);
4763
4764 bind(doXC);
4765 add2reg(dst_len, -1); // Get #bytes-1 for EXECUTE.
4766 if (VM_Version::has_ExecuteExtensions()) {
4767 z_exrl(dst_len, XC_template); // Execute XC with var. len.
4768 } else {
4769 z_larl(odd_tmp_reg, XC_template);
4770 z_ex(dst_len,0,Z_R0,odd_tmp_reg); // Execute XC with var. len.
4771 }
4772 // z_bru(done); // fallthru
4773
4774 bind(done);
4775
4776 BLOCK_COMMENT("} Clear_Array");
4777
4778 int block_end = offset();
4779 return block_end - block_start;
4780 }
4781
4782 // Compiler ensures base is doubleword aligned and cnt is count of doublewords.
4783 // Emitter does not KILL any arguments nor work registers.
4784 // Emitter generates up to 16 XC instructions, depending on the array length.
4785 unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) {
4786 int block_start = offset();
4787 int off;
4788 int lineSize_Bytes = AllocatePrefetchStepSize;
4789 int lineSize_DW = AllocatePrefetchStepSize>>LogBytesPerWord;
4790 bool doPrefetch = VM_Version::has_Prefetch();
4791 int XC_maxlen = 256;
4792 int numXCInstr = cnt > 0 ? (cnt*BytesPerWord-1)/XC_maxlen+1 : 0;
4793
4794 BLOCK_COMMENT("Clear_Array_Const {");
4795 assert(cnt*BytesPerWord <= 4096, "ClearArrayConst can handle 4k only");
4796
4797 // Do less prefetching for very short arrays.
4798 if (numXCInstr > 0) {
4799 // Prefetch only some cache lines, then begin clearing.
4800 if (doPrefetch) {
4801 if (cnt*BytesPerWord <= lineSize_Bytes/4) { // If less than 1/4 of a cache line to clear,
4802 z_pfd(0x02, 0, Z_R0, base); // prefetch just the first cache line.
4803 } else {
4804 assert(XC_maxlen == lineSize_Bytes, "ClearArrayConst needs 256B cache lines");
      for (off = 0; (off < AllocatePrefetchLines) && (off <= numXCInstr); off++) {
4806 z_pfd(0x02, off*lineSize_Bytes, Z_R0, base);
4807 }
4808 }
4809 }
4810
4811 for (off=0; off<(numXCInstr-1); off++) {
4812 z_xc(off*XC_maxlen, XC_maxlen-1, base, off*XC_maxlen, base);
4813
4814 // Prefetch some cache lines in advance.
4815 if (doPrefetch && (off <= numXCInstr-AllocatePrefetchLines)) {
4816 z_pfd(0x02, (off+AllocatePrefetchLines)*lineSize_Bytes, Z_R0, base);
4817 }
4818 }
4819 if (off*XC_maxlen < cnt*BytesPerWord) {
4820 z_xc(off*XC_maxlen, (cnt*BytesPerWord-off*XC_maxlen)-1, base, off*XC_maxlen, base);
4821 }
4822 }
4823 BLOCK_COMMENT("} Clear_Array_Const");
4824
4825 int block_end = offset();
4826 return block_end - block_start;
4827 }
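// Worked example (illustrative): cnt = 40 doublewords = 320 bytes.
//   numXCInstr = (320-1)/256 + 1 = 2, so the emitter produces
//   z_xc(0, 255, base, 0, base);     // clear bytes 0..255
//   z_xc(256, 63, base, 256, base);  // clear remaining bytes 256..319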
4828
4829 // Compiler ensures base is doubleword aligned and cnt is #doublewords.
// Emitter does not KILL the base argument, since it needs to be copied to
// a work register anyway. cnt is an immediate operand here.
// Actually, only Z_R0, Z_R1 (which are work registers), and odd_tmp_reg are killed.
4833 //
4834 // For very large arrays, exploit MVCLE H/W support.
4835 // MVCLE instruction automatically exploits H/W-optimized page mover.
4836 // - Bytes up to next page boundary are cleared with a series of XC to self.
4837 // - All full pages are cleared with the page mover H/W assist.
4838 // - Remaining bytes are again cleared by a series of XC to self.
4839 //
4840 unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register odd_tmp_reg) {
4841
4842 int block_start = offset();
4843 Register dst_len = Z_R1; // Holds dst len for MVCLE.
4844 Register dst_addr = Z_R0; // Holds dst addr for MVCLE.
4845
4846 BLOCK_COMMENT("Clear_Array_Const_Big {");
4847
4848 // Get len to clear.
4849 load_const_optimized(dst_len, (long)cnt*8L); // in Bytes = #DW*8
4850
4851 // Prepare other args to MVCLE.
4852 z_lgr(dst_addr, base_pointer_arg);
4853 // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0.
4854 // The even register of the register pair is not killed.
4855 (void) clear_reg(odd_tmp_reg, true, false); // Src len of MVCLE is zero.
4856 MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding() - 1), 0);
4857 BLOCK_COMMENT("} Clear_Array_Const_Big");
4858
4859 int block_end = offset();
4860 return block_end - block_start;
4861 }
4862
4863 // Allocator.
4864 unsigned int MacroAssembler::CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg,
4865 Register cnt_reg,
4866 Register tmp1_reg, Register tmp2_reg) {
4867 // Tmp1 is oddReg.
4868 // Tmp2 is evenReg.
4869
4870 int block_start = offset();
4871 Label doMVC, doMVCLE, done, MVC_template;
4872
4873 BLOCK_COMMENT("CopyRawMemory_AlignedDisjoint {");
4874
4875 // Check for zero len and convert to long.
  z_ltgfr(cnt_reg, cnt_reg);           // Sign-extend and test cnt; the extended value is reused below.
4877 z_bre(done); // Nothing to do if len == 0.
4878
  z_sllg(Z_R1, cnt_reg, 3);            // Dst len in bytes. Calculated early to have the result ready.
4880
4881 z_cghi(cnt_reg, 32); // Check for len <= 256 bytes (<=32 DW).
  z_brnh(doMVC);                       // If so, use executed MVC to copy.
4883
4884 bind(doMVCLE); // A lot of data (more than 256 bytes).
4885 // Prep dest reg pair.
4886 z_lgr(Z_R0, dst_reg); // dst addr
4887 // Dst len already in Z_R1.
4888 // Prep src reg pair.
4889 z_lgr(tmp2_reg, src_reg); // src addr
4890 z_lgr(tmp1_reg, Z_R1); // Src len same as dst len.
4891
4892 // Do the copy.
4893 move_long_ext(Z_R0, tmp2_reg, 0xb0); // Bypass cache.
4894 z_bru(done); // All done.
4895
4896 bind(MVC_template); // Just some data (not more than 256 bytes).
4897 z_mvc(0, 0, dst_reg, 0, src_reg);
4898
4899 bind(doMVC);
4900
4901 if (VM_Version::has_ExecuteExtensions()) {
4902 add2reg(Z_R1, -1);
4903 } else {
4904 add2reg(tmp1_reg, -1, Z_R1);
4905 z_larl(Z_R1, MVC_template);
4906 }
4907
4908 if (VM_Version::has_Prefetch()) {
4909 z_pfd(1, 0,Z_R0,src_reg);
4910 z_pfd(2, 0,Z_R0,dst_reg);
4911 // z_pfd(1,256,Z_R0,src_reg); // Assume very short copy.
4912 // z_pfd(2,256,Z_R0,dst_reg);
4913 }
4914
4915 if (VM_Version::has_ExecuteExtensions()) {
4916 z_exrl(Z_R1, MVC_template);
4917 } else {
4918 z_ex(tmp1_reg, 0, Z_R0, Z_R1);
4919 }
4920
4921 bind(done);
4922
4923 BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint");
4924
4925 int block_end = offset();
4926 return block_end - block_start;
4927 }
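// Decision logic of the emitter above (sketch):
//   if (cnt == 0)           -> nothing to do
//   else if (cnt*8 <= 256)  -> EX/EXRL of the MVC template with length cnt*8 - 1
//   else                    -> MVCLE, passing 0xb0 to request cache bypass (see comment above)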
4928
4929 //-------------------------------------------------
4930 // Constants (scalar and oop) in constant pool
4931 //-------------------------------------------------
4932
4933 // Add a non-relocated constant to the CP.
4934 int MacroAssembler::store_const_in_toc(AddressLiteral& val) {
4935 long value = val.value();
4936 address tocPos = long_constant(value);
4937
4938 if (tocPos != nullptr) {
4939 int tocOffset = (int)(tocPos - code()->consts()->start());
4940 return tocOffset;
4941 }
  // long_constant() returned null, so no constant pool entry has been created.
  // In that case, we return a "fatal" offset, just in case subsequently
  // generated access code is executed.
4945 return -1;
4946 }
4947
// Add a relocated constant to the CP.
// Returns the TOC offset at which the address is stored.
4950 int MacroAssembler::store_oop_in_toc(AddressLiteral& oop) {
4951 // Use RelocationHolder::none for the constant pool entry.
4952 // Otherwise we will end up with a failing NativeCall::verify(x),
4953 // where x is the address of the constant pool entry.
4954 address tocPos = address_constant((address)oop.value(), RelocationHolder::none);
4955
4956 if (tocPos != nullptr) {
4957 int tocOffset = (int)(tocPos - code()->consts()->start());
4958 RelocationHolder rsp = oop.rspec();
4959 Relocation *rel = rsp.reloc();
4960
4961 // Store toc_offset in relocation, used by call_far_patchable.
4962 if ((relocInfo::relocType)rel->type() == relocInfo::runtime_call_w_cp_type) {
4963 ((runtime_call_w_cp_Relocation *)(rel))->set_constant_pool_offset(tocOffset);
4964 }
4965 // Relocate at the load's pc.
4966 relocate(rsp);
4967
4968 return tocOffset;
4969 }
  // address_constant() returned null, so no constant pool entry has been created.
  // In that case, we return a "fatal" offset, just in case subsequently
  // generated access code is executed.
4973 return -1;
4974 }
4975
4976 bool MacroAssembler::load_const_from_toc(Register dst, AddressLiteral& a, Register Rtoc) {
4977 int tocOffset = store_const_in_toc(a);
4978 if (tocOffset == -1) return false;
4979 address tocPos = tocOffset + code()->consts()->start();
4980 assert((address)code()->consts()->start() != nullptr, "Please add CP address");
4981 relocate(a.rspec());
4982 load_long_pcrelative(dst, tocPos);
4983 return true;
4984 }
4985
4986 bool MacroAssembler::load_oop_from_toc(Register dst, AddressLiteral& a, Register Rtoc) {
4987 int tocOffset = store_oop_in_toc(a);
4988 if (tocOffset == -1) return false;
4989 address tocPos = tocOffset + code()->consts()->start();
4990 assert((address)code()->consts()->start() != nullptr, "Please add CP address");
4991
4992 load_addr_pcrelative(dst, tocPos);
4993 return true;
4994 }
4995
4996 // If the instruction sequence at the given pc is a load_const_from_toc
4997 // sequence, return the value currently stored at the referenced position
4998 // in the TOC.
4999 intptr_t MacroAssembler::get_const_from_toc(address pc) {
5000
5001 assert(is_load_const_from_toc(pc), "must be load_const_from_pool");
5002
5003 long offset = get_load_const_from_toc_offset(pc);
5004 address dataLoc = nullptr;
5005 if (is_load_const_from_toc_pcrelative(pc)) {
5006 dataLoc = pc + offset;
5007 } else {
5008 CodeBlob* cb = CodeCache::find_blob(pc);
5009 assert(cb && cb->is_nmethod(), "sanity");
5010 nmethod* nm = (nmethod*)cb;
5011 dataLoc = nm->ctable_begin() + offset;
5012 }
5013 return *(intptr_t *)dataLoc;
5014 }
5015
5016 // If the instruction sequence at the given pc is a load_const_from_toc
5017 // sequence, copy the passed-in new_data value into the referenced
5018 // position in the TOC.
5019 void MacroAssembler::set_const_in_toc(address pc, unsigned long new_data, CodeBlob *cb) {
5020 assert(is_load_const_from_toc(pc), "must be load_const_from_pool");
5021
5022 long offset = MacroAssembler::get_load_const_from_toc_offset(pc);
5023 address dataLoc = nullptr;
5024 if (is_load_const_from_toc_pcrelative(pc)) {
5025 dataLoc = pc+offset;
5026 } else {
5027 nmethod* nm = CodeCache::find_nmethod(pc);
5028 assert((cb == nullptr) || (nm == (nmethod*)cb), "instruction address should be in CodeBlob");
5029 dataLoc = nm->ctable_begin() + offset;
5030 }
5031 if (*(unsigned long *)dataLoc != new_data) { // Prevent cache invalidation: update only if necessary.
5032 *(unsigned long *)dataLoc = new_data;
5033 }
5034 }
5035
// Dynamic TOC: the getter must only be called if "a" is a load_const_from_toc
// site. Verify by calling is_load_const_from_toc() beforehand!
5038 // Offset is +/- 2**32 -> use long.
5039 long MacroAssembler::get_load_const_from_toc_offset(address a) {
5040 assert(is_load_const_from_toc_pcrelative(a), "expected pc relative load");
5041 // expected code sequence:
5042 // z_lgrl(t, simm32); len = 6
5043 unsigned long inst;
5044 unsigned int len = get_instruction(a, &inst);
5045 return get_pcrel_offset(inst);
5046 }
5047
5048 //**********************************************************************************
5049 // inspection of generated instruction sequences for a particular pattern
5050 //**********************************************************************************
5051
5052 bool MacroAssembler::is_load_const_from_toc_pcrelative(address a) {
5053 #ifdef ASSERT
5054 unsigned long inst;
5055 unsigned int len = get_instruction(a+2, &inst);
5056 if ((len == 6) && is_load_pcrelative_long(a) && is_call_pcrelative_long(inst)) {
5057 const int range = 128;
5058 Assembler::dump_code_range(tty, a, range, "instr(a) == z_lgrl && instr(a+2) == z_brasl");
5059 VM_Version::z_SIGSEGV();
5060 }
5061 #endif
5062 // expected code sequence:
5063 // z_lgrl(t, relAddr32); len = 6
  // TODO: verify accessed data is in CP, if possible.
5065 return is_load_pcrelative_long(a); // TODO: might be too general. Currently, only lgrl is used.
5066 }
5067
5068 bool MacroAssembler::is_load_const_from_toc_call(address a) {
5069 return is_load_const_from_toc(a) && is_call_byregister(a + load_const_from_toc_size());
5070 }
5071
5072 bool MacroAssembler::is_load_const_call(address a) {
5073 return is_load_const(a) && is_call_byregister(a + load_const_size());
5074 }
5075
5076 //-------------------------------------------------
// Emitters for some really CISC instructions
5078 //-------------------------------------------------
5079
5080 void MacroAssembler::move_long_ext(Register dst, Register src, unsigned int pad) {
5081 assert(dst->encoding()%2==0, "must be an even/odd register pair");
5082 assert(src->encoding()%2==0, "must be an even/odd register pair");
5083 assert(pad<256, "must be a padding BYTE");
5084
5085 Label retry;
5086 bind(retry);
5087 Assembler::z_mvcle(dst, src, pad);
5088 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5089 }
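// The CC==3 retry idiom used by this and the following emitters (sketch):
// MVCLE (like CLCLE, SRST, KM*, TR**, CKSM) is interruptible and sets CC==3
// when it stops before completion, so the emitted loop simply reissues it:
//   do { cc = <interruptible instruction>; } while (cc == 3 /* incomplete, resume */);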
5090
5091 void MacroAssembler::compare_long_ext(Register left, Register right, unsigned int pad) {
5092 assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
5093 assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
5094 assert(pad<256, "must be a padding BYTE");
5095
5096 Label retry;
5097 bind(retry);
5098 Assembler::z_clcle(left, right, pad, Z_R0);
5099 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5100 }
5101
5102 void MacroAssembler::compare_long_uni(Register left, Register right, unsigned int pad) {
5103 assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
5104 assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
5105 assert(pad<=0xfff, "must be a padding HALFWORD");
5106 assert(VM_Version::has_ETF2(), "instruction must be available");
5107
5108 Label retry;
5109 bind(retry);
5110 Assembler::z_clclu(left, right, pad, Z_R0);
5111 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5112 }
5113
5114 void MacroAssembler::search_string(Register end, Register start) {
5115 assert(end->encoding() != 0, "end address must not be in R0");
5116 assert(start->encoding() != 0, "start address must not be in R0");
5117
5118 Label retry;
5119 bind(retry);
5120 Assembler::z_srst(end, start);
5121 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5122 }
5123
5124 void MacroAssembler::search_string_uni(Register end, Register start) {
5125 assert(end->encoding() != 0, "end address must not be in R0");
5126 assert(start->encoding() != 0, "start address must not be in R0");
5127 assert(VM_Version::has_ETF3(), "instruction must be available");
5128
5129 Label retry;
5130 bind(retry);
5131 Assembler::z_srstu(end, start);
5132 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5133 }
5134
5135 void MacroAssembler::kmac(Register srcBuff) {
5136 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0");
5137 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair");
5138
5139 Label retry;
5140 bind(retry);
5141 Assembler::z_kmac(Z_R0, srcBuff);
5142 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5143 }
5144
5145 void MacroAssembler::kimd(Register srcBuff) {
5146 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0");
5147 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair");
5148
5149 Label retry;
5150 bind(retry);
5151 Assembler::z_kimd(Z_R0, srcBuff);
5152 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5153 }
5154
5155 void MacroAssembler::klmd(Register srcBuff) {
5156 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0");
5157 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair");
5158
5159 Label retry;
5160 bind(retry);
5161 Assembler::z_klmd(Z_R0, srcBuff);
5162 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5163 }
5164
5165 void MacroAssembler::km(Register dstBuff, Register srcBuff) {
5166 // DstBuff and srcBuff are allowed to be the same register (encryption in-place).
5167 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block.
5168 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0");
5169 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register");
5170 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair");
5171
5172 Label retry;
5173 bind(retry);
5174 Assembler::z_km(dstBuff, srcBuff);
5175 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5176 }
5177
5178 void MacroAssembler::kmc(Register dstBuff, Register srcBuff) {
5179 // DstBuff and srcBuff are allowed to be the same register (encryption in-place).
5180 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block.
5181 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0");
5182 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register");
5183 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair");
5184
5185 Label retry;
5186 bind(retry);
5187 Assembler::z_kmc(dstBuff, srcBuff);
5188 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5189 }
5190
5191 void MacroAssembler::kmctr(Register dstBuff, Register ctrBuff, Register srcBuff) {
5192 // DstBuff and srcBuff are allowed to be the same register (encryption in-place).
5193 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block.
5194 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0");
5195 assert(dstBuff->encoding() != 0, "dst buffer address can't be in Z_R0");
5196 assert(ctrBuff->encoding() != 0, "ctr buffer address can't be in Z_R0");
5197 assert(ctrBuff->encoding() % 2 == 0, "ctr buffer addr must be an even register");
5198 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register");
5199 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair");
5200
5201 Label retry;
5202 bind(retry);
5203 Assembler::z_kmctr(dstBuff, ctrBuff, srcBuff);
5204 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5205 }
5206
5207 void MacroAssembler::cksm(Register crcBuff, Register srcBuff) {
5208 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair");
5209
5210 Label retry;
5211 bind(retry);
5212 Assembler::z_cksm(crcBuff, srcBuff);
5213 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5214 }
5215
5216 void MacroAssembler::translate_oo(Register r1, Register r2, uint m3) {
5217 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
5218 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");
5219
5220 Label retry;
5221 bind(retry);
5222 Assembler::z_troo(r1, r2, m3);
5223 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5224 }
5225
5226 void MacroAssembler::translate_ot(Register r1, Register r2, uint m3) {
5227 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
5228 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");
5229
5230 Label retry;
5231 bind(retry);
5232 Assembler::z_trot(r1, r2, m3);
5233 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5234 }
5235
5236 void MacroAssembler::translate_to(Register r1, Register r2, uint m3) {
5237 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
5238 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");
5239
5240 Label retry;
5241 bind(retry);
5242 Assembler::z_trto(r1, r2, m3);
5243 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5244 }
5245
5246 void MacroAssembler::translate_tt(Register r1, Register r2, uint m3) {
5247 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
5248 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");
5249
5250 Label retry;
5251 bind(retry);
5252 Assembler::z_trtt(r1, r2, m3);
5253 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5254 }
5255
5256 //---------------------------------------
5257 // Helpers for Intrinsic Emitters
5258 //---------------------------------------
5259
5260 /**
5261 * uint32_t crc;
5262 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
5263 */
5264 void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, Register tmp) {
5265 assert_different_registers(crc, table, tmp);
5266 assert_different_registers(val, table);
5267 if (crc == val) { // Must rotate first to use the unmodified value.
5268 rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
5269 z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits.
5270 } else {
5271 z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits.
5272 rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
5273 }
5274 z_x(crc, Address(table, tmp, 0));
5275 }
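// C equivalent of fold_byte_crc32 (sketch; table entries are 4 bytes wide):
//   uint64_t idx = (val & 0xff) << 2;                    // scale index to entry size
//   crc = (crc >> 8) ^ *(uint32_t*)((char*)table + idx);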
5276
5277 /**
5278 * uint32_t crc;
5279 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
5280 */
5281 void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
5282 fold_byte_crc32(crc, crc, table, tmp);
5283 }
5284
5285 /**
5286 * Emits code to update CRC-32 with a byte value according to constants in table.
5287 *
5288 * @param [in,out]crc Register containing the crc.
5289 * @param [in]val Register containing the byte to fold into the CRC.
5290 * @param [in]table Register containing the table of crc constants.
5291 *
5292 * uint32_t crc;
5293 * val = crc_table[(val ^ crc) & 0xFF];
5294 * crc = val ^ (crc >> 8);
5295 */
5296 void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
5297 z_xr(val, crc);
5298 fold_byte_crc32(crc, val, table, val);
5299 }
5300
5301
5302 /**
5303 * @param crc register containing existing CRC (32-bit)
5304 * @param buf register pointing to input byte buffer (byte*)
5305 * @param len register containing number of bytes
5306 * @param table register pointing to CRC table
5307 */
5308 void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, Register data) {
5309 assert_different_registers(crc, buf, len, table, data);
5310
5311 Label L_mainLoop, L_done;
5312 const int mainLoop_stepping = 1;
5313
5314 // Process all bytes in a single-byte loop.
5315 z_ltr(len, len);
5316 z_brnh(L_done);
5317
5318 bind(L_mainLoop);
  z_llgc(data, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
5320 add2reg(buf, mainLoop_stepping); // Advance buffer position.
5321 update_byte_crc32(crc, data, table);
5322 z_brct(len, L_mainLoop); // Iterate.
5323
5324 bind(L_done);
5325 }
5326
5327 /**
5328 * Emits code to update CRC-32 with a 4-byte value according to constants in table.
5329 * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c.
5330 *
5331 */
5332 void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
5333 Register t0, Register t1, Register t2, Register t3) {
5334 // This is what we implement (the DOBIG4 part):
5335 //
5336 // #define DOBIG4 c ^= *++buf4; \
5337 // c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
5338 // crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
5339 // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
5340 // Pre-calculate (constant) column offsets, use columns 4..7 for big-endian.
5341 const int ix0 = 4*(4*CRC32_COLUMN_SIZE);
5342 const int ix1 = 5*(4*CRC32_COLUMN_SIZE);
5343 const int ix2 = 6*(4*CRC32_COLUMN_SIZE);
5344 const int ix3 = 7*(4*CRC32_COLUMN_SIZE);
5345
5346 // XOR crc with next four bytes of buffer.
5347 lgr_if_needed(t0, crc);
5348 z_x(t0, Address(buf, bufDisp));
5349 if (bufInc != 0) {
5350 add2reg(buf, bufInc);
5351 }
5352
5353 // Chop crc into 4 single-byte pieces, shifted left 2 bits, to form the table indices.
5354 rotate_then_insert(t3, t0, 56-2, 63-2, 2, true); // ((c >> 0) & 0xff) << 2
5355 rotate_then_insert(t2, t0, 56-2, 63-2, 2-8, true); // ((c >> 8) & 0xff) << 2
5356 rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true); // ((c >> 16) & 0xff) << 2
5357 rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true); // ((c >> 24) & 0xff) << 2
5358
5359 // XOR indexed table values to calculate updated crc.
5360 z_ly(t2, Address(table, t2, (intptr_t)ix1));
5361 z_ly(t0, Address(table, t0, (intptr_t)ix3));
5362 z_xy(t2, Address(table, t3, (intptr_t)ix0));
5363 z_xy(t0, Address(table, t1, (intptr_t)ix2));
5364 z_xr(t0, t2); // Now t0 contains the updated CRC value.
5365 lgr_if_needed(crc, t0);
5366 }
5367
5368 /**
5369 * @param crc register containing existing CRC (32-bit)
5370 * @param buf register pointing to input byte buffer (byte*)
5371 * @param len register containing number of bytes
5372 * @param table register pointing to CRC table
5373 *
 * Uses Z_R10..Z_R13 as work registers. They must be saved/restored by the caller!
5375 */
5376 void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
5377 Register t0, Register t1, Register t2, Register t3,
5378 bool invertCRC) {
5379 assert_different_registers(crc, buf, len, table);
5380
5381 Label L_mainLoop, L_tail;
5382 Register data = t0;
5383 Register ctr = Z_R0;
5384 const int mainLoop_stepping = 4;
5385 const int log_stepping = exact_log2(mainLoop_stepping);
5386
  // Don't test for len <= 0 here. This pathological case should not occur anyway.
  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
  // The situation is detected and handled correctly by the z_srag/z_brnh sequence
  // below: a non-positive len yields ctr <= 0 and branches straight to the tail loop.
5391
5392 if (invertCRC) {
5393 not_(crc, noreg, false); // 1s complement of crc
5394 }
5395
5396 // Check for short (<4 bytes) buffer.
5397 z_srag(ctr, len, log_stepping);
5398 z_brnh(L_tail);
5399
  z_lrvr(crc, crc);          // Reverse byte order because we are dealing with big-endian data.
5401 rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop
5402
5403 BIND(L_mainLoop);
5404 update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3);
5405 z_brct(ctr, L_mainLoop); // Iterate.
5406
  z_lrvr(crc, crc);          // Reverse byte order back to original.
5408
  // Process last few (<4) bytes of buffer.
5410 BIND(L_tail);
5411 update_byteLoop_crc32(crc, buf, len, table, data);
5412
5413 if (invertCRC) {
5414 not_(crc, noreg, false); // 1s complement of crc
5415 }
5416 }
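// Worked example (illustrative): len = 10 bytes.
//   ctr = 10 >> 2 = 2 main-loop iterations process 8 bytes word-wise;
//   len & 3 = 2 bytes remain for the byte-wise tail loop.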
5417
5418 /**
5419 * @param crc register containing existing CRC (32-bit)
5420 * @param buf register pointing to input byte buffer (byte*)
5421 * @param len register containing number of bytes
5422 * @param table register pointing to CRC table
5423 */
5424 void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
5425 Register t0, Register t1, Register t2, Register t3,
5426 bool invertCRC) {
5427 assert_different_registers(crc, buf, len, table);
5428 Register data = t0;
5429
5430 if (invertCRC) {
5431 not_(crc, noreg, false); // 1s complement of crc
5432 }
5433
5434 update_byteLoop_crc32(crc, buf, len, table, data);
5435
5436 if (invertCRC) {
5437 not_(crc, noreg, false); // 1s complement of crc
5438 }
5439 }
5440
5441 void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp,
5442 bool invertCRC) {
5443 assert_different_registers(crc, buf, len, table, tmp);
5444
5445 if (invertCRC) {
5446 not_(crc, noreg, false); // 1s complement of crc
5447 }
5448
5449 z_llgc(tmp, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
5450 update_byte_crc32(crc, tmp, table);
5451
5452 if (invertCRC) {
5453 not_(crc, noreg, false); // 1s complement of crc
5454 }
5455 }
5456
5457 void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table,
5458 bool invertCRC) {
5459 assert_different_registers(crc, val, table);
5460
5461 if (invertCRC) {
5462 not_(crc, noreg, false); // 1s complement of crc
5463 }
5464
5465 update_byte_crc32(crc, val, table);
5466
5467 if (invertCRC) {
5468 not_(crc, noreg, false); // 1s complement of crc
5469 }
5470 }
5471
5472 //
5473 // Code for BigInteger::multiplyToLen() intrinsic.
5474 //
5475
5476 // dest_lo += src1 + src2
5477 // dest_hi += carry1 + carry2
// Z_R7 is destroyed!
5479 void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo,
5480 Register src1, Register src2) {
5481 clear_reg(Z_R7);
5482 z_algr(dest_lo, src1);
5483 z_alcgr(dest_hi, Z_R7);
5484 z_algr(dest_lo, src2);
5485 z_alcgr(dest_hi, Z_R7);
5486 }
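// 128-bit accumulate performed above (sketch):
//   (dest_hi:dest_lo) += src1;   // ALGR sets the carry, ALCGR folds it into dest_hi
//   (dest_hi:dest_lo) += src2;
// Z_R7 is cleared and serves as the zero operand of the add-with-carry.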
5487
5488 // Multiply 64 bit by 64 bit first loop.
5489 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart,
5490 Register x_xstart,
5491 Register y, Register y_idx,
5492 Register z,
5493 Register carry,
5494 Register product,
5495 Register idx, Register kdx) {
5496 // jlong carry, x[], y[], z[];
5497 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
5498 // huge_128 product = y[idx] * x[xstart] + carry;
5499 // z[kdx] = (jlong)product;
5500 // carry = (jlong)(product >>> 64);
5501 // }
5502 // z[xstart] = carry;
5503
5504 Label L_first_loop, L_first_loop_exit;
5505 Label L_one_x, L_one_y, L_multiply;
5506
5507 z_aghi(xstart, -1);
5508 z_brl(L_one_x); // Special case: length of x is 1.
5509
5510 // Load next two integers of x.
5511 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
5512 mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0));
5513
5514
5515 bind(L_first_loop);
5516
5517 z_aghi(idx, -1);
5518 z_brl(L_first_loop_exit);
5519 z_aghi(idx, -1);
5520 z_brl(L_one_y);
5521
5522 // Load next two integers of y.
5523 z_sllg(Z_R1_scratch, idx, LogBytesPerInt);
5524 mem2reg_opt(y_idx, Address(y, Z_R1_scratch, 0));
5525
5526
5527 bind(L_multiply);
5528
5529 Register multiplicand = product->successor();
5530 Register product_low = multiplicand;
5531
5532 lgr_if_needed(multiplicand, x_xstart);
5533 z_mlgr(product, y_idx); // multiplicand * y_idx -> product::multiplicand
5534 clear_reg(Z_R7);
5535 z_algr(product_low, carry); // Add carry to result.
5536 z_alcgr(product, Z_R7); // Add carry of the last addition.
5537 add2reg(kdx, -2);
5538
5539 // Store result.
5540 z_sllg(Z_R7, kdx, LogBytesPerInt);
5541 reg2mem_opt(product_low, Address(z, Z_R7, 0));
5542 lgr_if_needed(carry, product);
5543 z_bru(L_first_loop);
5544
5545
5546 bind(L_one_y); // Load one 32 bit portion of y as (0,value).
5547
5548 clear_reg(y_idx);
5549 mem2reg_opt(y_idx, Address(y, (intptr_t) 0), false);
5550 z_bru(L_multiply);
5551
5552
5553 bind(L_one_x); // Load one 32 bit portion of x as (0,value).
5554
5555 clear_reg(x_xstart);
5556 mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false);
5557 z_bru(L_first_loop);
5558
5559 bind(L_first_loop_exit);
5560 }
5561
5562 // Multiply 64 bit by 64 bit and add 128 bit.
5563 void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y,
5564 Register z,
5565 Register yz_idx, Register idx,
5566 Register carry, Register product,
5567 int offset) {
5568 // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry;
5569 // z[kdx] = (jlong)product;
5570
5571 Register multiplicand = product->successor();
5572 Register product_low = multiplicand;
5573
5574 z_sllg(Z_R7, idx, LogBytesPerInt);
5575 mem2reg_opt(yz_idx, Address(y, Z_R7, offset));
5576
5577 lgr_if_needed(multiplicand, x_xstart);
5578 z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand
5579 mem2reg_opt(yz_idx, Address(z, Z_R7, offset));
5580
5581 add2_with_carry(product, product_low, carry, yz_idx);
5582
5583 z_sllg(Z_R7, idx, LogBytesPerInt);
5584 reg2mem_opt(product_low, Address(z, Z_R7, offset));
5585
5586 }
5587
5588 // Multiply 128 bit by 128 bit. Unrolled inner loop.
5589 void MacroAssembler::multiply_128_x_128_loop(Register x_xstart,
5590 Register y, Register z,
5591 Register yz_idx, Register idx,
5592 Register jdx,
5593 Register carry, Register product,
5594 Register carry2) {
5595 // jlong carry, x[], y[], z[];
5596 // int kdx = ystart+1;
5597 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
5598 // huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry;
5599 // z[kdx+idx+1] = (jlong)product;
5600 // jlong carry2 = (jlong)(product >>> 64);
5601 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry2;
5602 // z[kdx+idx] = (jlong)product;
5603 // carry = (jlong)(product >>> 64);
5604 // }
5605 // idx += 2;
5606 // if (idx > 0) {
5607 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry;
5608 // z[kdx+idx] = (jlong)product;
5609 // carry = (jlong)(product >>> 64);
5610 // }
5611
5612 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
5613
5614 // scale the index
5615 lgr_if_needed(jdx, idx);
5616 and_imm(jdx, 0xfffffffffffffffcL);
5617 rshift(jdx, 2);
5618
5619
5620 bind(L_third_loop);
5621
5622 z_aghi(jdx, -1);
5623 z_brl(L_third_loop_exit);
5624 add2reg(idx, -4);
5625
5626 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8);
5627 lgr_if_needed(carry2, product);
5628
5629 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0);
5630 lgr_if_needed(carry, product);
5631 z_bru(L_third_loop);
5632
5633
5634 bind(L_third_loop_exit); // Handle any left-over operand parts.
5635
5636 and_imm(idx, 0x3);
5637 z_brz(L_post_third_loop_done);
5638
5639 Label L_check_1;
5640
5641 z_aghi(idx, -2);
5642 z_brl(L_check_1);
5643
5644 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0);
5645 lgr_if_needed(carry, product);
5646
5647
5648 bind(L_check_1);
5649
5650 add2reg(idx, 0x2);
5651 and_imm(idx, 0x1);
5652 z_aghi(idx, -1);
5653 z_brl(L_post_third_loop_done);
5654
5655 Register multiplicand = product->successor();
5656 Register product_low = multiplicand;
5657
5658 z_sllg(Z_R7, idx, LogBytesPerInt);
5659 clear_reg(yz_idx);
5660 mem2reg_opt(yz_idx, Address(y, Z_R7, 0), false);
5661 lgr_if_needed(multiplicand, x_xstart);
5662 z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand
5663 clear_reg(yz_idx);
5664 mem2reg_opt(yz_idx, Address(z, Z_R7, 0), false);
5665
5666 add2_with_carry(product, product_low, yz_idx, carry);
5667
5668 z_sllg(Z_R7, idx, LogBytesPerInt);
5669 reg2mem_opt(product_low, Address(z, Z_R7, 0), false);
5670 rshift(product_low, 32);
5671
5672 lshift(product, 32);
5673 z_ogr(product_low, product);
5674 lgr_if_needed(carry, product_low);
5675
5676 bind(L_post_third_loop_done);
5677 }
5678
5679 void MacroAssembler::multiply_to_len(Register x, Register xlen,
5680 Register y, Register ylen,
5681 Register z,
5682 Register tmp1, Register tmp2,
5683 Register tmp3, Register tmp4,
5684 Register tmp5) {
5685 ShortBranchVerifier sbv(this);
5686
5687 assert_different_registers(x, xlen, y, ylen, z,
5688 tmp1, tmp2, tmp3, tmp4, tmp5, Z_R1_scratch, Z_R7);
5689 assert_different_registers(x, xlen, y, ylen, z,
5690 tmp1, tmp2, tmp3, tmp4, tmp5, Z_R8);
5691
5692 z_stmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP);
5693
5694 const Register idx = tmp1;
5695 const Register kdx = tmp2;
5696 const Register xstart = tmp3;
5697
5698 const Register y_idx = tmp4;
5699 const Register carry = tmp5;
5700 const Register product = Z_R0_scratch;
5701 const Register x_xstart = Z_R8;
5702
5703 // First Loop.
5704 //
5705 // final static long LONG_MASK = 0xffffffffL;
5706 // int xstart = xlen - 1;
5707 // int ystart = ylen - 1;
5708 // long carry = 0;
  // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
5710 // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
5711 // z[kdx] = (int)product;
5712 // carry = product >>> 32;
5713 // }
5714 // z[xstart] = (int)carry;
5715 //
5716
5717 lgr_if_needed(idx, ylen); // idx = ylen
5718 z_agrk(kdx, xlen, ylen); // kdx = xlen + ylen
5719 clear_reg(carry); // carry = 0
5720
5721 Label L_done;
5722
5723 lgr_if_needed(xstart, xlen);
5724 z_aghi(xstart, -1);
5725 z_brl(L_done);
5726
5727 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
5728
5729 NearLabel L_second_loop;
5730 compare64_and_branch(kdx, RegisterOrConstant((intptr_t) 0), bcondEqual, L_second_loop);
5731
5732 NearLabel L_carry;
5733 z_aghi(kdx, -1);
5734 z_brz(L_carry);
5735
5736 // Store lower 32 bits of carry.
5737 z_sllg(Z_R1_scratch, kdx, LogBytesPerInt);
5738 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
5739 rshift(carry, 32);
5740 z_aghi(kdx, -1);
5741
5742
5743 bind(L_carry);
5744
5745 // Store upper 32 bits of carry.
5746 z_sllg(Z_R1_scratch, kdx, LogBytesPerInt);
5747 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
5748
5749 // Second and third (nested) loops.
5750 //
5751 // for (int i = xstart-1; i >= 0; i--) { // Second loop
5752 // carry = 0;
5753 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
5754 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
5755 // (z[k] & LONG_MASK) + carry;
5756 // z[k] = (int)product;
5757 // carry = product >>> 32;
5758 // }
5759 // z[i] = (int)carry;
5760 // }
5761 //
  // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = x_xstart
5763
5764 const Register jdx = tmp1;
5765
5766 bind(L_second_loop);
5767
5768 clear_reg(carry); // carry = 0;
5769 lgr_if_needed(jdx, ylen); // j = ystart+1
5770
5771 z_aghi(xstart, -1); // i = xstart-1;
5772 z_brl(L_done);
5773
5774 // Use free slots in the current stackframe instead of push/pop.
5775 Address zsave(Z_SP, _z_abi(carg_1));
5776 reg2mem_opt(z, zsave);
5777
5778
5779 Label L_last_x;
5780
5781 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
5782 load_address(z, Address(z, Z_R1_scratch, 4)); // z = z + k - j
5783 z_aghi(xstart, -1); // i = xstart-1;
5784 z_brl(L_last_x);
5785
5786 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
5787 mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0));
5788
5789
5790 Label L_third_loop_prologue;
5791
5792 bind(L_third_loop_prologue);
5793
5794 Address xsave(Z_SP, _z_abi(carg_2));
5795 Address xlensave(Z_SP, _z_abi(carg_3));
5796 Address ylensave(Z_SP, _z_abi(carg_4));
5797
5798 reg2mem_opt(x, xsave);
5799 reg2mem_opt(xstart, xlensave);
5800 reg2mem_opt(ylen, ylensave);
5801
5802
5803 multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x);
5804
5805 mem2reg_opt(z, zsave);
5806 mem2reg_opt(x, xsave);
5807 mem2reg_opt(xlen, xlensave); // This is the decrement of the loop counter!
5808 mem2reg_opt(ylen, ylensave);
5809
5810 add2reg(tmp3, 1, xlen);
5811 z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt);
5812 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
5813 z_aghi(tmp3, -1);
5814 z_brl(L_done);
5815
5816 rshift(carry, 32);
5817 z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt);
5818 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
5819 z_bru(L_second_loop);
5820
  // Infrequently executed code is moved out of the loops.
5822 bind(L_last_x);
5823
5824 clear_reg(x_xstart);
5825 mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false);
5826 z_bru(L_third_loop_prologue);
5827
5828 bind(L_done);
5829
5830 z_lmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP);
5831 }
5832
5833 void MacroAssembler::asm_assert(branch_condition cond, const char* msg, int id, bool is_static) {
5834 #ifdef ASSERT
5835 Label ok;
5836 z_brc(cond, ok);
5837 is_static ? stop_static(msg, id) : stop(msg, id);
5838 bind(ok);
5839 #endif // ASSERT
5840 }
5841
5842 // Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false).
5843 void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) {
5844 #ifdef ASSERT
5845 asm_assert(check_equal ? bcondEqual : bcondNotEqual, msg, id);
5846 #endif // ASSERT
5847 }
5848
5849 void MacroAssembler::asm_assert_mems_zero(bool check_equal, bool allow_relocation, int size, int64_t mem_offset,
5850 Register mem_base, const char* msg, int id) {
5851 #ifdef ASSERT
5852 switch (size) {
5853 case 4:
5854 load_and_test_int(Z_R0, Address(mem_base, mem_offset));
5855 break;
5856 case 8:
5857 load_and_test_long(Z_R0, Address(mem_base, mem_offset));
5858 break;
5859 default:
5860 ShouldNotReachHere();
5861 }
  // If relocation is not allowed, stop_static() will be called; otherwise stop().
5863 asm_assert(check_equal ? bcondEqual : bcondNotEqual, msg, id, !allow_relocation);
5864 #endif // ASSERT
5865 }
5866
5867 // Check the condition
5868 // expected_size == FP - SP
5869 // after transformation:
5870 // expected_size - FP + SP == 0
5871 // Destroys Register expected_size if no tmp register is passed.
5872 void MacroAssembler::asm_assert_frame_size(Register expected_size, Register tmp, const char* msg, int id) {
5873 #ifdef ASSERT
5874 lgr_if_needed(tmp, expected_size);
5875 z_algr(tmp, Z_SP);
5876 z_slg(tmp, 0, Z_R0, Z_SP);
5877 asm_assert(bcondEqual, msg, id);
5878 #endif // ASSERT
5879 }
5880
5881 #ifdef ASSERT
5882 bool is_excluded(Register excluded_register[], Register reg, int n) {
5883 for (int i = 0; i < n; i++) {
5884 if (excluded_register[i] == reg) {
5885 return true;
5886 }
5887 }
5888 return false;
5889 }
5890
5891 void MacroAssembler::clobber_volatile_registers(Register excluded_register[], int n) {
5892 const int magic_number = 0x82;
5893
5894 for (int i = 0; i < 6 /* R0 to R5 */; i++) {
5895 Register reg = as_Register(i);
5896 if (!is_excluded(excluded_register, reg, n)) {
5897 load_const_optimized(reg, magic_number);
5898 }
5899 }
5900 }
5901 #endif // ASSERT
5902
5903 // Save and restore functions: Exclude Z_R0.
5904 void MacroAssembler::save_volatile_regs(Register dst, int offset, bool include_fp, bool include_flags) {
5905 z_stmg(Z_R1, Z_R5, offset, dst); offset += 5 * BytesPerWord;
5906 if (include_fp) {
5907 z_std(Z_F0, Address(dst, offset)); offset += BytesPerWord;
5908 z_std(Z_F1, Address(dst, offset)); offset += BytesPerWord;
5909 z_std(Z_F2, Address(dst, offset)); offset += BytesPerWord;
5910 z_std(Z_F3, Address(dst, offset)); offset += BytesPerWord;
5911 z_std(Z_F4, Address(dst, offset)); offset += BytesPerWord;
5912 z_std(Z_F5, Address(dst, offset)); offset += BytesPerWord;
5913 z_std(Z_F6, Address(dst, offset)); offset += BytesPerWord;
5914 z_std(Z_F7, Address(dst, offset)); offset += BytesPerWord;
5915 }
5916 if (include_flags) {
5917 Label done;
5918 z_mvi(Address(dst, offset), 2); // encoding: equal
5919 z_bre(done);
5920 z_mvi(Address(dst, offset), 4); // encoding: higher
5921 z_brh(done);
5922 z_mvi(Address(dst, offset), 1); // encoding: lower
5923 bind(done);
5924 }
5925 }

void MacroAssembler::restore_volatile_regs(Register src, int offset, bool include_fp, bool include_flags) {
5927 z_lmg(Z_R1, Z_R5, offset, src); offset += 5 * BytesPerWord;
5928 if (include_fp) {
5929 z_ld(Z_F0, Address(src, offset)); offset += BytesPerWord;
5930 z_ld(Z_F1, Address(src, offset)); offset += BytesPerWord;
5931 z_ld(Z_F2, Address(src, offset)); offset += BytesPerWord;
5932 z_ld(Z_F3, Address(src, offset)); offset += BytesPerWord;
5933 z_ld(Z_F4, Address(src, offset)); offset += BytesPerWord;
5934 z_ld(Z_F5, Address(src, offset)); offset += BytesPerWord;
5935 z_ld(Z_F6, Address(src, offset)); offset += BytesPerWord;
5936 z_ld(Z_F7, Address(src, offset)); offset += BytesPerWord;
5937 }
5938 if (include_flags) {
5939 z_cli(Address(src, offset), 2); // see encoding above
5940 }
5941 }
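
// Note on the flags handling above: save_volatile_regs materializes the
// condition code as a byte (2 = equal, 4 = higher, 1 = lower). The CLI
// against 2 in restore_volatile_regs yields CC equal/high/low exactly for
// the saved values 2/4/1, thereby re-creating the saved condition code.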
5942
5943 // Plausibility check for oops.
5944 void MacroAssembler::verify_oop(Register oop, const char* msg) {
5945 if (!VerifyOops) return;
5946
5947 BLOCK_COMMENT("verify_oop {");
5948 unsigned int nbytes_save = (5 + 8 + 1) * BytesPerWord;
5949 address entry_addr = StubRoutines::verify_oop_subroutine_entry_address();
5950
5951 save_return_pc();
5952
5953 // Push frame, but preserve flags
5954 z_lgr(Z_R0, Z_SP);
5955 z_lay(Z_SP, -((int64_t)nbytes_save + frame::z_abi_160_size), Z_SP);
5956 z_stg(Z_R0, _z_abi(callers_sp), Z_SP);
5957
5958 save_volatile_regs(Z_SP, frame::z_abi_160_size, true, true);
5959
5960 lgr_if_needed(Z_ARG2, oop);
5961 load_const_optimized(Z_ARG1, (address)msg);
5962 load_const_optimized(Z_R1, entry_addr);
5963 z_lg(Z_R1, 0, Z_R1);
5964 call_c(Z_R1);
5965
5966 restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, true);
5967 pop_frame();
5968 restore_return_pc();
5969
5970 BLOCK_COMMENT("} verify_oop ");
5971 }
5972
5973 void MacroAssembler::verify_oop_addr(Address addr, const char* msg) {
5974 if (!VerifyOops) return;
5975
5976 BLOCK_COMMENT("verify_oop {");
5977 unsigned int nbytes_save = (5 + 8) * BytesPerWord;
5978 address entry_addr = StubRoutines::verify_oop_subroutine_entry_address();
5979
5980 save_return_pc();
5981 unsigned int frame_size = push_frame_abi160(nbytes_save); // kills Z_R0
5982 save_volatile_regs(Z_SP, frame::z_abi_160_size, true, false);
5983
5984 z_lg(Z_ARG2, addr.plus_disp(frame_size));
5985 load_const_optimized(Z_ARG1, (address)msg);
5986 load_const_optimized(Z_R1, entry_addr);
5987 z_lg(Z_R1, 0, Z_R1);
5988 call_c(Z_R1);
5989
5990 restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, false);
5991 pop_frame();
5992 restore_return_pc();
5993
5994 BLOCK_COMMENT("} verify_oop ");
5995 }
5996
5997 const char* MacroAssembler::stop_types[] = {
5998 "stop",
5999 "untested",
6000 "unimplemented",
6001 "shouldnotreachhere"
6002 };
6003
6004 static void stop_on_request(const char* tp, const char* msg) {
6005 tty->print("Z assembly code requires stop: (%s) %s\n", tp, msg);
6006 guarantee(false, "Z assembly code requires stop: %s", msg);
6007 }
6008
6009 void MacroAssembler::stop(int type, const char* msg, int id) {
6010 BLOCK_COMMENT(err_msg("stop: %s {", msg));
6011
6012 // Setup arguments.
6013 load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
6014 load_const(Z_ARG2, (void*) msg);
6015 get_PC(Z_R14); // Following code pushes a frame without entering a new function. Use current pc as return address.
6016 save_return_pc(); // Saves return pc Z_R14.
6017 push_frame_abi160(0);
6018 call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
  // The plain disassembler does not recognize illtrap. It instead displays
  // a 32-bit value. Issuing two illtraps ensures the disassembler finds
  // the proper beginning of the next instruction.
6022 z_illtrap(id); // Illegal instruction.
6023 z_illtrap(id); // Illegal instruction.
6024
6025 BLOCK_COMMENT(" } stop");
6026 }
6027
6028 // Special version of stop() for code size reduction.
6029 // Reuses the previously generated call sequence, if any.
6030 // Generates the call sequence on its own, if necessary.
// Note: This works only in non-relocatable code!
//       The relative address of the data elements (arg1, arg2) must not change.
//       The reentry point must not move relative to its users. This prerequisite
//       holds for "hand-written" code as long as all chain calls are in the same code blob.
6035 // Generated code must not undergo any transformation, e.g. ShortenBranches, to be safe.
6036 address MacroAssembler::stop_chain(address reentry, int type, const char* msg, int id, bool allow_relocation) {
6037 BLOCK_COMMENT(err_msg("stop_chain(%s,%s): %s {", reentry==nullptr?"init":"cont", allow_relocation?"reloc ":"static", msg));
6038
6039 // Setup arguments.
6040 if (allow_relocation) {
6041 // Relocatable version (for comparison purposes). Remove after some time.
6042 load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
6043 load_const(Z_ARG2, (void*) msg);
6044 } else {
6045 load_absolute_address(Z_ARG1, (address)stop_types[type%stop_end]);
6046 load_absolute_address(Z_ARG2, (address)msg);
6047 }
6048 if ((reentry != nullptr) && RelAddr::is_in_range_of_RelAddr16(reentry, pc())) {
6049 BLOCK_COMMENT("branch to reentry point:");
6050 z_brc(bcondAlways, reentry);
6051 } else {
6052 BLOCK_COMMENT("reentry point:");
6053 reentry = pc(); // Re-entry point for subsequent stop calls.
6054 save_return_pc(); // Saves return pc Z_R14.
6055 push_frame_abi160(0);
6056 if (allow_relocation) {
6057 reentry = nullptr; // Prevent reentry if code relocation is allowed.
6058 call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
6059 } else {
6060 call_VM_leaf_static(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
6061 }
6062 z_illtrap(id); // Illegal instruction as emergency stop, should the above call return.
6063 }
6064 BLOCK_COMMENT(" } stop_chain");
6065
6066 return reentry;
6067 }
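
// Usage sketch (stop_stop assumed to be the enum value indexing "stop" in
// stop_types above): the first call in a blob generates the full stop
// sequence and returns its re-entry point; later calls reload the arguments
// and branch there:
//   address reentry = nullptr;
//   reentry = stop_chain(reentry, stop_stop, "case 1", 1, false);
//   reentry = stop_chain(reentry, stop_stop, "case 2", 2, false); // short branch only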
6068
6069 // Special version of stop() for code size reduction.
6070 // Assumes constant relative addresses for data and runtime call.
6071 void MacroAssembler::stop_static(int type, const char* msg, int id) {
6072 stop_chain(nullptr, type, msg, id, false);
6073 }
6074
6075 void MacroAssembler::stop_subroutine() {
6076 unimplemented("stop_subroutine", 710);
6077 }
6078
// Prints msg to stdout from within generated code.
6080 void MacroAssembler::warn(const char* msg) {
6081 RegisterSaver::save_live_registers(this, RegisterSaver::all_registers, Z_R14);
6082 load_absolute_address(Z_R1, (address) warning);
6083 load_absolute_address(Z_ARG1, (address) msg);
6084 (void) call(Z_R1);
6085 RegisterSaver::restore_live_registers(this, RegisterSaver::all_registers);
6086 }
6087
6088 #ifndef PRODUCT
6089
// Write pattern 0x0101010101010101 to the region [low - before*BytesPerWord, high + after*BytesPerWord].
6091 void MacroAssembler::zap_from_to(Register low, Register high, Register val, Register addr, int before, int after) {
6092 if (!ZapEmptyStackFields) return;
6093 BLOCK_COMMENT("zap memory region {");
6094 load_const_optimized(val, 0x0101010101010101);
6095 int size = before + after;
6096 if (low == high && size < 5 && size > 0) {
6097 int offset = -before*BytesPerWord;
6098 for (int i = 0; i < size; ++i) {
6099 z_stg(val, Address(low, offset));
      offset += BytesPerWord;
6101 }
6102 } else {
6103 add2reg(addr, -before*BytesPerWord, low);
6104 if (after) {
6105 #ifdef ASSERT
6106 jlong check = after * BytesPerWord;
6107 assert(Immediate::is_simm32(check) && Immediate::is_simm32(-check), "value not encodable !");
6108 #endif
6109 add2reg(high, after * BytesPerWord);
6110 }
6111 NearLabel loop;
6112 bind(loop);
6113 z_stg(val, Address(addr));
6114 add2reg(addr, 8);
6115 compare64_and_branch(addr, high, bcondNotHigh, loop);
6116 if (after) {
6117 add2reg(high, -after * BytesPerWord);
6118 }
6119 }
6120 BLOCK_COMMENT("} zap memory region");
6121 }
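
// Example: zap_from_to(low, high, val, addr, 1, 2) fills
// [low - 1*BytesPerWord, high + 2*BytesPerWord] with the pattern.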
6122 #endif // !PRODUCT
6123
// Implements fast-locking.
//  - basic_lock: points to the BasicObjectLock; its monitor cache is cleared here
//                when UseObjectMonitorTable is set.
//  - obj: the object to be locked, contents preserved.
//  - temp1, temp2: temporary registers, contents destroyed.
// Note: Z_R1 must not be manipulated here when the C2 compiler is in play.
6128 void MacroAssembler::fast_lock(Register basic_lock, Register obj, Register temp1, Register temp2, Label& slow) {
6129
6130 assert_different_registers(basic_lock, obj, temp1, temp2);
6131
6132 Label push;
6133 const Register top = temp1;
6134 const Register mark = temp2;
6135 const int mark_offset = oopDesc::mark_offset_in_bytes();
6136 const ByteSize ls_top_offset = JavaThread::lock_stack_top_offset();
6137
6138 // Preload the markWord. It is important that this is the first
6139 // instruction emitted as it is part of C1's null check semantics.
6140 z_lg(mark, Address(obj, mark_offset));
6141
6142 if (UseObjectMonitorTable) {
6143 // Clear cache in case fast locking succeeds or we need to take the slow-path.
    const Address om_cache_addr = Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize(BasicLock::object_monitor_cache_offset_in_bytes()));
6145 z_mvghi(om_cache_addr, 0);
6146 }
6147
6148 if (DiagnoseSyncOnValueBasedClasses != 0) {
6149 load_klass(temp1, obj);
6150 z_tm(Address(temp1, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class);
6151 z_brne(slow);
6152 }
6153
6154 // First we need to check if the lock-stack has room for pushing the object reference.
6155 z_lgf(top, Address(Z_thread, ls_top_offset));
6156
6157 compareU32_and_branch(top, (unsigned)LockStack::end_offset(), bcondNotLow, slow);
6158
6159 // The underflow check is elided. The recursive check will always fail
6160 // when the lock stack is empty because of the _bad_oop_sentinel field.
6161
6162 // Check for recursion:
6163 z_aghi(top, -oopSize);
6164 z_cg(obj, Address(Z_thread, top));
6165 z_bre(push);
6166
6167 // Check header for monitor (0b10).
6168 z_tmll(mark, markWord::monitor_value);
6169 branch_optimized(bcondNotAllZero, slow);
6170
6171 { // Try to lock. Transition lock bits 0b01 => 0b00
6172 const Register locked_obj = top;
6173 z_oill(mark, markWord::unlocked_value);
6174 z_lgr(locked_obj, mark);
6175 // Clear lock-bits from locked_obj (locked state)
6176 z_xilf(locked_obj, markWord::unlocked_value);
6177 z_csg(mark, locked_obj, mark_offset, obj);
6178 branch_optimized(Assembler::bcondNotEqual, slow);
6179 }
6180
6181 bind(push);
6182
6183 // After successful lock, push object on lock-stack
6184 z_lgf(top, Address(Z_thread, ls_top_offset));
6185 z_stg(obj, Address(Z_thread, top));
6186 z_alsi(in_bytes(ls_top_offset), Z_thread, oopSize);
6187 }
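
// Lock-stack bookkeeping used above, as C-like pseudocode (top is a byte
// offset relative to Z_thread):
//   push:            *(Z_thread + top) = obj; top += oopSize;
//   recursion check: *(Z_thread + top - oopSize) == obj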
6188
// Implements fast-unlocking.
//  - obj: the object to be unlocked
//  - temp1, temp2: temporary registers, will be destroyed
//  - Z_R1_scratch: will be killed when called from the interpreter or the C1 compiler
6193 void MacroAssembler::fast_unlock(Register obj, Register temp1, Register temp2, Label& slow) {
6194
6195 assert_different_registers(obj, temp1, temp2);
6196
6197 Label unlocked, push_and_slow;
6198 const Register mark = temp1;
6199 const Register top = temp2;
6200 const int mark_offset = oopDesc::mark_offset_in_bytes();
6201 const ByteSize ls_top_offset = JavaThread::lock_stack_top_offset();
6202
6203 #ifdef ASSERT
6204 {
6205 // The following checks rely on the fact that LockStack is only ever modified by
6206 // its owning thread, even if the lock got inflated concurrently; removal of LockStack
6207 // entries after inflation will happen delayed in that case.
6208
6209 // Check for lock-stack underflow.
6210 NearLabel stack_ok;
6211 z_lgf(top, Address(Z_thread, ls_top_offset));
6212 compareU32_and_branch(top, (unsigned)LockStack::start_offset(), bcondNotLow, stack_ok);
6213 stop("Lock-stack underflow");
6214 bind(stack_ok);
6215 }
6216 #endif // ASSERT
6217
6218 // Check if obj is top of lock-stack.
6219 z_lgf(top, Address(Z_thread, ls_top_offset));
6220 z_aghi(top, -oopSize);
6221 z_cg(obj, Address(Z_thread, top));
6222 branch_optimized(bcondNotEqual, slow);
6223
6224 // pop object from lock-stack
6225 #ifdef ASSERT
6226 const Register temp_top = temp1; // mark is not yet loaded, but be careful
6227 z_agrk(temp_top, top, Z_thread);
6228 z_xc(0, oopSize-1, temp_top, 0, temp_top); // wipe out lock-stack entry
6229 #endif // ASSERT
6230 z_alsi(in_bytes(ls_top_offset), Z_thread, -oopSize); // pop object
6231
6232 // The underflow check is elided. The recursive check will always fail
6233 // when the lock stack is empty because of the _bad_oop_sentinel field.
6234
6235 // Check if recursive. (this is a check for the 2nd object on the stack)
6236 z_aghi(top, -oopSize);
6237 z_cg(obj, Address(Z_thread, top));
6238 branch_optimized(bcondEqual, unlocked);
6239
6240 // Not recursive. Check header for monitor (0b10).
6241 z_lg(mark, Address(obj, mark_offset));
6242 z_tmll(mark, markWord::monitor_value);
6243 z_brnaz(push_and_slow);
6244
6245 #ifdef ASSERT
6246 // Check header not unlocked (0b01).
6247 NearLabel not_unlocked;
6248 z_tmll(mark, markWord::unlocked_value);
6249 z_braz(not_unlocked);
6250 stop("fast_unlock already unlocked");
6251 bind(not_unlocked);
6252 #endif // ASSERT
6253
6254 { // Try to unlock. Transition lock bits 0b00 => 0b01
6255 Register unlocked_obj = top;
6256 z_lgr(unlocked_obj, mark);
6257 z_oill(unlocked_obj, markWord::unlocked_value);
6258 z_csg(mark, unlocked_obj, mark_offset, obj);
6259 branch_optimized(Assembler::bcondEqual, unlocked);
6260 }
6261
6262 bind(push_and_slow);
6263
6264 // Restore lock-stack and handle the unlock in runtime.
6265 z_lgf(top, Address(Z_thread, ls_top_offset));
6266 DEBUG_ONLY(z_stg(obj, Address(Z_thread, top));)
6267 z_alsi(in_bytes(ls_top_offset), Z_thread, oopSize);
6268 // set CC to NE
6269 z_ltgr(obj, obj); // object shouldn't be null at this point
6270 branch_optimized(bcondAlways, slow);
6271
6272 bind(unlocked);
6273 }
6274
6275 void MacroAssembler::compiler_fast_lock_object(Register obj, Register box, Register tmp1, Register tmp2) {
6276 assert_different_registers(obj, box, tmp1, tmp2, Z_R0_scratch);
6277
6278 // Handle inflated monitor.
6279 NearLabel inflated;
  // Finish fast lock successfully. MUST be reached with CC == EQ.
  NearLabel locked;
  // Finish fast lock unsuccessfully. MUST be reached with CC == NE.
  NearLabel slow_path;
6284
6285 if (UseObjectMonitorTable) {
6286 // Clear cache in case fast locking succeeds or we need to take the slow-path.
6287 z_mvghi(Address(box, BasicLock::object_monitor_cache_offset_in_bytes()), 0);
6288 }
6289
6290 if (DiagnoseSyncOnValueBasedClasses != 0) {
6291 load_klass(tmp1, obj);
6292 z_tm(Address(tmp1, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class);
6293 z_brne(slow_path);
6294 }
6295
6296 const Register mark = tmp1;
6297 const int mark_offset = oopDesc::mark_offset_in_bytes();
6298 const ByteSize ls_top_offset = JavaThread::lock_stack_top_offset();
6299
6300 BLOCK_COMMENT("compiler_fast_locking {");
6301 { // Fast locking
6302
    // Push lock to the lock stack and finish successfully. MUST be reached with CC == EQ.
6304 NearLabel push;
6305
6306 const Register top = tmp2;
6307
6308 // Check if lock-stack is full.
6309 z_lgf(top, Address(Z_thread, ls_top_offset));
6310 compareU32_and_branch(top, (unsigned) LockStack::end_offset() - 1, bcondHigh, slow_path);
6311
6312 // The underflow check is elided. The recursive check will always fail
6313 // when the lock stack is empty because of the _bad_oop_sentinel field.
6314
6315 // Check if recursive.
6316 z_aghi(top, -oopSize);
6317 z_cg(obj, Address(Z_thread, top));
6318 z_bre(push);
6319
6320 // Check for monitor (0b10)
6321 z_lg(mark, Address(obj, mark_offset));
6322 z_tmll(mark, markWord::monitor_value);
6323 z_brnaz(inflated);
6324
6325 // not inflated
6326
6327 { // Try to lock. Transition lock bits 0b01 => 0b00
6328 assert(mark_offset == 0, "required to avoid a lea");
6329 const Register locked_obj = top;
6330 z_oill(mark, markWord::unlocked_value);
6331 z_lgr(locked_obj, mark);
6332 // Clear lock-bits from locked_obj (locked state)
6333 z_xilf(locked_obj, markWord::unlocked_value);
6334 z_csg(mark, locked_obj, mark_offset, obj);
6335 branch_optimized(Assembler::bcondNotEqual, slow_path);
6336 }
6337
6338 bind(push);
6339
6340 // After successful lock, push object on lock-stack.
6341 z_lgf(top, Address(Z_thread, ls_top_offset));
6342 z_stg(obj, Address(Z_thread, top));
6343 z_alsi(in_bytes(ls_top_offset), Z_thread, oopSize);
6344
6345 z_cgr(obj, obj); // set the CC to EQ, as it could be changed by alsi
6346 z_bru(locked);
6347 }
6348 BLOCK_COMMENT("} compiler_fast_locking");
6349
6350 BLOCK_COMMENT("handle_inflated_monitor_locking {");
6351 { // Handle inflated monitor.
6352 bind(inflated);
6353
6354 const Register tmp1_monitor = tmp1;
6355 if (!UseObjectMonitorTable) {
6356 assert(tmp1_monitor == mark, "should be the same here");
6357 } else {
6358 const Register tmp1_bucket = tmp1;
6359 const Register hash = Z_R0_scratch;
6360 NearLabel monitor_found;
6361
      // Save the mark; we might need it to extract the hash.
6363 z_lgr(hash, mark);
6364
6365 // Look for the monitor in the om_cache.
6366
6367 ByteSize cache_offset = JavaThread::om_cache_oops_offset();
6368 ByteSize monitor_offset = OMCache::oop_to_monitor_difference();
6369 const int num_unrolled = OMCache::CAPACITY;
6370 for (int i = 0; i < num_unrolled; i++) {
6371 z_lg(tmp1_monitor, Address(Z_thread, cache_offset + monitor_offset));
6372 z_cg(obj, Address(Z_thread, cache_offset));
6373 z_bre(monitor_found);
6374 cache_offset = cache_offset + OMCache::oop_to_oop_difference();
6375 }
6376
6377 // Get the hash code.
6378 z_srlg(hash, hash, markWord::hash_shift);
6379
6380 // Get the table and calculate the bucket's address.
6381 load_const_optimized(tmp2, ObjectMonitorTable::current_table_address());
6382 z_lg(tmp2, Address(tmp2));
6383 z_ng(hash, Address(tmp2, ObjectMonitorTable::table_capacity_mask_offset()));
6384 z_lg(tmp1_bucket, Address(tmp2, ObjectMonitorTable::table_buckets_offset()));
6385 z_sllg(hash, hash, LogBytesPerWord);
6386 z_agr(tmp1_bucket, hash);
6387
6388 // Read the monitor from the bucket.
6389 z_lg(tmp1_monitor, Address(tmp1_bucket));
6390
6391 // Check if the monitor in the bucket is special (empty, tombstone or removed).
6392 z_clgfi(tmp1_monitor, ObjectMonitorTable::SpecialPointerValues::below_is_special);
6393 z_brl(slow_path);
6394
6395 // Check if object matches.
6396 z_lg(tmp2, Address(tmp1_monitor, ObjectMonitor::object_offset()));
6397 BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
6398 bs_asm->try_resolve_weak_handle(this, tmp2, Z_R0_scratch, slow_path);
6399 z_cgr(obj, tmp2);
6400 z_brne(slow_path);
6401
6402 bind(monitor_found);
6403 }
    NearLabel monitor_locked;

    // Lock the monitor:
6406
6407 const Register zero = tmp2;
6408
6409 const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
6410 const Address owner_address(tmp1_monitor, ObjectMonitor::owner_offset() - monitor_tag);
    const Address recursions_address(tmp1_monitor, ObjectMonitor::recursions_offset() - monitor_tag);

    // Try to CAS owner (no owner => current thread's _monitor_owner_id).
    // If the CSG succeeds, CC == EQ; otherwise, register 'zero' is loaded
    // with the current owner.
6417 z_lghi(zero, 0);
6418 z_lg(Z_R0_scratch, Address(Z_thread, JavaThread::monitor_owner_id_offset()));
6419 z_csg(zero, Z_R0_scratch, owner_address);
6420 z_bre(monitor_locked);
6421
6422 // Check if recursive.
6423 z_cgr(Z_R0_scratch, zero); // zero contains the owner from z_csg instruction
6424 z_brne(slow_path);
6425
6426 // Recursive
6427 z_agsi(recursions_address, 1ll);
6428
6429 bind(monitor_locked);
6430 if (UseObjectMonitorTable) {
6431 // Cache the monitor for unlock
6432 z_stg(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
6433 }
6434 // set the CC now
6435 z_cgr(obj, obj);
6436 }
6437 BLOCK_COMMENT("} handle_inflated_monitor_locking");
6438
6439 bind(locked);
6440
6441 #ifdef ASSERT
6442 // Check that locked label is reached with flag == EQ.
6443 NearLabel flag_correct;
6444 z_bre(flag_correct);
6445 stop("CC is not set to EQ, it should be - lock");
6446 #endif // ASSERT
6447
6448 bind(slow_path);
6449
6450 #ifdef ASSERT
6451 // Check that slow_path label is reached with flag == NE.
6452 z_brne(flag_correct);
6453 stop("CC is not set to NE, it should be - lock");
6454 bind(flag_correct);
6455 #endif // ASSERT
6456
6457 // C2 uses the value of flag (NE vs EQ) to determine the continuation.
6458 }
6459
6460 void MacroAssembler::compiler_fast_unlock_object(Register obj, Register box, Register tmp1, Register tmp2) {
6461 assert_different_registers(obj, box, tmp1, tmp2);
6462
6463 // Handle inflated monitor.
6464 NearLabel inflated, inflated_load_mark;
  // Finish fast unlock successfully. MUST be reached with CC == EQ.
  NearLabel unlocked;
  // Finish fast unlock unsuccessfully. MUST be reached with CC == NE.
  NearLabel slow_path;
6469
6470 const Register mark = tmp1;
6471 const Register top = tmp2;
6472 const int mark_offset = oopDesc::mark_offset_in_bytes();
6473 const ByteSize ls_top_offset = JavaThread::lock_stack_top_offset();
6474
6475 BLOCK_COMMENT("compiler_fast_unlock {");
6476 { // Fast Unlock
6477 NearLabel push_and_slow_path;
6478
6479 // Check if obj is top of lock-stack.
6480 z_lgf(top, Address(Z_thread, ls_top_offset));
6481
6482 z_aghi(top, -oopSize);
6483 z_cg(obj, Address(Z_thread, top));
6484 branch_optimized(bcondNotEqual, inflated_load_mark);
6485
6486 // Pop lock-stack.
6487 #ifdef ASSERT
6488 const Register temp_top = tmp1; // let's not kill top here, we can use for recursive check
6489 z_agrk(temp_top, top, Z_thread);
6490 z_xc(0, oopSize-1, temp_top, 0, temp_top); // wipe out lock-stack entry
6491 #endif
6492 z_alsi(in_bytes(ls_top_offset), Z_thread, -oopSize); // pop object
6493
6494 // The underflow check is elided. The recursive check will always fail
6495 // when the lock stack is empty because of the _bad_oop_sentinel field.
6496
6497 // Check if recursive.
6498 z_aghi(top, -oopSize);
6499 z_cg(obj, Address(Z_thread, top));
6500 z_bre(unlocked);
6501
6502 // Not recursive
6503
    // Check for monitor (0b10).
    // Because we got here by popping (meaning we pushed during locking),
    // there will be no monitor in the box. So we need to push the obj back
    // so that the runtime can fix any potential anonymous owner.
6508 z_lg(mark, Address(obj, mark_offset));
6509 z_tmll(mark, markWord::monitor_value);
6510 if (!UseObjectMonitorTable) {
6511 z_brnaz(inflated);
6512 } else {
6513 z_brnaz(push_and_slow_path);
6514 }
6515
6516 #ifdef ASSERT
6517 // Check header not unlocked (0b01).
6518 NearLabel not_unlocked;
6519 z_tmll(mark, markWord::unlocked_value);
6520 z_braz(not_unlocked);
6521 stop("fast_unlock already unlocked");
6522 bind(not_unlocked);
6523 #endif // ASSERT
6524
6525 { // Try to unlock. Transition lock bits 0b00 => 0b01
6526 Register unlocked_obj = top;
6527 z_lgr(unlocked_obj, mark);
6528 z_oill(unlocked_obj, markWord::unlocked_value);
6529 z_csg(mark, unlocked_obj, mark_offset, obj);
6530 branch_optimized(Assembler::bcondEqual, unlocked);
6531 }
6532
6533 bind(push_and_slow_path);
6534 // Restore lock-stack and handle the unlock in runtime.
6535 z_lgf(top, Address(Z_thread, ls_top_offset));
6536 DEBUG_ONLY(z_stg(obj, Address(Z_thread, top));)
6537 z_alsi(in_bytes(ls_top_offset), Z_thread, oopSize);
6538 // set CC to NE
6539 z_ltgr(obj, obj); // object is not null here
6540 z_bru(slow_path);
6541 }
6542 BLOCK_COMMENT("} compiler_fast_unlock");
6543
6544 { // Handle inflated monitor.
6545
6546 bind(inflated_load_mark);
6547
6548 z_lg(mark, Address(obj, mark_offset));
6549
6550 #ifdef ASSERT
6551 z_tmll(mark, markWord::monitor_value);
6552 z_brnaz(inflated);
6553 stop("Fast Unlock not monitor");
6554 #endif // ASSERT
6555
6556 bind(inflated);
6557
6558 #ifdef ASSERT
6559 NearLabel check_done, loop;
6560 z_lgf(top, Address(Z_thread, ls_top_offset));
6561 bind(loop);
6562 z_aghi(top, -oopSize);
6563 compareU32_and_branch(top, in_bytes(JavaThread::lock_stack_base_offset()),
6564 bcondLow, check_done);
6565 z_cg(obj, Address(Z_thread, top));
6566 z_brne(loop);
6567 stop("Fast Unlock lock on stack");
6568 bind(check_done);
6569 #endif // ASSERT
6570
6571 const Register tmp1_monitor = tmp1;
6572
6573 if (!UseObjectMonitorTable) {
6574 assert(tmp1_monitor == mark, "should be the same here");
6575 } else {
6576 // Uses ObjectMonitorTable. Look for the monitor in our BasicLock on the stack.
6577 z_lg(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
      // Null check: no valid ObjectMonitor* lies below alignof(ObjectMonitor*),
      // so comparing against that bound sends null (and any other small value)
      // to the slow path via branch-on-low.
      z_cghi(tmp1_monitor, alignof(ObjectMonitor*));
      z_brl(slow_path);
6582 }
6583
6584 // mark contains the tagged ObjectMonitor*.
6585 const Register monitor = mark;
6586
6587 const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
6588 const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag};
6589 const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag};
6590 const Address entry_list_address{monitor, ObjectMonitor::entry_list_offset() - monitor_tag};
6591 const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag};
6592
6593 NearLabel not_recursive;
6594 const Register recursions = tmp2;
6595
6596 // Check if recursive.
6597 load_and_test_long(recursions, recursions_address);
6598 z_bre(not_recursive); // if 0 then jump, it's not recursive locking
6599
6600 // Recursive unlock
6601 z_agsi(recursions_address, -1ll);
6602 z_cgr(monitor, monitor); // set the CC to EQUAL
6603 z_bru(unlocked);
6604
6605 bind(not_recursive);
6606
6607 NearLabel set_eq_unlocked;
6608
6609 // Set owner to null.
6610 // Release to satisfy the JMM
6611 z_release();
6612 z_lghi(tmp2, 0);
6613 z_stg(tmp2 /*=0*/, owner_address);
6614 // We need a full fence after clearing owner to avoid stranding.
6615 z_fence();
6616
6617 // Check if the entry_list is empty.
6618 load_and_test_long(tmp2, entry_list_address);
6619 z_bre(unlocked); // If so we are done.
6620
6621 // Check if there is a successor.
6622 load_and_test_long(tmp2, succ_address);
6623 z_brne(set_eq_unlocked); // If so we are done.
6624
6625 // Save the monitor pointer in the current thread, so we can try to
6626 // reacquire the lock in SharedRuntime::monitor_exit_helper().
6627 if (!UseObjectMonitorTable) {
6628 z_xilf(monitor, markWord::monitor_value);
6629 }
6630 z_stg(monitor, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset()));
6631
6632 z_ltgr(obj, obj); // Set flag = NE
6633 z_bru(slow_path);
6634
6635 bind(set_eq_unlocked);
6636 z_cr(tmp2, tmp2); // Set flag = EQ
6637 }
6638
6639 bind(unlocked);
6640
6641 #ifdef ASSERT
6642 // Check that unlocked label is reached with flag == EQ.
6643 NearLabel flag_correct;
6644 z_bre(flag_correct);
6645 stop("CC is not set to EQ, it should be - unlock");
6646 #endif // ASSERT
6647
6648 bind(slow_path);
6649
6650 #ifdef ASSERT
6651 // Check that slow_path label is reached with flag == NE.
6652 z_brne(flag_correct);
6653 stop("CC is not set to NE, it should be - unlock");
6654 bind(flag_correct);
6655 #endif // ASSERT
6656
6657 // C2 uses the value of flag (NE vs EQ) to determine the continuation.
6658 }
6659
6660 void MacroAssembler::pop_count_int(Register r_dst, Register r_src, Register r_tmp) {
6661 BLOCK_COMMENT("pop_count_int {");
6662
  assert(r_tmp != noreg, "temp register required for pop_count_int, as code may run on a machine older than z15");
  assert_different_registers(r_dst, r_tmp); // r_src may alias r_tmp; only r_dst and r_tmp must differ
6665
6666 if (VM_Version::has_MiscInstrExt3()) {
6667 pop_count_int_with_ext3(r_dst, r_src);
6668 } else {
6669 pop_count_int_without_ext3(r_dst, r_src, r_tmp);
6670 }
6671
6672 BLOCK_COMMENT("} pop_count_int");
6673 }
6674
6675 void MacroAssembler::pop_count_long(Register r_dst, Register r_src, Register r_tmp) {
6676 BLOCK_COMMENT("pop_count_long {");
6677
  assert(r_tmp != noreg, "temp register required for pop_count_long, as code may run on a machine older than z15");
  assert_different_registers(r_dst, r_tmp); // r_src may alias r_tmp; only r_dst and r_tmp must differ
6680
6681 if (VM_Version::has_MiscInstrExt3()) {
6682 pop_count_long_with_ext3(r_dst, r_src);
6683 } else {
6684 pop_count_long_without_ext3(r_dst, r_src, r_tmp);
6685 }
6686
6687 BLOCK_COMMENT("} pop_count_long");
6688 }
6689
6690 void MacroAssembler::pop_count_int_without_ext3(Register r_dst, Register r_src, Register r_tmp) {
6691 BLOCK_COMMENT("pop_count_int_without_ext3 {");
6692
  assert(r_tmp != noreg, "temp register required for popcnt on machines older than z15");
  assert_different_registers(r_dst, r_tmp); // r_src may alias r_tmp; only r_dst and r_tmp must differ
6695
6696 z_popcnt(r_dst, r_src, 0);
6697 z_srlg(r_tmp, r_dst, 16);
6698 z_alr(r_dst, r_tmp);
6699 z_srlg(r_tmp, r_dst, 8);
6700 z_alr(r_dst, r_tmp);
6701 z_llgcr(r_dst, r_dst);
6702
6703 BLOCK_COMMENT("} pop_count_int_without_ext3");
6704 }
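
// Worked example for the folding above: POPCNT with M3 == 0 writes the
// population count of each source byte into the corresponding destination
// byte. For a low word of 0xF00000FF the byte counts are 0x04 00 00 08; the
// two shift/add steps accumulate the four low-word counts into the lowest
// byte, and LLGCR extracts it: r_dst == 4 + 8 == 12.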
6705
6706 void MacroAssembler::pop_count_long_without_ext3(Register r_dst, Register r_src, Register r_tmp) {
6707 BLOCK_COMMENT("pop_count_long_without_ext3 {");
6708
  assert(r_tmp != noreg, "temp register required for popcnt on machines older than z15");
  assert_different_registers(r_dst, r_tmp); // r_src may alias r_tmp; only r_dst and r_tmp must differ
6711
6712 z_popcnt(r_dst, r_src, 0);
6713 z_ahhlr(r_dst, r_dst, r_dst);
6714 z_sllg(r_tmp, r_dst, 16);
6715 z_algr(r_dst, r_tmp);
6716 z_sllg(r_tmp, r_dst, 8);
6717 z_algr(r_dst, r_tmp);
6718 z_srlg(r_dst, r_dst, 56);
6719
6720 BLOCK_COMMENT("} pop_count_long_without_ext3");
6721 }
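
// Worked example: for r_src == 0x8000000000000001, POPCNT yields byte count
// 0x01 in both the top and the bottom byte. AHHLR folds the low word into
// the high word, the two shift/add steps gather all counts into the top
// byte, and the final shift by 56 extracts the total: r_dst == 2.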
6722
6723 void MacroAssembler::pop_count_long_with_ext3(Register r_dst, Register r_src) {
6724 BLOCK_COMMENT("pop_count_long_with_ext3 {");
6725
  guarantee(VM_Version::has_MiscInstrExt3(),
            "pop_count_long_with_ext3 requires the miscellaneous-instruction-extensions facility 3");
6728 z_popcnt(r_dst, r_src, 8);
6729
6730 BLOCK_COMMENT("} pop_count_long_with_ext3");
6731 }
6732
6733 void MacroAssembler::pop_count_int_with_ext3(Register r_dst, Register r_src) {
6734 BLOCK_COMMENT("pop_count_int_with_ext3 {");
6735
  guarantee(VM_Version::has_MiscInstrExt3(),
            "pop_count_int_with_ext3 requires the miscellaneous-instruction-extensions facility 3");
6738 z_llgfr(r_dst, r_src);
6739 z_popcnt(r_dst, r_dst, 8);
6740
6741 BLOCK_COMMENT("} pop_count_int_with_ext3");
6742 }
6743
6744 // LOAD HALFWORD IMMEDIATE ON CONDITION (32 <- 16)
6745 void MacroAssembler::load_on_condition_imm_32(Register dst, int64_t i2, branch_condition cc) {
6746 if (VM_Version::has_LoadStoreConditional2()) { // z_lochi works on z13 or above
6747 assert(Assembler::is_simm16(i2), "sanity");
6748 z_lochi(dst, i2, cc);
6749 } else {
6750 NearLabel done;
6751 z_brc(Assembler::inverse_condition(cc), done);
6752 z_lhi(dst, i2);
6753 bind(done);
6754 }
6755 }
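
// Fallback sketch on pre-z13 hardware: a call such as
//   load_on_condition_imm_32(r, 1, bcondEqual);
// branches around the LHI on the inverse condition, so dst is only
// overwritten when the CC matches cc.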
6756
6757 // LOAD HALFWORD IMMEDIATE ON CONDITION (64 <- 16)
6758 void MacroAssembler::load_on_condition_imm_64(Register dst, int64_t i2, branch_condition cc) {
6759 if (VM_Version::has_LoadStoreConditional2()) { // z_locghi works on z13 or above
6760 assert(Assembler::is_simm16(i2), "sanity");
6761 z_locghi(dst, i2, cc);
6762 } else {
6763 NearLabel done;
6764 z_brc(Assembler::inverse_condition(cc), done);
6765 z_lghi(dst, i2);
6766 bind(done);
6767 }
6768 }