1 /* 2 * Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2016, 2024 SAP SE. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26 #include "precompiled.hpp" 27 #include "asm/codeBuffer.hpp" 28 #include "asm/macroAssembler.inline.hpp" 29 #include "code/compiledIC.hpp" 30 #include "compiler/disassembler.hpp" 31 #include "gc/shared/barrierSet.hpp" 32 #include "gc/shared/barrierSetAssembler.hpp" 33 #include "gc/shared/collectedHeap.inline.hpp" 34 #include "interpreter/interpreter.hpp" 35 #include "gc/shared/cardTableBarrierSet.hpp" 36 #include "memory/resourceArea.hpp" 37 #include "memory/universe.hpp" 38 #include "oops/accessDecorators.hpp" 39 #include "oops/compressedKlass.inline.hpp" 40 #include "oops/compressedOops.inline.hpp" 41 #include "oops/klass.inline.hpp" 42 #include "prims/methodHandles.hpp" 43 #include "registerSaver_s390.hpp" 44 #include "runtime/icache.hpp" 45 #include "runtime/interfaceSupport.inline.hpp" 46 #include "runtime/objectMonitor.hpp" 47 #include "runtime/os.hpp" 48 #include "runtime/safepoint.hpp" 49 #include "runtime/safepointMechanism.hpp" 50 #include "runtime/sharedRuntime.hpp" 51 #include "runtime/stubRoutines.hpp" 52 #include "utilities/events.hpp" 53 #include "utilities/macros.hpp" 54 #include "utilities/powerOfTwo.hpp" 55 56 #include <ucontext.h> 57 58 #define BLOCK_COMMENT(str) block_comment(str) 59 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 60 61 // Move 32-bit register if destination and source are different. 62 void MacroAssembler::lr_if_needed(Register rd, Register rs) { 63 if (rs != rd) { z_lr(rd, rs); } 64 } 65 66 // Move register if destination and source are different. 67 void MacroAssembler::lgr_if_needed(Register rd, Register rs) { 68 if (rs != rd) { z_lgr(rd, rs); } 69 } 70 71 // Zero-extend 32-bit register into 64-bit register if destination and source are different. 72 void MacroAssembler::llgfr_if_needed(Register rd, Register rs) { 73 if (rs != rd) { z_llgfr(rd, rs); } 74 } 75 76 // Move float register if destination and source are different. 77 void MacroAssembler::ldr_if_needed(FloatRegister rd, FloatRegister rs) { 78 if (rs != rd) { z_ldr(rd, rs); } 79 } 80 81 // Move integer register if destination and source are different. 82 // It is assumed that shorter-than-int types are already 83 // appropriately sign-extended. 
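// Example (illustrative): move_reg_if_needed(dst, T_LONG, src, T_INT) sign-extends via LGFR,
// while the T_INT <- T_LONG direction simply copies the low 32 bits (LR) if the registers differ.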
84 void MacroAssembler::move_reg_if_needed(Register dst, BasicType dst_type, Register src, 85 BasicType src_type) { 86 assert((dst_type != T_FLOAT) && (dst_type != T_DOUBLE), "use move_freg for float types"); 87 assert((src_type != T_FLOAT) && (src_type != T_DOUBLE), "use move_freg for float types"); 88 89 if (dst_type == src_type) { 90 lgr_if_needed(dst, src); // Just move all 64 bits. 91 return; 92 } 93 94 switch (dst_type) { 95 // Do not support these types for now. 96 // case T_BOOLEAN: 97 case T_BYTE: // signed byte 98 switch (src_type) { 99 case T_INT: 100 z_lgbr(dst, src); 101 break; 102 default: 103 ShouldNotReachHere(); 104 } 105 return; 106 107 case T_CHAR: 108 case T_SHORT: 109 switch (src_type) { 110 case T_INT: 111 if (dst_type == T_CHAR) { 112 z_llghr(dst, src); 113 } else { 114 z_lghr(dst, src); 115 } 116 break; 117 default: 118 ShouldNotReachHere(); 119 } 120 return; 121 122 case T_INT: 123 switch (src_type) { 124 case T_BOOLEAN: 125 case T_BYTE: 126 case T_CHAR: 127 case T_SHORT: 128 case T_INT: 129 case T_LONG: 130 case T_OBJECT: 131 case T_ARRAY: 132 case T_VOID: 133 case T_ADDRESS: 134 lr_if_needed(dst, src); 135 // llgfr_if_needed(dst, src); // zero-extend (in case we need to find a bug). 136 return; 137 138 default: 139 assert(false, "non-integer src type"); 140 return; 141 } 142 case T_LONG: 143 switch (src_type) { 144 case T_BOOLEAN: 145 case T_BYTE: 146 case T_CHAR: 147 case T_SHORT: 148 case T_INT: 149 z_lgfr(dst, src); // sign extension 150 return; 151 152 case T_LONG: 153 case T_OBJECT: 154 case T_ARRAY: 155 case T_VOID: 156 case T_ADDRESS: 157 lgr_if_needed(dst, src); 158 return; 159 160 default: 161 assert(false, "non-integer src type"); 162 return; 163 } 164 return; 165 case T_OBJECT: 166 case T_ARRAY: 167 case T_VOID: 168 case T_ADDRESS: 169 switch (src_type) { 170 // These types don't make sense to be converted to pointers: 171 // case T_BOOLEAN: 172 // case T_BYTE: 173 // case T_CHAR: 174 // case T_SHORT: 175 176 case T_INT: 177 z_llgfr(dst, src); // zero extension 178 return; 179 180 case T_LONG: 181 case T_OBJECT: 182 case T_ARRAY: 183 case T_VOID: 184 case T_ADDRESS: 185 lgr_if_needed(dst, src); 186 return; 187 188 default: 189 assert(false, "non-integer src type"); 190 return; 191 } 192 return; 193 default: 194 assert(false, "non-integer dst type"); 195 return; 196 } 197 } 198 199 // Move float register if destination and source are different. 200 void MacroAssembler::move_freg_if_needed(FloatRegister dst, BasicType dst_type, 201 FloatRegister src, BasicType src_type) { 202 assert((dst_type == T_FLOAT) || (dst_type == T_DOUBLE), "use move_reg for int types"); 203 assert((src_type == T_FLOAT) || (src_type == T_DOUBLE), "use move_reg for int types"); 204 if (dst_type == src_type) { 205 ldr_if_needed(dst, src); // Just move all 64 bits. 206 } else { 207 switch (dst_type) { 208 case T_FLOAT: 209 assert(src_type == T_DOUBLE, "invalid float type combination"); 210 z_ledbr(dst, src); 211 return; 212 case T_DOUBLE: 213 assert(src_type == T_FLOAT, "invalid float type combination"); 214 z_ldebr(dst, src); 215 return; 216 default: 217 assert(false, "non-float dst type"); 218 return; 219 } 220 } 221 } 222 223 // Optimized emitter for reg to mem operations. 224 // Uses modern instructions if running on modern hardware, classic instructions 225 // otherwise. Prefers (usually shorter) classic instructions if applicable. 226 // Data register (reg) cannot be used as work register. 
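// Displacement strategy: a 12-bit unsigned displacement selects the classic form, a 20-bit
// signed displacement the long-displacement ("modern") form; anything larger is materialized
// via the scratch register as shown below.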
227 // 228 // Don't rely on register locking, instead pass a scratch register (Z_R0 by default). 229 // CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs! 230 void MacroAssembler::freg2mem_opt(FloatRegister reg, 231 int64_t disp, 232 Register index, 233 Register base, 234 void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register), 235 void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register), 236 Register scratch) { 237 index = (index == noreg) ? Z_R0 : index; 238 if (Displacement::is_shortDisp(disp)) { 239 (this->*classic)(reg, disp, index, base); 240 } else { 241 if (Displacement::is_validDisp(disp)) { 242 (this->*modern)(reg, disp, index, base); 243 } else { 244 if (scratch != Z_R0 && scratch != Z_R1) { 245 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 246 } else { 247 if (scratch != Z_R0) { // scratch == Z_R1 248 if ((scratch == index) || (index == base)) { 249 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 250 } else { 251 add2reg(scratch, disp, base); 252 (this->*classic)(reg, 0, index, scratch); 253 if (base == scratch) { 254 add2reg(base, -disp); // Restore base. 255 } 256 } 257 } else { // scratch == Z_R0 258 z_lgr(scratch, base); 259 add2reg(base, disp); 260 (this->*classic)(reg, 0, index, base); 261 z_lgr(base, scratch); // Restore base. 262 } 263 } 264 } 265 } 266 } 267 268 void MacroAssembler::freg2mem_opt(FloatRegister reg, const Address &a, bool is_double) { 269 if (is_double) { 270 freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stdy), CLASSIC_FFUN(z_std)); 271 } else { 272 freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stey), CLASSIC_FFUN(z_ste)); 273 } 274 } 275 276 // Optimized emitter for mem to reg operations. 277 // Uses modern instructions if running on modern hardware, classic instructions 278 // otherwise. Prefers (usually shorter) classic instructions if applicable. 279 // data register (reg) cannot be used as work register. 280 // 281 // Don't rely on register locking, instead pass a scratch register (Z_R0 by default). 282 // CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs! 283 void MacroAssembler::mem2freg_opt(FloatRegister reg, 284 int64_t disp, 285 Register index, 286 Register base, 287 void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register), 288 void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register), 289 Register scratch) { 290 index = (index == noreg) ? Z_R0 : index; 291 if (Displacement::is_shortDisp(disp)) { 292 (this->*classic)(reg, disp, index, base); 293 } else { 294 if (Displacement::is_validDisp(disp)) { 295 (this->*modern)(reg, disp, index, base); 296 } else { 297 if (scratch != Z_R0 && scratch != Z_R1) { 298 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 299 } else { 300 if (scratch != Z_R0) { // scratch == Z_R1 301 if ((scratch == index) || (index == base)) { 302 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 303 } else { 304 add2reg(scratch, disp, base); 305 (this->*classic)(reg, 0, index, scratch); 306 if (base == scratch) { 307 add2reg(base, -disp); // Restore base. 308 } 309 } 310 } else { // scratch == Z_R0 311 z_lgr(scratch, base); 312 add2reg(base, disp); 313 (this->*classic)(reg, 0, index, base); 314 z_lgr(base, scratch); // Restore base. 
315 } 316 } 317 } 318 } 319 } 320 321 void MacroAssembler::mem2freg_opt(FloatRegister reg, const Address &a, bool is_double) { 322 if (is_double) { 323 mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ldy), CLASSIC_FFUN(z_ld)); 324 } else { 325 mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ley), CLASSIC_FFUN(z_le)); 326 } 327 } 328 329 // Optimized emitter for reg to mem operations. 330 // Uses modern instructions if running on modern hardware, classic instructions 331 // otherwise. Prefers (usually shorter) classic instructions if applicable. 332 // Data register (reg) cannot be used as work register. 333 // 334 // Don't rely on register locking, instead pass a scratch register 335 // (Z_R0 by default) 336 // CAUTION! passing registers >= Z_R2 may produce bad results on old CPUs! 337 void MacroAssembler::reg2mem_opt(Register reg, 338 int64_t disp, 339 Register index, 340 Register base, 341 void (MacroAssembler::*modern) (Register, int64_t, Register, Register), 342 void (MacroAssembler::*classic)(Register, int64_t, Register, Register), 343 Register scratch) { 344 index = (index == noreg) ? Z_R0 : index; 345 if (Displacement::is_shortDisp(disp)) { 346 (this->*classic)(reg, disp, index, base); 347 } else { 348 if (Displacement::is_validDisp(disp)) { 349 (this->*modern)(reg, disp, index, base); 350 } else { 351 if (scratch != Z_R0 && scratch != Z_R1) { 352 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 353 } else { 354 if (scratch != Z_R0) { // scratch == Z_R1 355 if ((scratch == index) || (index == base)) { 356 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 357 } else { 358 add2reg(scratch, disp, base); 359 (this->*classic)(reg, 0, index, scratch); 360 if (base == scratch) { 361 add2reg(base, -disp); // Restore base. 362 } 363 } 364 } else { // scratch == Z_R0 365 if ((scratch == reg) || (scratch == base) || (reg == base)) { 366 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 367 } else { 368 z_lgr(scratch, base); 369 add2reg(base, disp); 370 (this->*classic)(reg, 0, index, base); 371 z_lgr(base, scratch); // Restore base. 372 } 373 } 374 } 375 } 376 } 377 } 378 379 int MacroAssembler::reg2mem_opt(Register reg, const Address &a, bool is_double) { 380 int store_offset = offset(); 381 if (is_double) { 382 reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_stg), CLASSIC_IFUN(z_stg)); 383 } else { 384 reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_sty), CLASSIC_IFUN(z_st)); 385 } 386 return store_offset; 387 } 388 389 // Optimized emitter for mem to reg operations. 390 // Uses modern instructions if running on modern hardware, classic instructions 391 // otherwise. Prefers (usually shorter) classic instructions if applicable. 392 // Data register (reg) will be used as work register where possible. 393 void MacroAssembler::mem2reg_opt(Register reg, 394 int64_t disp, 395 Register index, 396 Register base, 397 void (MacroAssembler::*modern) (Register, int64_t, Register, Register), 398 void (MacroAssembler::*classic)(Register, int64_t, Register, Register)) { 399 index = (index == noreg) ? 
Z_R0 : index; 400 if (Displacement::is_shortDisp(disp)) { 401 (this->*classic)(reg, disp, index, base); 402 } else { 403 if (Displacement::is_validDisp(disp)) { 404 (this->*modern)(reg, disp, index, base); 405 } else { 406 if ((reg == index) && (reg == base)) { 407 z_sllg(reg, reg, 1); 408 add2reg(reg, disp); 409 (this->*classic)(reg, 0, noreg, reg); 410 } else if ((reg == index) && (reg != Z_R0)) { 411 add2reg(reg, disp); 412 (this->*classic)(reg, 0, reg, base); 413 } else if (reg == base) { 414 add2reg(reg, disp); 415 (this->*classic)(reg, 0, index, reg); 416 } else if (reg != Z_R0) { 417 add2reg(reg, disp, base); 418 (this->*classic)(reg, 0, index, reg); 419 } else { // reg == Z_R0 && reg != base here 420 add2reg(base, disp); 421 (this->*classic)(reg, 0, index, base); 422 add2reg(base, -disp); 423 } 424 } 425 } 426 } 427 428 void MacroAssembler::mem2reg_opt(Register reg, const Address &a, bool is_double) { 429 if (is_double) { 430 z_lg(reg, a); 431 } else { 432 mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_ly), CLASSIC_IFUN(z_l)); 433 } 434 } 435 436 void MacroAssembler::mem2reg_signed_opt(Register reg, const Address &a) { 437 mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_lgf), CLASSIC_IFUN(z_lgf)); 438 } 439 440 void MacroAssembler::and_imm(Register r, long mask, 441 Register tmp /* = Z_R0 */, 442 bool wide /* = false */) { 443 assert(wide || Immediate::is_simm32(mask), "mask value too large"); 444 445 if (!wide) { 446 z_nilf(r, mask); 447 return; 448 } 449 450 assert(r != tmp, " need a different temporary register !"); 451 load_const_optimized(tmp, mask); 452 z_ngr(r, tmp); 453 } 454 455 // Calculate the 1's complement. 456 // Note: The condition code is neither preserved nor correctly set by this code!!! 457 // Note: (wide == false) does not protect the high order half of the target register 458 // from alteration. It only serves as optimization hint for 32-bit results. 459 void MacroAssembler::not_(Register r1, Register r2, bool wide) { 460 461 if ((r2 == noreg) || (r2 == r1)) { // Calc 1's complement in place. 462 z_xilf(r1, -1); 463 if (wide) { 464 z_xihf(r1, -1); 465 } 466 } else { // Distinct src and dst registers. 467 load_const_optimized(r1, -1); 468 z_xgr(r1, r2); 469 } 470 } 471 472 unsigned long MacroAssembler::create_mask(int lBitPos, int rBitPos) { 473 assert(lBitPos >= 0, "zero is leftmost bit position"); 474 assert(rBitPos <= 63, "63 is rightmost bit position"); 475 assert(lBitPos <= rBitPos, "inverted selection interval"); 476 return (lBitPos == 0 ? (unsigned long)(-1L) : ((1UL<<(63-lBitPos+1))-1)) & (~((1UL<<(63-rBitPos))-1)); 477 } 478 479 // Helper function for the "Rotate_then_<logicalOP>" emitters. 480 // Rotate src, then mask register contents such that only bits in range survive. 481 // For oneBits == false, all bits not in range are set to 0. Useful for deleting all bits outside range. 482 // For oneBits == true, all bits not in range are set to 1. Useful for preserving all bits outside range. 483 // The caller must ensure that the selected range only contains bits with defined value. 484 void MacroAssembler::rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos, 485 int nRotate, bool src32bit, bool dst32bit, bool oneBits) { 486 assert(!(dst32bit && lBitPos < 32), "selection interval out of range for int destination"); 487 bool sll4rll = (nRotate >= 0) && (nRotate <= (63-rBitPos)); // Substitute SLL(G) for RLL(G). 
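  // A plain shift may stand in for the rotate here because the bits a shift treats differently
  // from a rotate (wrap-around vs. zero fill) all fall outside the selected range
  // [lBitPos, rBitPos]; those bits are forced to 0s/1s (or are already zero) by the masking below.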
  bool srl4rll = (nRotate < 0) && (-nRotate <= lBitPos);    // Substitute SRL(G) for RLL(G).
  // Pre-determine which parts of dst will be zero after shift/rotate.
  bool llZero  =  sll4rll && (nRotate >= 16);
  bool lhZero  = (sll4rll && (nRotate >= 32)) || (srl4rll && (nRotate <= -48));
  bool lfZero  = llZero && lhZero;
  bool hlZero  = (sll4rll && (nRotate >= 48)) || (srl4rll && (nRotate <= -32));
  bool hhZero  = (srl4rll && (nRotate <= -16));
  bool hfZero  = hlZero && hhZero;

  // Rotate then mask src operand.
  // If oneBits == true,  all bits outside selected range are 1s.
  // If oneBits == false, all bits outside selected range are 0s.
  if (src32bit) {   // There might be garbage in the upper 32 bits which will get masked away.
    if (dst32bit) {
      z_rll(dst, src, nRotate);   // Copy and rotate, upper half of reg remains undisturbed.
    } else {
      if      (sll4rll) { z_sllg(dst, src,  nRotate); }
      else if (srl4rll) { z_srlg(dst, src, -nRotate); }
      else              { z_rllg(dst, src,  nRotate); }
    }
  } else {
    if      (sll4rll) { z_sllg(dst, src,  nRotate); }
    else if (srl4rll) { z_srlg(dst, src, -nRotate); }
    else              { z_rllg(dst, src,  nRotate); }
  }

  unsigned long  range_mask    = create_mask(lBitPos, rBitPos);
  unsigned int   range_mask_h  = (unsigned int)(range_mask >> 32);
  unsigned int   range_mask_l  = (unsigned int)range_mask;
  unsigned short range_mask_hh = (unsigned short)(range_mask >> 48);
  unsigned short range_mask_hl = (unsigned short)(range_mask >> 32);
  unsigned short range_mask_lh = (unsigned short)(range_mask >> 16);
  unsigned short range_mask_ll = (unsigned short)range_mask;
  // Works for z9 and newer H/W.
  if (oneBits) {
    if ((~range_mask_l) != 0)                { z_oilf(dst, ~range_mask_l); } // All bits outside range become 1s.
    if (((~range_mask_h) != 0) && !dst32bit) { z_oihf(dst, ~range_mask_h); }
  } else {
    // All bits outside range become 0s.
    if (((~range_mask_l) != 0) && !lfZero) {
      z_nilf(dst, range_mask_l);
    }
    if (((~range_mask_h) != 0) && !dst32bit && !hfZero) {
      z_nihf(dst, range_mask_h);
    }
  }
}

// Rotate src, then insert selected range from rotated src into dst.
// Clear dst before, if requested.
void MacroAssembler::rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos,
                                        int nRotate, bool clear_dst) {
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                        // For risbg, pretend it's an unsigned value.
  z_risbg(dst, src, lBitPos, rBitPos, nRotate, clear_dst);  // Rotate, then insert selected, clear the rest.
}

// Rotate src, then and selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos,
                                     int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                        // For risbg, pretend it's an unsigned value.
  z_rnsbg(dst, src, lBitPos, rBitPos, nRotate, test_only);  // Rotate, then and selected.
}

// Rotate src, then or selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_or(Register dst, Register src, int lBitPos, int rBitPos,
                                    int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                        // For risbg, pretend it's an unsigned value.
  z_rosbg(dst, src, lBitPos, rBitPos, nRotate, test_only);  // Rotate, then or selected.
}

// Rotate src, then xor selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_xor(Register dst, Register src, int lBitPos, int rBitPos,
                                     int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                        // For risbg, pretend it's an unsigned value.
  z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only);  // Rotate, then xor selected.
}

void MacroAssembler::add64(Register r1, RegisterOrConstant inc) {
  if (inc.is_register()) {
    z_agr(r1, inc.as_register());
  } else { // constant
    intptr_t imm = inc.as_constant();
    add2reg(r1, imm);
  }
}

// Helper function to multiply the 64bit contents of a register by a 16bit constant.
// The optimization tries to avoid the mghi instruction, since it uses the FPU for
// calculation and is thus rather slow.
//
// There is no handling for special cases, e.g. cval==0 or cval==1.
//
// Returns len of generated code block.
unsigned int MacroAssembler::mul_reg64_const16(Register rval, Register work, int cval) {
  int block_start = offset();

  bool sign_flip = cval < 0;
  cval = sign_flip ? -cval : cval;

  BLOCK_COMMENT("Reg64*Con16 {");

  int bit1 = cval & -cval;
  if (bit1 == cval) {
    z_sllg(rval, rval, exact_log2(bit1));
    if (sign_flip) { z_lcgr(rval, rval); }
  } else {
    int bit2 = (cval-bit1) & -(cval-bit1);
    if ((bit1+bit2) == cval) {
      z_sllg(work, rval, exact_log2(bit1));
      z_sllg(rval, rval, exact_log2(bit2));
      z_agr(rval, work);
      if (sign_flip) { z_lcgr(rval, rval); }
    } else {
      if (sign_flip) { z_mghi(rval, -cval); }
      else           { z_mghi(rval,  cval); }
    }
  }
  BLOCK_COMMENT("} Reg64*Con16");

  int block_end = offset();
  return block_end - block_start;
}

// Generic operation r1 := r2 + imm.
//
// Should produce the best code for each supported CPU version.
// r2 == noreg yields r1 := r1 + imm
// imm == 0 emits either no instruction or r1 := r2 !
// NOTES: 1) Don't use this function where fixed sized
//           instruction sequences are required!!!
//        2) Don't use this function if condition code
//           setting is required!
//        3) Despite being declared as int64_t, the parameter imm
//           must be a simm_32 value (= signed 32-bit integer).
void MacroAssembler::add2reg(Register r1, int64_t imm, Register r2) {
  assert(Immediate::is_simm32(imm), "probably an implicit conversion went wrong");

  if (r2 == noreg) { r2 = r1; }

  // Handle special case imm == 0.
  if (imm == 0) {
    lgr_if_needed(r1, r2);
    // Nothing else to do.
643 return; 644 } 645 646 if (!PreferLAoverADD || (r2 == Z_R0)) { 647 bool distinctOpnds = VM_Version::has_DistinctOpnds(); 648 649 // Can we encode imm in 16 bits signed? 650 if (Immediate::is_simm16(imm)) { 651 if (r1 == r2) { 652 z_aghi(r1, imm); 653 return; 654 } 655 if (distinctOpnds) { 656 z_aghik(r1, r2, imm); 657 return; 658 } 659 z_lgr(r1, r2); 660 z_aghi(r1, imm); 661 return; 662 } 663 } else { 664 // Can we encode imm in 12 bits unsigned? 665 if (Displacement::is_shortDisp(imm)) { 666 z_la(r1, imm, r2); 667 return; 668 } 669 // Can we encode imm in 20 bits signed? 670 if (Displacement::is_validDisp(imm)) { 671 // Always use LAY instruction, so we don't need the tmp register. 672 z_lay(r1, imm, r2); 673 return; 674 } 675 676 } 677 678 // Can handle it (all possible values) with long immediates. 679 lgr_if_needed(r1, r2); 680 z_agfi(r1, imm); 681 } 682 683 // Generic operation r := b + x + d 684 // 685 // Addition of several operands with address generation semantics - sort of: 686 // - no restriction on the registers. Any register will do for any operand. 687 // - x == noreg: operand will be disregarded. 688 // - b == noreg: will use (contents of) result reg as operand (r := r + d). 689 // - x == Z_R0: just disregard 690 // - b == Z_R0: use as operand. This is not address generation semantics!!! 691 // 692 // The same restrictions as on add2reg() are valid!!! 693 void MacroAssembler::add2reg_with_index(Register r, int64_t d, Register x, Register b) { 694 assert(Immediate::is_simm32(d), "probably an implicit conversion went wrong"); 695 696 if (x == noreg) { x = Z_R0; } 697 if (b == noreg) { b = r; } 698 699 // Handle special case x == R0. 700 if (x == Z_R0) { 701 // Can simply add the immediate value to the base register. 702 add2reg(r, d, b); 703 return; 704 } 705 706 if (!PreferLAoverADD || (b == Z_R0)) { 707 bool distinctOpnds = VM_Version::has_DistinctOpnds(); 708 // Handle special case d == 0. 709 if (d == 0) { 710 if (b == x) { z_sllg(r, b, 1); return; } 711 if (r == x) { z_agr(r, b); return; } 712 if (r == b) { z_agr(r, x); return; } 713 if (distinctOpnds) { z_agrk(r, x, b); return; } 714 z_lgr(r, b); 715 z_agr(r, x); 716 } else { 717 if (x == b) { z_sllg(r, x, 1); } 718 else if (r == x) { z_agr(r, b); } 719 else if (r == b) { z_agr(r, x); } 720 else if (distinctOpnds) { z_agrk(r, x, b); } 721 else { 722 z_lgr(r, b); 723 z_agr(r, x); 724 } 725 add2reg(r, d); 726 } 727 } else { 728 // Can we encode imm in 12 bits unsigned? 729 if (Displacement::is_shortDisp(d)) { 730 z_la(r, d, x, b); 731 return; 732 } 733 // Can we encode imm in 20 bits signed? 734 if (Displacement::is_validDisp(d)) { 735 z_lay(r, d, x, b); 736 return; 737 } 738 z_la(r, 0, x, b); 739 add2reg(r, d); 740 } 741 } 742 743 // Generic emitter (32bit) for direct memory increment. 744 // For optimal code, do not specify Z_R0 as temp register. 
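// Example (illustrative, arbitrary operands): add2mem_32(Address(Z_R9, 16), 1, Z_R1) collapses
// to a single ASI 16(Z_R9),1 when VM_Version::has_MemWithImmALUOps() reports support; otherwise
// it falls back to the load / add2reg / store sequence via the temp register.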
745 void MacroAssembler::add2mem_32(const Address &a, int64_t imm, Register tmp) { 746 if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) { 747 z_asi(a, imm); 748 } else { 749 z_lgf(tmp, a); 750 add2reg(tmp, imm); 751 z_st(tmp, a); 752 } 753 } 754 755 void MacroAssembler::add2mem_64(const Address &a, int64_t imm, Register tmp) { 756 if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) { 757 z_agsi(a, imm); 758 } else { 759 z_lg(tmp, a); 760 add2reg(tmp, imm); 761 z_stg(tmp, a); 762 } 763 } 764 765 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 766 switch (size_in_bytes) { 767 case 8: z_lg(dst, src); break; 768 case 4: is_signed ? z_lgf(dst, src) : z_llgf(dst, src); break; 769 case 2: is_signed ? z_lgh(dst, src) : z_llgh(dst, src); break; 770 case 1: is_signed ? z_lgb(dst, src) : z_llgc(dst, src); break; 771 default: ShouldNotReachHere(); 772 } 773 } 774 775 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) { 776 switch (size_in_bytes) { 777 case 8: z_stg(src, dst); break; 778 case 4: z_st(src, dst); break; 779 case 2: z_sth(src, dst); break; 780 case 1: z_stc(src, dst); break; 781 default: ShouldNotReachHere(); 782 } 783 } 784 785 // Split a si20 offset (20bit, signed) into an ui12 offset (12bit, unsigned) and 786 // a high-order summand in register tmp. 787 // 788 // return value: < 0: No split required, si20 actually has property uimm12. 789 // >= 0: Split performed. Use return value as uimm12 displacement and 790 // tmp as index register. 791 int MacroAssembler::split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate) { 792 assert(Immediate::is_simm20(si20_offset), "sanity"); 793 int lg_off = (int)si20_offset & 0x0fff; // Punch out low-order 12 bits, always positive. 794 int ll_off = (int)si20_offset & ~0x0fff; // Force low-order 12 bits to zero. 795 assert((Displacement::is_shortDisp(si20_offset) && (ll_off == 0)) || 796 !Displacement::is_shortDisp(si20_offset), "unexpected offset values"); 797 assert((lg_off+ll_off) == si20_offset, "offset splitup error"); 798 799 Register work = accumulate? Z_R0 : tmp; 800 801 if (fixed_codelen) { // Len of code = 10 = 4 + 6. 802 z_lghi(work, ll_off>>12); // Implicit sign extension. 803 z_slag(work, work, 12); 804 } else { // Len of code = 0..10. 805 if (ll_off == 0) { return -1; } 806 // ll_off has 8 significant bits (at most) plus sign. 807 if ((ll_off & 0x0000f000) == 0) { // Non-zero bits only in upper halfbyte. 808 z_llilh(work, ll_off >> 16); 809 if (ll_off < 0) { // Sign-extension required. 810 z_lgfr(work, work); 811 } 812 } else { 813 if ((ll_off & 0x000f0000) == 0) { // Non-zero bits only in lower halfbyte. 814 z_llill(work, ll_off); 815 } else { // Non-zero bits in both halfbytes. 816 z_lghi(work, ll_off>>12); // Implicit sign extension. 817 z_slag(work, work, 12); 818 } 819 } 820 } 821 if (accumulate) { z_algr(tmp, work); } // len of code += 4 822 return lg_off; 823 } 824 825 void MacroAssembler::load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) { 826 if (Displacement::is_validDisp(si20)) { 827 z_ley(t, si20, a); 828 } else { 829 // Fixed_codelen = true is a simple way to ensure that the size of load_float_largeoffset 830 // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant 831 // pool loads). 
832 bool accumulate = true; 833 bool fixed_codelen = true; 834 Register work; 835 836 if (fixed_codelen) { 837 z_lgr(tmp, a); // Lgr_if_needed not applicable due to fixed_codelen. 838 } else { 839 accumulate = (a == tmp); 840 } 841 work = tmp; 842 843 int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate); 844 if (disp12 < 0) { 845 z_le(t, si20, work); 846 } else { 847 if (accumulate) { 848 z_le(t, disp12, work); 849 } else { 850 z_le(t, disp12, work, a); 851 } 852 } 853 } 854 } 855 856 void MacroAssembler::load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) { 857 if (Displacement::is_validDisp(si20)) { 858 z_ldy(t, si20, a); 859 } else { 860 // Fixed_codelen = true is a simple way to ensure that the size of load_double_largeoffset 861 // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant 862 // pool loads). 863 bool accumulate = true; 864 bool fixed_codelen = true; 865 Register work; 866 867 if (fixed_codelen) { 868 z_lgr(tmp, a); // Lgr_if_needed not applicable due to fixed_codelen. 869 } else { 870 accumulate = (a == tmp); 871 } 872 work = tmp; 873 874 int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate); 875 if (disp12 < 0) { 876 z_ld(t, si20, work); 877 } else { 878 if (accumulate) { 879 z_ld(t, disp12, work); 880 } else { 881 z_ld(t, disp12, work, a); 882 } 883 } 884 } 885 } 886 887 // PCrelative TOC access. 888 // Returns distance (in bytes) from current position to start of consts section. 889 // Returns 0 (zero) if no consts section exists or if it has size zero. 890 long MacroAssembler::toc_distance() { 891 CodeSection* cs = code()->consts(); 892 return (long)((cs != nullptr) ? cs->start()-pc() : 0); 893 } 894 895 // Implementation on x86/sparc assumes that constant and instruction section are 896 // adjacent, but this doesn't hold. Two special situations may occur, that we must 897 // be able to handle: 898 // 1. const section may be located apart from the inst section. 899 // 2. const section may be empty 900 // In both cases, we use the const section's start address to compute the "TOC", 901 // this seems to occur only temporarily; in the final step we always seem to end up 902 // with the pc-relatice variant. 903 // 904 // PC-relative offset could be +/-2**32 -> use long for disp 905 // Furthermore: makes no sense to have special code for 906 // adjacent const and inst sections. 907 void MacroAssembler::load_toc(Register Rtoc) { 908 // Simply use distance from start of const section (should be patched in the end). 909 long disp = toc_distance(); 910 911 RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp); 912 relocate(rspec); 913 z_larl(Rtoc, RelAddr::pcrel_off32(disp)); // Offset is in halfwords. 914 } 915 916 // PCrelative TOC access. 917 // Load from anywhere pcrelative (with relocation of load instr) 918 void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) { 919 address pc = this->pc(); 920 ptrdiff_t total_distance = dataLocation - pc; 921 RelocationHolder rspec = internal_word_Relocation::spec(dataLocation); 922 923 assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory"); 924 assert(total_distance != 0, "sanity"); 925 926 // Some extra safety net. 
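  // LGRL takes a 32-bit, halfword-scaled relative offset, so the data must lie within
  // roughly +/-4 GB of the load instruction; that is what the check below verifies.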
927 if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { 928 guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance); 929 } 930 931 (this)->relocate(rspec, relocInfo::pcrel_addr_format); 932 z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance)); 933 } 934 935 936 // PCrelative TOC access. 937 // Load from anywhere pcrelative (with relocation of load instr) 938 // loaded addr has to be relocated when added to constant pool. 939 void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) { 940 address pc = this->pc(); 941 ptrdiff_t total_distance = addrLocation - pc; 942 RelocationHolder rspec = internal_word_Relocation::spec(addrLocation); 943 944 assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory"); 945 946 // Some extra safety net. 947 if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { 948 guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance); 949 } 950 951 (this)->relocate(rspec, relocInfo::pcrel_addr_format); 952 z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance)); 953 } 954 955 // Generic operation: load a value from memory and test. 956 // CondCode indicates the sign (<0, ==0, >0) of the loaded value. 957 void MacroAssembler::load_and_test_byte(Register dst, const Address &a) { 958 z_lb(dst, a); 959 z_ltr(dst, dst); 960 } 961 962 void MacroAssembler::load_and_test_short(Register dst, const Address &a) { 963 int64_t disp = a.disp20(); 964 if (Displacement::is_shortDisp(disp)) { 965 z_lh(dst, a); 966 } else if (Displacement::is_longDisp(disp)) { 967 z_lhy(dst, a); 968 } else { 969 guarantee(false, "displacement out of range"); 970 } 971 z_ltr(dst, dst); 972 } 973 974 void MacroAssembler::load_and_test_int(Register dst, const Address &a) { 975 z_lt(dst, a); 976 } 977 978 void MacroAssembler::load_and_test_int2long(Register dst, const Address &a) { 979 z_ltgf(dst, a); 980 } 981 982 void MacroAssembler::load_and_test_long(Register dst, const Address &a) { 983 z_ltg(dst, a); 984 } 985 986 // Test a bit in memory. 987 void MacroAssembler::testbit(const Address &a, unsigned int bit) { 988 assert(a.index() == noreg, "no index reg allowed in testbit"); 989 if (bit <= 7) { 990 z_tm(a.disp() + 3, a.base(), 1 << bit); 991 } else if (bit <= 15) { 992 z_tm(a.disp() + 2, a.base(), 1 << (bit - 8)); 993 } else if (bit <= 23) { 994 z_tm(a.disp() + 1, a.base(), 1 << (bit - 16)); 995 } else if (bit <= 31) { 996 z_tm(a.disp() + 0, a.base(), 1 << (bit - 24)); 997 } else { 998 ShouldNotReachHere(); 999 } 1000 } 1001 1002 // Test a bit in a register. Result is reflected in CC. 1003 void MacroAssembler::testbit(Register r, unsigned int bitPos) { 1004 if (bitPos < 16) { 1005 z_tmll(r, 1U<<bitPos); 1006 } else if (bitPos < 32) { 1007 z_tmlh(r, 1U<<(bitPos-16)); 1008 } else if (bitPos < 48) { 1009 z_tmhl(r, 1U<<(bitPos-32)); 1010 } else if (bitPos < 64) { 1011 z_tmhh(r, 1U<<(bitPos-48)); 1012 } else { 1013 ShouldNotReachHere(); 1014 } 1015 } 1016 1017 void MacroAssembler::prefetch_read(Address a) { 1018 z_pfd(1, a.disp20(), a.indexOrR0(), a.base()); 1019 } 1020 void MacroAssembler::prefetch_update(Address a) { 1021 z_pfd(2, a.disp20(), a.indexOrR0(), a.base()); 1022 } 1023 1024 // Clear a register, i.e. load const zero into reg. 1025 // Return len (in bytes) of generated instruction(s). 1026 // whole_reg: Clear 64 bits if true, 32 bits otherwise. 
// set_cc:    Use instruction that sets the condition code, if true.
int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) {
  unsigned int start_off = offset();
  if (whole_reg) {
    set_cc ? z_xgr(r, r) : z_laz(r, 0, Z_R0);
  } else {  // Only 32bit register.
    set_cc ? z_xr(r, r) : z_lhi(r, 0);
  }
  return offset() - start_off;
}

#ifdef ASSERT
int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) {
  switch (pattern_len) {
    case 1:
      pattern = (pattern & 0x000000ff)  | ((pattern & 0x000000ff)<<8);
    case 2:
      pattern = (pattern & 0x0000ffff)  | ((pattern & 0x0000ffff)<<16);
    case 4:
      pattern = (pattern & 0xffffffffL) | ((pattern & 0xffffffffL)<<32);
    case 8:
      return load_const_optimized_rtn_len(r, pattern, true);
      break;
    default:
      guarantee(false, "preset_reg: bad len");
  }
  return 0;
}
#endif

// addr: Address descriptor of memory to clear. Index register will not be used!
// size: Number of bytes to clear.
// Condition code will not be preserved.
// !!! DO NOT USE THEM FOR ATOMIC MEMORY CLEARING !!!
// !!! Use store_const() instead !!!
void MacroAssembler::clear_mem(const Address& addr, unsigned int size) {
  guarantee((addr.disp() + size) <= 4096, "MacroAssembler::clear_mem: size too large");

  switch (size) {
    case 0:
      return;
    case 1:
      z_mvi(addr, 0);
      return;
    case 2:
      z_mvhhi(addr, 0);
      return;
    case 4:
      z_mvhi(addr, 0);
      return;
    case 8:
      z_mvghi(addr, 0);
      return;
    default: ; // Fallthru to xc.
  }

  // Caution: the emitter with Address operands does implicitly decrement the length.
  if (size <= 256) {
    z_xc(addr, size, addr);
  } else {
    unsigned int offset = addr.disp();
    unsigned int incr   = 256;
    for (unsigned int i = 0; i <= size-incr; i += incr) {
      z_xc(offset, incr - 1, addr.base(), offset, addr.base());
      offset += incr;
    }
    unsigned int rest = size - (offset - addr.disp());
    if (rest > 0) {
      z_xc(offset, rest-1, addr.base(), offset, addr.base());
    }
  }
}

void MacroAssembler::align(int modulus) {
  align(modulus, offset());
}

void MacroAssembler::align(int modulus, int target) {
  assert(((modulus % 2 == 0) && (target % 2 == 0)), "needs to be even");
  int delta = target - offset();
  while ((offset() + delta) % modulus != 0) z_nop();
}

// Special version for non-relocatable code if required alignment
// is larger than CodeEntryAlignment.
void MacroAssembler::align_address(int modulus) {
  while ((uintptr_t)pc() % modulus != 0) z_nop();
}

Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         Register temp_reg,
                                         int64_t extra_slot_offset) {
  // On Z, we can have index and disp in an Address. So don't call argument_offset,
  // which issues an unnecessary add instruction.
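  // The result is Z_esp + extra_slot_offset*stackElementSize, with arg_slot*stackElementSize
  // either folded into the displacement (constant slot) or computed into temp_reg by the
  // shift below (register slot).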
1121 int stackElementSize = Interpreter::stackElementSize; 1122 int64_t offset = extra_slot_offset * stackElementSize; 1123 const Register argbase = Z_esp; 1124 if (arg_slot.is_constant()) { 1125 offset += arg_slot.as_constant() * stackElementSize; 1126 return Address(argbase, offset); 1127 } 1128 // else 1129 assert(temp_reg != noreg, "must specify"); 1130 assert(temp_reg != Z_ARG1, "base and index are conflicting"); 1131 z_sllg(temp_reg, arg_slot.as_register(), exact_log2(stackElementSize)); // tempreg = arg_slot << 3 1132 return Address(argbase, temp_reg, offset); 1133 } 1134 1135 1136 //=================================================================== 1137 //=== START C O N S T A N T S I N C O D E S T R E A M === 1138 //=================================================================== 1139 //=== P A T CH A B L E C O N S T A N T S === 1140 //=================================================================== 1141 1142 1143 //--------------------------------------------------- 1144 // Load (patchable) constant into register 1145 //--------------------------------------------------- 1146 1147 1148 // Load absolute address (and try to optimize). 1149 // Note: This method is usable only for position-fixed code, 1150 // referring to a position-fixed target location. 1151 // If not so, relocations and patching must be used. 1152 void MacroAssembler::load_absolute_address(Register d, address addr) { 1153 assert(addr != nullptr, "should not happen"); 1154 BLOCK_COMMENT("load_absolute_address:"); 1155 if (addr == nullptr) { 1156 z_larl(d, pc()); // Dummy emit for size calc. 1157 return; 1158 } 1159 1160 if (RelAddr::is_in_range_of_RelAddr32(addr, pc())) { 1161 z_larl(d, addr); 1162 return; 1163 } 1164 1165 load_const_optimized(d, (long)addr); 1166 } 1167 1168 // Load a 64bit constant. 1169 // Patchable code sequence, but not atomically patchable. 1170 // Make sure to keep code size constant -> no value-dependent optimizations. 1171 // Do not kill condition code. 1172 void MacroAssembler::load_const(Register t, long x) { 1173 // Note: Right shift is only cleanly defined for unsigned types 1174 // or for signed types with nonnegative values. 1175 Assembler::z_iihf(t, (long)((unsigned long)x >> 32)); 1176 Assembler::z_iilf(t, (long)((unsigned long)x & 0xffffffffUL)); 1177 } 1178 1179 // Load a 32bit constant into a 64bit register, sign-extend or zero-extend. 1180 // Patchable code sequence, but not atomically patchable. 1181 // Make sure to keep code size constant -> no value-dependent optimizations. 1182 // Do not kill condition code. 1183 void MacroAssembler::load_const_32to64(Register t, int64_t x, bool sign_extend) { 1184 if (sign_extend) { Assembler::z_lgfi(t, x); } 1185 else { Assembler::z_llilf(t, x); } 1186 } 1187 1188 // Load narrow oop constant, no decompression. 1189 void MacroAssembler::load_narrow_oop(Register t, narrowOop a) { 1190 assert(UseCompressedOops, "must be on to call this method"); 1191 load_const_32to64(t, CompressedOops::narrow_oop_value(a), false /*sign_extend*/); 1192 } 1193 1194 // Load narrow klass constant, compression required. 1195 void MacroAssembler::load_narrow_klass(Register t, Klass* k) { 1196 assert(UseCompressedClassPointers, "must be on to call this method"); 1197 narrowKlass encoded_k = CompressedKlassPointers::encode(k); 1198 load_const_32to64(t, encoded_k, false /*sign_extend*/); 1199 } 1200 1201 //------------------------------------------------------ 1202 // Compare (patchable) constant with register. 
1203 //------------------------------------------------------ 1204 1205 // Compare narrow oop in reg with narrow oop constant, no decompression. 1206 void MacroAssembler::compare_immediate_narrow_oop(Register oop1, narrowOop oop2) { 1207 assert(UseCompressedOops, "must be on to call this method"); 1208 1209 Assembler::z_clfi(oop1, CompressedOops::narrow_oop_value(oop2)); 1210 } 1211 1212 // Compare narrow oop in reg with narrow oop constant, no decompression. 1213 void MacroAssembler::compare_immediate_narrow_klass(Register klass1, Klass* klass2) { 1214 assert(UseCompressedClassPointers, "must be on to call this method"); 1215 narrowKlass encoded_k = CompressedKlassPointers::encode(klass2); 1216 1217 Assembler::z_clfi(klass1, encoded_k); 1218 } 1219 1220 //---------------------------------------------------------- 1221 // Check which kind of load_constant we have here. 1222 //---------------------------------------------------------- 1223 1224 // Detection of CPU version dependent load_const sequence. 1225 // The detection is valid only for code sequences generated by load_const, 1226 // not load_const_optimized. 1227 bool MacroAssembler::is_load_const(address a) { 1228 unsigned long inst1, inst2; 1229 unsigned int len1, len2; 1230 1231 len1 = get_instruction(a, &inst1); 1232 len2 = get_instruction(a + len1, &inst2); 1233 1234 return is_z_iihf(inst1) && is_z_iilf(inst2); 1235 } 1236 1237 // Detection of CPU version dependent load_const_32to64 sequence. 1238 // Mostly used for narrow oops and narrow Klass pointers. 1239 // The detection is valid only for code sequences generated by load_const_32to64. 1240 bool MacroAssembler::is_load_const_32to64(address pos) { 1241 unsigned long inst1, inst2; 1242 unsigned int len1; 1243 1244 len1 = get_instruction(pos, &inst1); 1245 return is_z_llilf(inst1); 1246 } 1247 1248 // Detection of compare_immediate_narrow sequence. 1249 // The detection is valid only for code sequences generated by compare_immediate_narrow_oop. 1250 bool MacroAssembler::is_compare_immediate32(address pos) { 1251 return is_equal(pos, CLFI_ZOPC, RIL_MASK); 1252 } 1253 1254 // Detection of compare_immediate_narrow sequence. 1255 // The detection is valid only for code sequences generated by compare_immediate_narrow_oop. 1256 bool MacroAssembler::is_compare_immediate_narrow_oop(address pos) { 1257 return is_compare_immediate32(pos); 1258 } 1259 1260 // Detection of compare_immediate_narrow sequence. 1261 // The detection is valid only for code sequences generated by compare_immediate_narrow_klass. 1262 bool MacroAssembler::is_compare_immediate_narrow_klass(address pos) { 1263 return is_compare_immediate32(pos); 1264 } 1265 1266 //----------------------------------- 1267 // patch the load_constant 1268 //----------------------------------- 1269 1270 // CPU-version dependent patching of load_const. 1271 void MacroAssembler::patch_const(address a, long x) { 1272 assert(is_load_const(a), "not a load of a constant"); 1273 // Note: Right shift is only cleanly defined for unsigned types 1274 // or for signed types with nonnegative values. 1275 set_imm32((address)a, (long)((unsigned long)x >> 32)); 1276 set_imm32((address)(a + 6), (long)((unsigned long)x & 0xffffffffUL)); 1277 } 1278 1279 // Patching the value of CPU version dependent load_const_32to64 sequence. 1280 // The passed ptr MUST be in compressed format! 
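// Note: load_const_32to64 for narrow oops/klasses emits a single 6-byte LLILF, so patching
// just overwrites its 32-bit immediate in place; the value returned is the instruction
// length in bytes.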
1281 int MacroAssembler::patch_load_const_32to64(address pos, int64_t np) { 1282 assert(is_load_const_32to64(pos), "not a load of a narrow ptr (oop or klass)"); 1283 1284 set_imm32(pos, np); 1285 return 6; 1286 } 1287 1288 // Patching the value of CPU version dependent compare_immediate_narrow sequence. 1289 // The passed ptr MUST be in compressed format! 1290 int MacroAssembler::patch_compare_immediate_32(address pos, int64_t np) { 1291 assert(is_compare_immediate32(pos), "not a compressed ptr compare"); 1292 1293 set_imm32(pos, np); 1294 return 6; 1295 } 1296 1297 // Patching the immediate value of CPU version dependent load_narrow_oop sequence. 1298 // The passed ptr must NOT be in compressed format! 1299 int MacroAssembler::patch_load_narrow_oop(address pos, oop o) { 1300 assert(UseCompressedOops, "Can only patch compressed oops"); 1301 return patch_load_const_32to64(pos, CompressedOops::narrow_oop_value(o)); 1302 } 1303 1304 // Patching the immediate value of CPU version dependent load_narrow_klass sequence. 1305 // The passed ptr must NOT be in compressed format! 1306 int MacroAssembler::patch_load_narrow_klass(address pos, Klass* k) { 1307 assert(UseCompressedClassPointers, "Can only patch compressed klass pointers"); 1308 1309 narrowKlass nk = CompressedKlassPointers::encode(k); 1310 return patch_load_const_32to64(pos, nk); 1311 } 1312 1313 // Patching the immediate value of CPU version dependent compare_immediate_narrow_oop sequence. 1314 // The passed ptr must NOT be in compressed format! 1315 int MacroAssembler::patch_compare_immediate_narrow_oop(address pos, oop o) { 1316 assert(UseCompressedOops, "Can only patch compressed oops"); 1317 return patch_compare_immediate_32(pos, CompressedOops::narrow_oop_value(o)); 1318 } 1319 1320 // Patching the immediate value of CPU version dependent compare_immediate_narrow_klass sequence. 1321 // The passed ptr must NOT be in compressed format! 1322 int MacroAssembler::patch_compare_immediate_narrow_klass(address pos, Klass* k) { 1323 assert(UseCompressedClassPointers, "Can only patch compressed klass pointers"); 1324 1325 narrowKlass nk = CompressedKlassPointers::encode(k); 1326 return patch_compare_immediate_32(pos, nk); 1327 } 1328 1329 //------------------------------------------------------------------------ 1330 // Extract the constant from a load_constant instruction stream. 1331 //------------------------------------------------------------------------ 1332 1333 // Get constant from a load_const sequence. 1334 long MacroAssembler::get_const(address a) { 1335 assert(is_load_const(a), "not a load of a constant"); 1336 unsigned long x; 1337 x = (((unsigned long) (get_imm32(a,0) & 0xffffffff)) << 32); 1338 x |= (((unsigned long) (get_imm32(a,1) & 0xffffffff))); 1339 return (long) x; 1340 } 1341 1342 //-------------------------------------- 1343 // Store a constant in memory. 1344 //-------------------------------------- 1345 1346 // General emitter to move a constant to memory. 1347 // The store is atomic. 1348 // o Address must be given in RS format (no index register) 1349 // o Displacement should be 12bit unsigned for efficiency. 20bit signed also supported. 1350 // o Constant can be 1, 2, 4, or 8 bytes, signed or unsigned. 1351 // o Memory slot can be 1, 2, 4, or 8 bytes, signed or unsigned. 1352 // o Memory slot must be at least as wide as constant, will assert otherwise. 1353 // o Signed constants will sign-extend, unsigned constants will zero-extend to slot width. 
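// Example (illustrative, arbitrary operands): store_const(Address(Z_R9, 40), 0, 8, 8, Z_R1)
// emits a single MVGHI 40(Z_R9),0 because the displacement is short and the constant fits
// in 16 bits; otherwise the constant is built in the scratch register and stored.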
1354 int MacroAssembler::store_const(const Address &dest, long imm, 1355 unsigned int lm, unsigned int lc, 1356 Register scratch) { 1357 int64_t disp = dest.disp(); 1358 Register base = dest.base(); 1359 assert(!dest.has_index(), "not supported"); 1360 assert((lm==1)||(lm==2)||(lm==4)||(lm==8), "memory length not supported"); 1361 assert((lc==1)||(lc==2)||(lc==4)||(lc==8), "constant length not supported"); 1362 assert(lm>=lc, "memory slot too small"); 1363 assert(lc==8 || Immediate::is_simm(imm, lc*8), "const out of range"); 1364 assert(Displacement::is_validDisp(disp), "displacement out of range"); 1365 1366 bool is_shortDisp = Displacement::is_shortDisp(disp); 1367 int store_offset = -1; 1368 1369 // For target len == 1 it's easy. 1370 if (lm == 1) { 1371 store_offset = offset(); 1372 if (is_shortDisp) { 1373 z_mvi(disp, base, imm); 1374 return store_offset; 1375 } else { 1376 z_mviy(disp, base, imm); 1377 return store_offset; 1378 } 1379 } 1380 1381 // All the "good stuff" takes an unsigned displacement. 1382 if (is_shortDisp) { 1383 // NOTE: Cannot use clear_mem for imm==0, because it is not atomic. 1384 1385 store_offset = offset(); 1386 switch (lm) { 1387 case 2: // Lc == 1 handled correctly here, even for unsigned. Instruction does no widening. 1388 z_mvhhi(disp, base, imm); 1389 return store_offset; 1390 case 4: 1391 if (Immediate::is_simm16(imm)) { 1392 z_mvhi(disp, base, imm); 1393 return store_offset; 1394 } 1395 break; 1396 case 8: 1397 if (Immediate::is_simm16(imm)) { 1398 z_mvghi(disp, base, imm); 1399 return store_offset; 1400 } 1401 break; 1402 default: 1403 ShouldNotReachHere(); 1404 break; 1405 } 1406 } 1407 1408 // Can't optimize, so load value and store it. 1409 guarantee(scratch != noreg, " need a scratch register here !"); 1410 if (imm != 0) { 1411 load_const_optimized(scratch, imm); // Preserves CC anyway. 1412 } else { 1413 // Leave CC alone!! 1414 (void) clear_reg(scratch, true, false); // Indicate unused result. 1415 } 1416 1417 store_offset = offset(); 1418 if (is_shortDisp) { 1419 switch (lm) { 1420 case 2: 1421 z_sth(scratch, disp, Z_R0, base); 1422 return store_offset; 1423 case 4: 1424 z_st(scratch, disp, Z_R0, base); 1425 return store_offset; 1426 case 8: 1427 z_stg(scratch, disp, Z_R0, base); 1428 return store_offset; 1429 default: 1430 ShouldNotReachHere(); 1431 break; 1432 } 1433 } else { 1434 switch (lm) { 1435 case 2: 1436 z_sthy(scratch, disp, Z_R0, base); 1437 return store_offset; 1438 case 4: 1439 z_sty(scratch, disp, Z_R0, base); 1440 return store_offset; 1441 case 8: 1442 z_stg(scratch, disp, Z_R0, base); 1443 return store_offset; 1444 default: 1445 ShouldNotReachHere(); 1446 break; 1447 } 1448 } 1449 return -1; // should not reach here 1450 } 1451 1452 //=================================================================== 1453 //=== N O T P A T CH A B L E C O N S T A N T S === 1454 //=================================================================== 1455 1456 // Load constant x into register t with a fast instruction sequence 1457 // depending on the bits in x. Preserves CC under all circumstances. 1458 int MacroAssembler::load_const_optimized_rtn_len(Register t, long x, bool emit) { 1459 if (x == 0) { 1460 int len; 1461 if (emit) { 1462 len = clear_reg(t, true, false); 1463 } else { 1464 len = 4; 1465 } 1466 return len; 1467 } 1468 1469 if (Immediate::is_simm16(x)) { 1470 if (emit) { z_lghi(t, x); } 1471 return 4; 1472 } 1473 1474 // 64 bit value: | part1 | part2 | part3 | part4 | 1475 // At least one part is not zero! 
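// Example: x = 0x0000000100000000 (only part2 set) needs just LLIHL (4 bytes);
// x = 0x0001000000000001 needs LLIHH + IILL (8 bytes).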
1476 // Note: Right shift is only cleanly defined for unsigned types 1477 // or for signed types with nonnegative values. 1478 int part1 = (int)((unsigned long)x >> 48) & 0x0000ffff; 1479 int part2 = (int)((unsigned long)x >> 32) & 0x0000ffff; 1480 int part3 = (int)((unsigned long)x >> 16) & 0x0000ffff; 1481 int part4 = (int)x & 0x0000ffff; 1482 int part12 = (int)((unsigned long)x >> 32); 1483 int part34 = (int)x; 1484 1485 // Lower word only (unsigned). 1486 if (part12 == 0) { 1487 if (part3 == 0) { 1488 if (emit) z_llill(t, part4); 1489 return 4; 1490 } 1491 if (part4 == 0) { 1492 if (emit) z_llilh(t, part3); 1493 return 4; 1494 } 1495 if (emit) z_llilf(t, part34); 1496 return 6; 1497 } 1498 1499 // Upper word only. 1500 if (part34 == 0) { 1501 if (part1 == 0) { 1502 if (emit) z_llihl(t, part2); 1503 return 4; 1504 } 1505 if (part2 == 0) { 1506 if (emit) z_llihh(t, part1); 1507 return 4; 1508 } 1509 if (emit) z_llihf(t, part12); 1510 return 6; 1511 } 1512 1513 // Lower word only (signed). 1514 if ((part1 == 0x0000ffff) && (part2 == 0x0000ffff) && ((part3 & 0x00008000) != 0)) { 1515 if (emit) z_lgfi(t, part34); 1516 return 6; 1517 } 1518 1519 int len = 0; 1520 1521 if ((part1 == 0) || (part2 == 0)) { 1522 if (part1 == 0) { 1523 if (emit) z_llihl(t, part2); 1524 len += 4; 1525 } else { 1526 if (emit) z_llihh(t, part1); 1527 len += 4; 1528 } 1529 } else { 1530 if (emit) z_llihf(t, part12); 1531 len += 6; 1532 } 1533 1534 if ((part3 == 0) || (part4 == 0)) { 1535 if (part3 == 0) { 1536 if (emit) z_iill(t, part4); 1537 len += 4; 1538 } else { 1539 if (emit) z_iilh(t, part3); 1540 len += 4; 1541 } 1542 } else { 1543 if (emit) z_iilf(t, part34); 1544 len += 6; 1545 } 1546 return len; 1547 } 1548 1549 //===================================================================== 1550 //=== H I G H E R L E V E L B R A N C H E M I T T E R S === 1551 //===================================================================== 1552 1553 // Note: In the worst case, one of the scratch registers is destroyed!!! 1554 void MacroAssembler::compare32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1555 // Right operand is constant. 1556 if (x2.is_constant()) { 1557 jlong value = x2.as_constant(); 1558 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/true); 1559 return; 1560 } 1561 1562 // Right operand is in register. 1563 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/true); 1564 } 1565 1566 // Note: In the worst case, one of the scratch registers is destroyed!!! 1567 void MacroAssembler::compareU32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1568 // Right operand is constant. 1569 if (x2.is_constant()) { 1570 jlong value = x2.as_constant(); 1571 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/false); 1572 return; 1573 } 1574 1575 // Right operand is in register. 1576 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/false); 1577 } 1578 1579 // Note: In the worst case, one of the scratch registers is destroyed!!! 1580 void MacroAssembler::compare64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1581 // Right operand is constant. 1582 if (x2.is_constant()) { 1583 jlong value = x2.as_constant(); 1584 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/true); 1585 return; 1586 } 1587 1588 // Right operand is in register. 
1589 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/true); 1590 } 1591 1592 void MacroAssembler::compareU64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1593 // Right operand is constant. 1594 if (x2.is_constant()) { 1595 jlong value = x2.as_constant(); 1596 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/false); 1597 return; 1598 } 1599 1600 // Right operand is in register. 1601 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/false); 1602 } 1603 1604 // Generate an optimal branch to the branch target. 1605 // Optimal means that a relative branch (brc or brcl) is used if the 1606 // branch distance is short enough. Loading the target address into a 1607 // register and branching via reg is used as fallback only. 1608 // 1609 // Used registers: 1610 // Z_R1 - work reg. Holds branch target address. 1611 // Used in fallback case only. 1612 // 1613 // This version of branch_optimized is good for cases where the target address is known 1614 // and constant, i.e. is never changed (no relocation, no patching). 1615 void MacroAssembler::branch_optimized(Assembler::branch_condition cond, address branch_addr) { 1616 address branch_origin = pc(); 1617 1618 if (RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) { 1619 z_brc(cond, branch_addr); 1620 } else if (RelAddr::is_in_range_of_RelAddr32(branch_addr, branch_origin)) { 1621 z_brcl(cond, branch_addr); 1622 } else { 1623 load_const_optimized(Z_R1, branch_addr); // CC must not get killed by load_const_optimized. 1624 z_bcr(cond, Z_R1); 1625 } 1626 } 1627 1628 // This version of branch_optimized is good for cases where the target address 1629 // is potentially not yet known at the time the code is emitted. 1630 // 1631 // One very common case is a branch to an unbound label which is handled here. 1632 // The caller might know (or hope) that the branch distance is short enough 1633 // to be encoded in a 16bit relative address. In this case he will pass a 1634 // NearLabel branch_target. 1635 // Care must be taken with unbound labels. Each call to target(label) creates 1636 // an entry in the patch queue for that label to patch all references of the label 1637 // once it gets bound. Those recorded patch locations must be patchable. Otherwise, 1638 // an assertion fires at patch time. 1639 void MacroAssembler::branch_optimized(Assembler::branch_condition cond, Label& branch_target) { 1640 if (branch_target.is_bound()) { 1641 address branch_addr = target(branch_target); 1642 branch_optimized(cond, branch_addr); 1643 } else if (branch_target.is_near()) { 1644 z_brc(cond, branch_target); // Caller assures that the target will be in range for z_brc. 1645 } else { 1646 z_brcl(cond, branch_target); // Let's hope target is in range. Otherwise, we will abort at patch time. 1647 } 1648 } 1649 1650 // Generate an optimal compare and branch to the branch target. 1651 // Optimal means that a relative branch (clgrj, brc or brcl) is used if the 1652 // branch distance is short enough. Loading the target address into a 1653 // register and branching via reg is used as fallback only. 
1654 // 1655 // Input: 1656 // r1 - left compare operand 1657 // r2 - right compare operand 1658 void MacroAssembler::compare_and_branch_optimized(Register r1, 1659 Register r2, 1660 Assembler::branch_condition cond, 1661 address branch_addr, 1662 bool len64, 1663 bool has_sign) { 1664 unsigned int casenum = (len64?2:0)+(has_sign?0:1); 1665 1666 address branch_origin = pc(); 1667 if (VM_Version::has_CompareBranch() && RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) { 1668 switch (casenum) { 1669 case 0: z_crj( r1, r2, cond, branch_addr); break; 1670 case 1: z_clrj (r1, r2, cond, branch_addr); break; 1671 case 2: z_cgrj(r1, r2, cond, branch_addr); break; 1672 case 3: z_clgrj(r1, r2, cond, branch_addr); break; 1673 default: ShouldNotReachHere(); break; 1674 } 1675 } else { 1676 switch (casenum) { 1677 case 0: z_cr( r1, r2); break; 1678 case 1: z_clr(r1, r2); break; 1679 case 2: z_cgr(r1, r2); break; 1680 case 3: z_clgr(r1, r2); break; 1681 default: ShouldNotReachHere(); break; 1682 } 1683 branch_optimized(cond, branch_addr); 1684 } 1685 } 1686 1687 // Generate an optimal compare and branch to the branch target. 1688 // Optimal means that a relative branch (clgij, brc or brcl) is used if the 1689 // branch distance is short enough. Loading the target address into a 1690 // register and branching via reg is used as fallback only. 1691 // 1692 // Input: 1693 // r1 - left compare operand (in register) 1694 // x2 - right compare operand (immediate) 1695 void MacroAssembler::compare_and_branch_optimized(Register r1, 1696 jlong x2, 1697 Assembler::branch_condition cond, 1698 Label& branch_target, 1699 bool len64, 1700 bool has_sign) { 1701 address branch_origin = pc(); 1702 bool x2_imm8 = (has_sign && Immediate::is_simm8(x2)) || (!has_sign && Immediate::is_uimm8(x2)); 1703 bool is_RelAddr16 = branch_target.is_near() || 1704 (branch_target.is_bound() && 1705 RelAddr::is_in_range_of_RelAddr16(target(branch_target), branch_origin)); 1706 unsigned int casenum = (len64?2:0)+(has_sign?0:1); 1707 1708 if (VM_Version::has_CompareBranch() && is_RelAddr16 && x2_imm8) { 1709 switch (casenum) { 1710 case 0: z_cij( r1, x2, cond, branch_target); break; 1711 case 1: z_clij(r1, x2, cond, branch_target); break; 1712 case 2: z_cgij(r1, x2, cond, branch_target); break; 1713 case 3: z_clgij(r1, x2, cond, branch_target); break; 1714 default: ShouldNotReachHere(); break; 1715 } 1716 return; 1717 } 1718 1719 if (x2 == 0) { 1720 switch (casenum) { 1721 case 0: z_ltr(r1, r1); break; 1722 case 1: z_ltr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication! 1723 case 2: z_ltgr(r1, r1); break; 1724 case 3: z_ltgr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication! 
1725 default: ShouldNotReachHere(); break; 1726 } 1727 } else { 1728 if ((has_sign && Immediate::is_simm16(x2)) || (!has_sign && Immediate::is_uimm(x2, 15))) { 1729 switch (casenum) { 1730 case 0: z_chi(r1, x2); break; 1731 case 1: z_chi(r1, x2); break; // positive immediate < 2**15 1732 case 2: z_cghi(r1, x2); break; 1733 case 3: z_cghi(r1, x2); break; // positive immediate < 2**15 1734 default: break; 1735 } 1736 } else if ( (has_sign && Immediate::is_simm32(x2)) || (!has_sign && Immediate::is_uimm32(x2)) ) { 1737 switch (casenum) { 1738 case 0: z_cfi( r1, x2); break; 1739 case 1: z_clfi(r1, x2); break; 1740 case 2: z_cgfi(r1, x2); break; 1741 case 3: z_clgfi(r1, x2); break; 1742 default: ShouldNotReachHere(); break; 1743 } 1744 } else { 1745 // No instruction with immediate operand possible, so load into register. 1746 Register scratch = (r1 != Z_R0) ? Z_R0 : Z_R1; 1747 load_const_optimized(scratch, x2); 1748 switch (casenum) { 1749 case 0: z_cr( r1, scratch); break; 1750 case 1: z_clr(r1, scratch); break; 1751 case 2: z_cgr(r1, scratch); break; 1752 case 3: z_clgr(r1, scratch); break; 1753 default: ShouldNotReachHere(); break; 1754 } 1755 } 1756 } 1757 branch_optimized(cond, branch_target); 1758 } 1759 1760 // Generate an optimal compare and branch to the branch target. 1761 // Optimal means that a relative branch (clgrj, brc or brcl) is used if the 1762 // branch distance is short enough. Loading the target address into a 1763 // register and branching via reg is used as fallback only. 1764 // 1765 // Input: 1766 // r1 - left compare operand 1767 // r2 - right compare operand 1768 void MacroAssembler::compare_and_branch_optimized(Register r1, 1769 Register r2, 1770 Assembler::branch_condition cond, 1771 Label& branch_target, 1772 bool len64, 1773 bool has_sign) { 1774 unsigned int casenum = (len64 ? 2 : 0) + (has_sign ? 
0 : 1); 1775 1776 if (branch_target.is_bound()) { 1777 address branch_addr = target(branch_target); 1778 compare_and_branch_optimized(r1, r2, cond, branch_addr, len64, has_sign); 1779 } else { 1780 if (VM_Version::has_CompareBranch() && branch_target.is_near()) { 1781 switch (casenum) { 1782 case 0: z_crj( r1, r2, cond, branch_target); break; 1783 case 1: z_clrj( r1, r2, cond, branch_target); break; 1784 case 2: z_cgrj( r1, r2, cond, branch_target); break; 1785 case 3: z_clgrj(r1, r2, cond, branch_target); break; 1786 default: ShouldNotReachHere(); break; 1787 } 1788 } else { 1789 switch (casenum) { 1790 case 0: z_cr( r1, r2); break; 1791 case 1: z_clr(r1, r2); break; 1792 case 2: z_cgr(r1, r2); break; 1793 case 3: z_clgr(r1, r2); break; 1794 default: ShouldNotReachHere(); break; 1795 } 1796 branch_optimized(cond, branch_target); 1797 } 1798 } 1799 } 1800 1801 //=========================================================================== 1802 //=== END H I G H E R L E V E L B R A N C H E M I T T E R S === 1803 //=========================================================================== 1804 1805 AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) { 1806 assert(oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 1807 int index = oop_recorder()->allocate_metadata_index(obj); 1808 RelocationHolder rspec = metadata_Relocation::spec(index); 1809 return AddressLiteral((address)obj, rspec); 1810 } 1811 1812 AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) { 1813 assert(oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 1814 int index = oop_recorder()->find_index(obj); 1815 RelocationHolder rspec = metadata_Relocation::spec(index); 1816 return AddressLiteral((address)obj, rspec); 1817 } 1818 1819 AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) { 1820 assert(oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 1821 int oop_index = oop_recorder()->allocate_oop_index(obj); 1822 return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); 1823 } 1824 1825 AddressLiteral MacroAssembler::constant_oop_address(jobject obj) { 1826 assert(oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 1827 int oop_index = oop_recorder()->find_index(obj); 1828 return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); 1829 } 1830 1831 // NOTE: destroys r 1832 void MacroAssembler::c2bool(Register r, Register t) { 1833 z_lcr(t, r); // t = -r 1834 z_or(r, t); // r = -r OR r 1835 z_srl(r, 31); // Yields 0 if r was 0, 1 otherwise. 1836 } 1837 1838 // Patch instruction `inst' at offset `inst_pos' to refer to `dest_pos' 1839 // and return the resulting instruction. 1840 // Dest_pos and inst_pos are 32 bit only. These parms can only designate 1841 // relative positions. 1842 // Use correct argument types. Do not pre-calculate distance. 
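// Typical use: fetch the instruction at inst_pos, run it through patched_branch(), and write the result back, as pd_patch_instruction() below does.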
1843 unsigned long MacroAssembler::patched_branch(address dest_pos, unsigned long inst, address inst_pos) { 1844 int c = 0; 1845 unsigned long patched_inst = 0; 1846 if (is_call_pcrelative_short(inst) || 1847 is_branch_pcrelative_short(inst) || 1848 is_branchoncount_pcrelative_short(inst) || 1849 is_branchonindex32_pcrelative_short(inst)) { 1850 c = 1; 1851 int m = fmask(15, 0); // simm16(-1, 16, 32); 1852 int v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 32); 1853 patched_inst = (inst & ~m) | v; 1854 } else if (is_compareandbranch_pcrelative_short(inst)) { 1855 c = 2; 1856 long m = fmask(31, 16); // simm16(-1, 16, 48); 1857 long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48); 1858 patched_inst = (inst & ~m) | v; 1859 } else if (is_branchonindex64_pcrelative_short(inst)) { 1860 c = 3; 1861 long m = fmask(31, 16); // simm16(-1, 16, 48); 1862 long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48); 1863 patched_inst = (inst & ~m) | v; 1864 } else if (is_call_pcrelative_long(inst) || is_branch_pcrelative_long(inst)) { 1865 c = 4; 1866 long m = fmask(31, 0); // simm32(-1, 16, 48); 1867 long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48); 1868 patched_inst = (inst & ~m) | v; 1869 } else if (is_pcrelative_long(inst)) { // These are the non-branch pc-relative instructions. 1870 c = 5; 1871 long m = fmask(31, 0); // simm32(-1, 16, 48); 1872 long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48); 1873 patched_inst = (inst & ~m) | v; 1874 } else { 1875 print_dbg_msg(tty, inst, "not a relative branch", 0); 1876 dump_code_range(tty, inst_pos, 32, "not a pcrelative branch"); 1877 ShouldNotReachHere(); 1878 } 1879 1880 long new_off = get_pcrel_offset(patched_inst); 1881 if (new_off != (dest_pos-inst_pos)) { 1882 tty->print_cr("case %d: dest_pos = %p, inst_pos = %p, disp = %ld(%12.12lx)", c, dest_pos, inst_pos, new_off, new_off); 1883 print_dbg_msg(tty, inst, "<- original instruction: branch patching error", 0); 1884 print_dbg_msg(tty, patched_inst, "<- patched instruction: branch patching error", 0); 1885 #ifdef LUCY_DBG 1886 VM_Version::z_SIGSEGV(); 1887 #endif 1888 ShouldNotReachHere(); 1889 } 1890 return patched_inst; 1891 } 1892 1893 // Only called when binding labels (share/vm/asm/assembler.cpp) 1894 // Pass arguments as intended. Do not pre-calculate distance. 1895 void MacroAssembler::pd_patch_instruction(address branch, address target, const char* file, int line) { 1896 unsigned long stub_inst; 1897 int inst_len = get_instruction(branch, &stub_inst); 1898 1899 set_instruction(branch, patched_branch(target, stub_inst, branch), inst_len); 1900 } 1901 1902 1903 // Extract relative address (aka offset). 1904 // inv_simm16 works for 4-byte instructions only. 1905 // compare and branch instructions are 6-byte and have a 16bit offset "in the middle". 
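// A 4-byte instruction occupies only the low 32 bits of inst, so its 16-bit offset is extracted with inv_simm16; the 6-byte forms use inv_simm16_48.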
1906 long MacroAssembler::get_pcrel_offset(unsigned long inst) { 1907 1908 if (MacroAssembler::is_pcrelative_short(inst)) { 1909 if (((inst&0xFFFFffff00000000UL) == 0) && ((inst&0x00000000FFFF0000UL) != 0)) { 1910 return RelAddr::inv_pcrel_off16(inv_simm16(inst)); 1911 } else { 1912 return RelAddr::inv_pcrel_off16(inv_simm16_48(inst)); 1913 } 1914 } 1915 1916 if (MacroAssembler::is_pcrelative_long(inst)) { 1917 return RelAddr::inv_pcrel_off32(inv_simm32(inst)); 1918 } 1919 1920 print_dbg_msg(tty, inst, "not a pcrelative instruction", 6); 1921 #ifdef LUCY_DBG 1922 VM_Version::z_SIGSEGV(); 1923 #else 1924 ShouldNotReachHere(); 1925 #endif 1926 return -1; 1927 } 1928 1929 long MacroAssembler::get_pcrel_offset(address pc) { 1930 unsigned long inst; 1931 unsigned int len = get_instruction(pc, &inst); 1932 1933 #ifdef ASSERT 1934 long offset; 1935 if (MacroAssembler::is_pcrelative_short(inst) || MacroAssembler::is_pcrelative_long(inst)) { 1936 offset = get_pcrel_offset(inst); 1937 } else { 1938 offset = -1; 1939 } 1940 1941 if (offset == -1) { 1942 dump_code_range(tty, pc, 32, "not a pcrelative instruction"); 1943 #ifdef LUCY_DBG 1944 VM_Version::z_SIGSEGV(); 1945 #else 1946 ShouldNotReachHere(); 1947 #endif 1948 } 1949 return offset; 1950 #else 1951 return get_pcrel_offset(inst); 1952 #endif // ASSERT 1953 } 1954 1955 // Get target address from pc-relative instructions. 1956 address MacroAssembler::get_target_addr_pcrel(address pc) { 1957 assert(is_pcrelative_long(pc), "not a pcrelative instruction"); 1958 return pc + get_pcrel_offset(pc); 1959 } 1960 1961 // Patch pc relative load address. 1962 void MacroAssembler::patch_target_addr_pcrel(address pc, address con) { 1963 unsigned long inst; 1964 // Offset is +/- 2**32 -> use long. 1965 ptrdiff_t distance = con - pc; 1966 1967 get_instruction(pc, &inst); 1968 1969 if (is_pcrelative_short(inst)) { 1970 *(short *)(pc+2) = RelAddr::pcrel_off16(con, pc); // Instructions are at least 2-byte aligned, no test required. 1971 1972 // Some extra safety net. 1973 if (!RelAddr::is_in_range_of_RelAddr16(distance)) { 1974 print_dbg_msg(tty, inst, "distance out of range (16bit)", 4); 1975 dump_code_range(tty, pc, 32, "distance out of range (16bit)"); 1976 guarantee(RelAddr::is_in_range_of_RelAddr16(distance), "too far away (more than +/- 2**16"); 1977 } 1978 return; 1979 } 1980 1981 if (is_pcrelative_long(inst)) { 1982 *(int *)(pc+2) = RelAddr::pcrel_off32(con, pc); 1983 1984 // Some Extra safety net. 1985 if (!RelAddr::is_in_range_of_RelAddr32(distance)) { 1986 print_dbg_msg(tty, inst, "distance out of range (32bit)", 6); 1987 dump_code_range(tty, pc, 32, "distance out of range (32bit)"); 1988 guarantee(RelAddr::is_in_range_of_RelAddr32(distance), "too far away (more than +/- 2**32"); 1989 } 1990 return; 1991 } 1992 1993 guarantee(false, "not a pcrelative instruction to patch!"); 1994 } 1995 1996 // "Current PC" here means the address just behind the basr instruction. 1997 address MacroAssembler::get_PC(Register result) { 1998 z_basr(result, Z_R0); // Don't branch, just save next instruction address in result. 1999 return pc(); 2000 } 2001 2002 // Get current PC + offset. 2003 // Offset given in bytes, must be even! 2004 // "Current PC" here means the address of the larl instruction plus the given offset. 2005 address MacroAssembler::get_PC(Register result, int64_t offset) { 2006 address here = pc(); 2007 z_larl(result, offset/2); // Save target instruction address in result. 
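// LARL adds twice its halfword immediate to the current PC, hence the division by 2 above (and the requirement that offset be even).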
2008 return here + offset; 2009 } 2010 2011 void MacroAssembler::instr_size(Register size, Register pc) { 2012 // Extract 2 most significant bits of current instruction. 2013 z_llgc(size, Address(pc)); 2014 z_srl(size, 6); 2015 // Compute (x+3)&6 which translates 0->2, 1->4, 2->4, 3->6. 2016 z_ahi(size, 3); 2017 z_nill(size, 6); 2018 } 2019 2020 // Resize_frame with SP(new) = SP(old) - [offset]. 2021 void MacroAssembler::resize_frame_sub(Register offset, Register fp, bool load_fp) 2022 { 2023 assert_different_registers(offset, fp, Z_SP); 2024 if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); } 2025 2026 z_sgr(Z_SP, offset); 2027 z_stg(fp, _z_abi(callers_sp), Z_SP); 2028 } 2029 2030 // Resize_frame with SP(new) = [newSP] + offset. 2031 // This emitter is useful if we already have calculated a pointer 2032 // into the to-be-allocated stack space, e.g. with special alignment properties, 2033 // but need some additional space, e.g. for spilling. 2034 // newSP is the pre-calculated pointer. It must not be modified. 2035 // fp holds, or is filled with, the frame pointer. 2036 // offset is the additional increment which is added to addr to form the new SP. 2037 // Note: specify a negative value to reserve more space! 2038 // load_fp == true only indicates that fp is not pre-filled with the frame pointer. 2039 // It does not guarantee that fp contains the frame pointer at the end. 2040 void MacroAssembler::resize_frame_abs_with_offset(Register newSP, Register fp, int offset, bool load_fp) { 2041 assert_different_registers(newSP, fp, Z_SP); 2042 2043 if (load_fp) { 2044 z_lg(fp, _z_abi(callers_sp), Z_SP); 2045 } 2046 2047 add2reg(Z_SP, offset, newSP); 2048 z_stg(fp, _z_abi(callers_sp), Z_SP); 2049 } 2050 2051 // Resize_frame with SP(new) = [newSP]. 2052 // load_fp == true only indicates that fp is not pre-filled with the frame pointer. 2053 // It does not guarantee that fp contains the frame pointer at the end. 2054 void MacroAssembler::resize_frame_absolute(Register newSP, Register fp, bool load_fp) { 2055 assert_different_registers(newSP, fp, Z_SP); 2056 2057 if (load_fp) { 2058 z_lg(fp, _z_abi(callers_sp), Z_SP); // need to use load/store. 2059 } 2060 2061 z_lgr(Z_SP, newSP); 2062 if (newSP != Z_R0) { // make sure we generate correct code, no matter what register newSP uses. 2063 z_stg(fp, _z_abi(callers_sp), newSP); 2064 } else { 2065 z_stg(fp, _z_abi(callers_sp), Z_SP); 2066 } 2067 } 2068 2069 // Resize_frame with SP(new) = SP(old) + offset. 2070 void MacroAssembler::resize_frame(RegisterOrConstant offset, Register fp, bool load_fp) { 2071 assert_different_registers(fp, Z_SP); 2072 2073 if (load_fp) { 2074 z_lg(fp, _z_abi(callers_sp), Z_SP); 2075 } 2076 add64(Z_SP, offset); 2077 z_stg(fp, _z_abi(callers_sp), Z_SP); 2078 } 2079 2080 void MacroAssembler::push_frame(Register bytes, Register old_sp, bool copy_sp, bool bytes_with_inverted_sign) { 2081 #ifdef ASSERT 2082 assert_different_registers(bytes, old_sp, Z_SP); 2083 if (!copy_sp) { 2084 z_cgr(old_sp, Z_SP); 2085 asm_assert(bcondEqual, "[old_sp]!=[Z_SP]", 0x211); 2086 } 2087 #endif 2088 if (copy_sp) { z_lgr(old_sp, Z_SP); } 2089 if (bytes_with_inverted_sign) { 2090 z_agr(Z_SP, bytes); 2091 } else { 2092 z_sgr(Z_SP, bytes); // Z_sgfr sufficient, but probably not faster. 
2093 } 2094 z_stg(old_sp, _z_abi(callers_sp), Z_SP); 2095 } 2096 2097 unsigned int MacroAssembler::push_frame(unsigned int bytes, Register scratch) { 2098 long offset = Assembler::align(bytes, frame::alignment_in_bytes); 2099 assert(offset > 0, "should push a frame with positive size, size = %ld.", offset); 2100 assert(Displacement::is_validDisp(-offset), "frame size out of range, size = %ld", offset); 2101 2102 // We must not write outside the current stack bounds (given by Z_SP). 2103 // Thus, we have to first update Z_SP and then store the previous SP as stack linkage. 2104 // We rely on Z_R0 by default to be available as scratch. 2105 z_lgr(scratch, Z_SP); 2106 add2reg(Z_SP, -offset); 2107 z_stg(scratch, _z_abi(callers_sp), Z_SP); 2108 #ifdef ASSERT 2109 // Just make sure nobody uses the value in the default scratch register. 2110 // When another register is used, the caller might rely on it containing the frame pointer. 2111 if (scratch == Z_R0) { 2112 z_iihf(scratch, 0xbaadbabe); 2113 z_iilf(scratch, 0xdeadbeef); 2114 } 2115 #endif 2116 return offset; 2117 } 2118 2119 // Push a frame of size `bytes' plus abi160 on top. 2120 unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) { 2121 BLOCK_COMMENT("push_frame_abi160 {"); 2122 unsigned int res = push_frame(bytes + frame::z_abi_160_size); 2123 BLOCK_COMMENT("} push_frame_abi160"); 2124 return res; 2125 } 2126 2127 // Pop current C frame. 2128 void MacroAssembler::pop_frame() { 2129 BLOCK_COMMENT("pop_frame:"); 2130 Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP); 2131 } 2132 2133 // Pop current C frame and restore return PC register (Z_R14). 2134 void MacroAssembler::pop_frame_restore_retPC(int frame_size_in_bytes) { 2135 BLOCK_COMMENT("pop_frame_restore_retPC:"); 2136 int retPC_offset = _z_common_abi(return_pc) + frame_size_in_bytes; 2137 // If possible, pop frame by add instead of load (a penny saved is a penny got :-). 2138 if (Displacement::is_validDisp(retPC_offset)) { 2139 z_lg(Z_R14, retPC_offset, Z_SP); 2140 add2reg(Z_SP, frame_size_in_bytes); 2141 } else { 2142 add2reg(Z_SP, frame_size_in_bytes); 2143 restore_return_pc(); 2144 } 2145 } 2146 2147 void MacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) { 2148 if (allow_relocation) { 2149 call_c(entry_point); 2150 } else { 2151 call_c_static(entry_point); 2152 } 2153 } 2154 2155 void MacroAssembler::call_VM_leaf_base(address entry_point) { 2156 bool allow_relocation = true; 2157 call_VM_leaf_base(entry_point, allow_relocation); 2158 } 2159 2160 int MacroAssembler::ic_check_size() { 2161 return 30 + (ImplicitNullChecks ? 0 : 6); 2162 } 2163 2164 int MacroAssembler::ic_check(int end_alignment) { 2165 Register R2_receiver = Z_ARG1; 2166 Register R0_scratch = Z_R0_scratch; 2167 Register R1_scratch = Z_R1_scratch; 2168 Register R9_data = Z_inline_cache; 2169 Label success, failure; 2170 2171 // The UEP of a code blob ensures that the VEP is padded. However, the padding of the UEP is placed 2172 // before the inline cache check, so we don't have to execute any nop instructions when dispatching 2173 // through the UEP, yet we can ensure that the VEP is aligned appropriately. 
That's why we align 2174 // before the inline cache check here, and not after 2175 align(end_alignment, offset() + ic_check_size()); 2176 2177 int uep_offset = offset(); 2178 if (!ImplicitNullChecks) { 2179 z_cgij(R2_receiver, 0, Assembler::bcondEqual, failure); 2180 } 2181 2182 if (UseCompressedClassPointers) { 2183 z_llgf(R1_scratch, Address(R2_receiver, oopDesc::klass_offset_in_bytes())); 2184 } else { 2185 z_lg(R1_scratch, Address(R2_receiver, oopDesc::klass_offset_in_bytes())); 2186 } 2187 z_cg(R1_scratch, Address(R9_data, in_bytes(CompiledICData::speculated_klass_offset()))); 2188 z_bre(success); 2189 2190 bind(failure); 2191 load_const(R1_scratch, AddressLiteral(SharedRuntime::get_ic_miss_stub())); 2192 z_br(R1_scratch); 2193 bind(success); 2194 2195 assert((offset() % end_alignment) == 0, "Misaligned verified entry point, offset() = %d, end_alignment = %d", offset(), end_alignment); 2196 return uep_offset; 2197 } 2198 2199 void MacroAssembler::call_VM_base(Register oop_result, 2200 Register last_java_sp, 2201 address entry_point, 2202 bool allow_relocation, 2203 bool check_exceptions) { // Defaults to true. 2204 // Allow_relocation indicates, if true, that the generated code shall 2205 // be fit for code relocation or referenced data relocation. In other 2206 // words: all addresses must be considered variable. PC-relative addressing 2207 // is not possible then. 2208 // On the other hand, if (allow_relocation == false), addresses and offsets 2209 // may be considered stable, enabling us to take advantage of some PC-relative 2210 // addressing tweaks. These might improve performance and reduce code size. 2211 2212 // Determine last_java_sp register. 2213 if (!last_java_sp->is_valid()) { 2214 last_java_sp = Z_SP; // Load Z_SP as SP. 2215 } 2216 2217 set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, Z_R1, allow_relocation); 2218 2219 // ARG1 must hold thread address. 2220 z_lgr(Z_ARG1, Z_thread); 2221 2222 address return_pc = nullptr; 2223 if (allow_relocation) { 2224 return_pc = call_c(entry_point); 2225 } else { 2226 return_pc = call_c_static(entry_point); 2227 } 2228 2229 reset_last_Java_frame(allow_relocation); 2230 2231 // C++ interp handles this in the interpreter. 2232 check_and_handle_popframe(Z_thread); 2233 check_and_handle_earlyret(Z_thread); 2234 2235 // Check for pending exceptions. 2236 if (check_exceptions) { 2237 // Check for pending exceptions (java_thread is set upon return). 2238 load_and_test_long(Z_R0_scratch, Address(Z_thread, Thread::pending_exception_offset())); 2239 2240 // This used to conditionally jump to forward_exception however it is 2241 // possible if we relocate that the branch will not reach. So we must jump 2242 // around so we can always reach. 2243 2244 Label ok; 2245 z_bre(ok); // Bcondequal is the same as bcondZero. 2246 call_stub(StubRoutines::forward_exception_entry()); 2247 bind(ok); 2248 } 2249 2250 // Get oop result if there is one and reset the value in the thread. 2251 if (oop_result->is_valid()) { 2252 get_vm_result(oop_result); 2253 } 2254 2255 _last_calls_return_pc = return_pc; // Wipe out other (error handling) calls. 2256 } 2257 2258 void MacroAssembler::call_VM_base(Register oop_result, 2259 Register last_java_sp, 2260 address entry_point, 2261 bool check_exceptions) { // Defaults to true. 2262 bool allow_relocation = true; 2263 call_VM_base(oop_result, last_java_sp, entry_point, allow_relocation, check_exceptions); 2264 } 2265 2266 // VM calls without explicit last_java_sp. 
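// Note: call_VM_base() loads Z_thread into Z_ARG1, which is why the variants below place the first Java argument in Z_ARG2.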
2267 2268 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) { 2269 // Call takes possible detour via InterpreterMacroAssembler. 2270 call_VM_base(oop_result, noreg, entry_point, true, check_exceptions); 2271 } 2272 2273 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) { 2274 // Z_ARG1 is reserved for the thread. 2275 lgr_if_needed(Z_ARG2, arg_1); 2276 call_VM(oop_result, entry_point, check_exceptions); 2277 } 2278 2279 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { 2280 // Z_ARG1 is reserved for the thread. 2281 assert_different_registers(arg_2, Z_ARG2); 2282 lgr_if_needed(Z_ARG2, arg_1); 2283 lgr_if_needed(Z_ARG3, arg_2); 2284 call_VM(oop_result, entry_point, check_exceptions); 2285 } 2286 2287 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, 2288 Register arg_3, bool check_exceptions) { 2289 // Z_ARG1 is reserved for the thread. 2290 assert_different_registers(arg_3, Z_ARG2, Z_ARG3); 2291 assert_different_registers(arg_2, Z_ARG2); 2292 lgr_if_needed(Z_ARG2, arg_1); 2293 lgr_if_needed(Z_ARG3, arg_2); 2294 lgr_if_needed(Z_ARG4, arg_3); 2295 call_VM(oop_result, entry_point, check_exceptions); 2296 } 2297 2298 // VM static calls without explicit last_java_sp. 2299 2300 void MacroAssembler::call_VM_static(Register oop_result, address entry_point, bool check_exceptions) { 2301 // Call takes possible detour via InterpreterMacroAssembler. 2302 call_VM_base(oop_result, noreg, entry_point, false, check_exceptions); 2303 } 2304 2305 void MacroAssembler::call_VM_static(Register oop_result, address entry_point, Register arg_1, Register arg_2, 2306 Register arg_3, bool check_exceptions) { 2307 // Z_ARG1 is reserved for the thread. 2308 assert_different_registers(arg_3, Z_ARG2, Z_ARG3); 2309 assert_different_registers(arg_2, Z_ARG2); 2310 lgr_if_needed(Z_ARG2, arg_1); 2311 lgr_if_needed(Z_ARG3, arg_2); 2312 lgr_if_needed(Z_ARG4, arg_3); 2313 call_VM_static(oop_result, entry_point, check_exceptions); 2314 } 2315 2316 // VM calls with explicit last_java_sp. 2317 2318 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions) { 2319 // Call takes possible detour via InterpreterMacroAssembler. 2320 call_VM_base(oop_result, last_java_sp, entry_point, true, check_exceptions); 2321 } 2322 2323 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) { 2324 // Z_ARG1 is reserved for the thread. 2325 lgr_if_needed(Z_ARG2, arg_1); 2326 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2327 } 2328 2329 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, 2330 Register arg_2, bool check_exceptions) { 2331 // Z_ARG1 is reserved for the thread. 2332 assert_different_registers(arg_2, Z_ARG2); 2333 lgr_if_needed(Z_ARG2, arg_1); 2334 lgr_if_needed(Z_ARG3, arg_2); 2335 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2336 } 2337 2338 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, 2339 Register arg_2, Register arg_3, bool check_exceptions) { 2340 // Z_ARG1 is reserved for the thread. 
2341 assert_different_registers(arg_3, Z_ARG2, Z_ARG3); 2342 assert_different_registers(arg_2, Z_ARG2); 2343 lgr_if_needed(Z_ARG2, arg_1); 2344 lgr_if_needed(Z_ARG3, arg_2); 2345 lgr_if_needed(Z_ARG4, arg_3); 2346 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2347 } 2348 2349 // VM leaf calls. 2350 2351 void MacroAssembler::call_VM_leaf(address entry_point) { 2352 // Call takes possible detour via InterpreterMacroAssembler. 2353 call_VM_leaf_base(entry_point, true); 2354 } 2355 2356 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) { 2357 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2358 call_VM_leaf(entry_point); 2359 } 2360 2361 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) { 2362 assert_different_registers(arg_2, Z_ARG1); 2363 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2364 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2365 call_VM_leaf(entry_point); 2366 } 2367 2368 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) { 2369 assert_different_registers(arg_3, Z_ARG1, Z_ARG2); 2370 assert_different_registers(arg_2, Z_ARG1); 2371 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2372 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2373 if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3); 2374 call_VM_leaf(entry_point); 2375 } 2376 2377 // Static VM leaf calls. 2378 // Really static VM leaf calls are never patched. 2379 2380 void MacroAssembler::call_VM_leaf_static(address entry_point) { 2381 // Call takes possible detour via InterpreterMacroAssembler. 2382 call_VM_leaf_base(entry_point, false); 2383 } 2384 2385 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1) { 2386 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2387 call_VM_leaf_static(entry_point); 2388 } 2389 2390 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2) { 2391 assert_different_registers(arg_2, Z_ARG1); 2392 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2393 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2394 call_VM_leaf_static(entry_point); 2395 } 2396 2397 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2, Register arg_3) { 2398 assert_different_registers(arg_3, Z_ARG1, Z_ARG2); 2399 assert_different_registers(arg_2, Z_ARG1); 2400 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2401 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2402 if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3); 2403 call_VM_leaf_static(entry_point); 2404 } 2405 2406 // Don't use detour via call_c(reg). 2407 address MacroAssembler::call_c(address function_entry) { 2408 load_const(Z_R1, function_entry); 2409 return call(Z_R1); 2410 } 2411 2412 // Variant for really static (non-relocatable) calls which are never patched. 2413 address MacroAssembler::call_c_static(address function_entry) { 2414 load_absolute_address(Z_R1, function_entry); 2415 #if 0 // def ASSERT 2416 // Verify that call site did not move. 2417 load_const_optimized(Z_R0, function_entry); 2418 z_cgr(Z_R1, Z_R0); 2419 z_brc(bcondEqual, 3); 2420 z_illtrap(0xba); 2421 #endif 2422 return call(Z_R1); 2423 } 2424 2425 address MacroAssembler::call_c_opt(address function_entry) { 2426 bool success = call_far_patchable(function_entry, -2 /* emit relocation + constant */); 2427 _last_calls_return_pc = success ? 
pc() : nullptr; 2428 return _last_calls_return_pc; 2429 } 2430 2431 // Identify a call_far_patchable instruction: LARL + LG + BASR 2432 // 2433 // nop ; optionally, if required for alignment 2434 // lgrl rx,A(TOC entry) ; PC-relative access into constant pool 2435 // basr Z_R14,rx ; end of this instruction must be aligned to a word boundary 2436 // 2437 // Code pattern will eventually get patched into variant2 (see below for detection code). 2438 // 2439 bool MacroAssembler::is_call_far_patchable_variant0_at(address instruction_addr) { 2440 address iaddr = instruction_addr; 2441 2442 // Check for the actual load instruction. 2443 if (!is_load_const_from_toc(iaddr)) { return false; } 2444 iaddr += load_const_from_toc_size(); 2445 2446 // Check for the call (BASR) instruction, finally. 2447 assert(iaddr-instruction_addr+call_byregister_size() == call_far_patchable_size(), "size mismatch"); 2448 return is_call_byregister(iaddr); 2449 } 2450 2451 // Identify a call_far_patchable instruction: BRASL 2452 // 2453 // Code pattern to suit atomic patching: 2454 // nop ; Optionally, if required for alignment. 2455 // nop ... ; Multiple filler nops to compensate for size difference (variant0 is longer). 2456 // nop ; For code pattern detection: Prepend each BRASL with a nop. 2457 // brasl Z_R14,<reladdr> ; End of code must be 4-byte aligned! 2458 bool MacroAssembler::is_call_far_patchable_variant2_at(address instruction_addr) { 2459 const address call_addr = (address)((intptr_t)instruction_addr + call_far_patchable_size() - call_far_pcrelative_size()); 2460 2461 // Check for correct number of leading nops. 2462 address iaddr; 2463 for (iaddr = instruction_addr; iaddr < call_addr; iaddr += nop_size()) { 2464 if (!is_z_nop(iaddr)) { return false; } 2465 } 2466 assert(iaddr == call_addr, "sanity"); 2467 2468 // --> Check for call instruction. 2469 if (is_call_far_pcrelative(call_addr)) { 2470 assert(call_addr-instruction_addr+call_far_pcrelative_size() == call_far_patchable_size(), "size mismatch"); 2471 return true; 2472 } 2473 2474 return false; 2475 } 2476 2477 // Emit a 64-bit absolute call that is NOT mt-safely patchable. 2478 // If toc_offset == -2, then the destination of the call (= target) is emitted 2479 // to the constant pool and a runtime_call relocation is added 2480 // to the code buffer. 2481 // If toc_offset != -2, target must already be in the constant pool at 2482 // _ctableStart+toc_offset (a caller can retrieve toc_offset 2483 // from the runtime_call relocation). 2484 // Special handling of emitting to scratch buffer when there is no constant pool. 2485 // Slightly changed code pattern. We emit an additional nop if we would 2486 // not end emitting at a word-aligned address. This is to ensure 2487 // an atomically patchable displacement in brasl instructions. 2488 // 2489 // A call_far_patchable comes in different flavors: 2490 // - LARL(CP) / LG(CP) / BR (address in constant pool, access via CP register) 2491 // - LGRL(CP) / BR (address in constant pool, pc-relative access) 2492 // - BRASL (relative address of call target coded in instruction) 2493 // All flavors occupy the same amount of space. Length differences are compensated 2494 // by leading nops, such that the instruction sequence always ends at the same 2495 // byte offset. This is required to keep the return offset constant. 2496 // Furthermore, the return address (the end of the instruction sequence) is forced 2497 // to be on a 4-byte boundary. 
This is required for atomic patching, should we ever 2498 // need to patch the call target of the BRASL flavor. 2499 // RETURN value: false, if no constant pool entry could be allocated, true otherwise. 2500 bool MacroAssembler::call_far_patchable(address target, int64_t tocOffset) { 2501 // Get current pc and ensure word alignment for end of instr sequence. 2502 const address start_pc = pc(); 2503 const intptr_t start_off = offset(); 2504 assert(!call_far_patchable_requires_alignment_nop(start_pc), "call_far_patchable requires aligned address"); 2505 const ptrdiff_t dist = (ptrdiff_t)(target - (start_pc + 2)); // Prepend each BRASL with a nop. 2506 const bool emit_target_to_pool = (tocOffset == -2) && !code_section()->scratch_emit(); 2507 const bool emit_relative_call = !emit_target_to_pool && 2508 RelAddr::is_in_range_of_RelAddr32(dist) && 2509 ReoptimizeCallSequences && 2510 !code_section()->scratch_emit(); 2511 2512 if (emit_relative_call) { 2513 // Add padding to get the same size as below. 2514 const unsigned int padding = call_far_patchable_size() - call_far_pcrelative_size(); 2515 unsigned int current_padding; 2516 for (current_padding = 0; current_padding < padding; current_padding += nop_size()) { z_nop(); } 2517 assert(current_padding == padding, "sanity"); 2518 2519 // relative call: len = 2(nop) + 6 (brasl) 2520 // CodeBlob resize cannot occur in this case because 2521 // this call is emitted into pre-existing space. 2522 z_nop(); // Prepend each BRASL with a nop. 2523 z_brasl(Z_R14, target); 2524 } else { 2525 // absolute call: Get address from TOC. 2526 // len = (load TOC){6|0} + (load from TOC){6} + (basr){2} = {14|8} 2527 if (emit_target_to_pool) { 2528 // When emitting the call for the first time, we do not need to use 2529 // the pc-relative version. It will be patched anyway, when the code 2530 // buffer is copied. 2531 // Relocation is not needed when !ReoptimizeCallSequences. 2532 relocInfo::relocType rt = ReoptimizeCallSequences ? relocInfo::runtime_call_w_cp_type : relocInfo::none; 2533 AddressLiteral dest(target, rt); 2534 // Store_oop_in_toc() adds dest to the constant table. As side effect, this kills 2535 // inst_mark(). Reset if possible. 2536 bool reset_mark = (inst_mark() == pc()); 2537 tocOffset = store_oop_in_toc(dest); 2538 if (reset_mark) { set_inst_mark(); } 2539 if (tocOffset == -1) { 2540 return false; // Couldn't create constant pool entry. 2541 } 2542 } 2543 assert(offset() == start_off, "emit no code before this point!"); 2544 2545 address tocPos = pc() + tocOffset; 2546 if (emit_target_to_pool) { 2547 tocPos = code()->consts()->start() + tocOffset; 2548 } 2549 load_long_pcrelative(Z_R14, tocPos); 2550 z_basr(Z_R14, Z_R14); 2551 } 2552 2553 #ifdef ASSERT 2554 // Assert that we can identify the emitted call. 2555 assert(is_call_far_patchable_at(addr_at(start_off)), "can't identify emitted call"); 2556 assert(offset() == start_off+call_far_patchable_size(), "wrong size"); 2557 2558 if (emit_target_to_pool) { 2559 assert(get_dest_of_call_far_patchable_at(addr_at(start_off), code()->consts()->start()) == target, 2560 "wrong encoding of dest address"); 2561 } 2562 #endif 2563 return true; // success 2564 } 2565 2566 // Identify a call_far_patchable instruction. 2567 // For more detailed information see header comment of call_far_patchable. 
2568 bool MacroAssembler::is_call_far_patchable_at(address instruction_addr) { 2569 return is_call_far_patchable_variant2_at(instruction_addr) || // short version: BRASL 2570 is_call_far_patchable_variant0_at(instruction_addr); // long version LARL + LG + BASR 2571 } 2572 2573 // Does the call_far_patchable instruction use a pc-relative encoding 2574 // of the call destination? 2575 bool MacroAssembler::is_call_far_patchable_pcrelative_at(address instruction_addr) { 2576 // Variant 2 is pc-relative. 2577 return is_call_far_patchable_variant2_at(instruction_addr); 2578 } 2579 2580 bool MacroAssembler::is_call_far_pcrelative(address instruction_addr) { 2581 // Prepend each BRASL with a nop. 2582 return is_z_nop(instruction_addr) && is_z_brasl(instruction_addr + nop_size()); // Match at position after one nop required. 2583 } 2584 2585 // Set destination address of a call_far_patchable instruction. 2586 void MacroAssembler::set_dest_of_call_far_patchable_at(address instruction_addr, address dest, int64_t tocOffset) { 2587 ResourceMark rm; 2588 2589 // Now that CP entry is verified, patch call to a pc-relative call (if circumstances permit). 2590 int code_size = MacroAssembler::call_far_patchable_size(); 2591 CodeBuffer buf(instruction_addr, code_size); 2592 MacroAssembler masm(&buf); 2593 masm.call_far_patchable(dest, tocOffset); 2594 ICache::invalidate_range(instruction_addr, code_size); // Empty on z. 2595 } 2596 2597 // Get dest address of a call_far_patchable instruction. 2598 address MacroAssembler::get_dest_of_call_far_patchable_at(address instruction_addr, address ctable) { 2599 // Dynamic TOC: absolute address in constant pool. 2600 // Check variant2 first, it is more frequent. 2601 2602 // Relative address encoded in call instruction. 2603 if (is_call_far_patchable_variant2_at(instruction_addr)) { 2604 return MacroAssembler::get_target_addr_pcrel(instruction_addr + nop_size()); // Prepend each BRASL with a nop. 2605 2606 // Absolute address in constant pool. 2607 } else if (is_call_far_patchable_variant0_at(instruction_addr)) { 2608 address iaddr = instruction_addr; 2609 2610 long tocOffset = get_load_const_from_toc_offset(iaddr); 2611 address tocLoc = iaddr + tocOffset; 2612 return *(address *)(tocLoc); 2613 } else { 2614 fprintf(stderr, "MacroAssembler::get_dest_of_call_far_patchable_at has a problem at %p:\n", instruction_addr); 2615 fprintf(stderr, "not a call_far_patchable: %16.16lx %16.16lx, len = %d\n", 2616 *(unsigned long*)instruction_addr, 2617 *(unsigned long*)(instruction_addr+8), 2618 call_far_patchable_size()); 2619 Disassembler::decode(instruction_addr, instruction_addr+call_far_patchable_size()); 2620 ShouldNotReachHere(); 2621 return nullptr; 2622 } 2623 } 2624 2625 void MacroAssembler::align_call_far_patchable(address pc) { 2626 if (call_far_patchable_requires_alignment_nop(pc)) { z_nop(); } 2627 } 2628 2629 void MacroAssembler::check_and_handle_earlyret(Register java_thread) { 2630 } 2631 2632 void MacroAssembler::check_and_handle_popframe(Register java_thread) { 2633 } 2634 2635 // Read from the polling page. 2636 // Use TM or TMY instruction, depending on read offset. 2637 // offset = 0: Use TM, safepoint polling. 2638 // offset < 0: Use TMY, profiling safepoint polling. 
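// The dispatch below keys on whether the offset fits an unsigned 12-bit displacement (TM); otherwise the long-displacement TMY form is used.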
2639 void MacroAssembler::load_from_polling_page(Register polling_page_address, int64_t offset) { 2640 if (Immediate::is_uimm12(offset)) { 2641 z_tm(offset, polling_page_address, mask_safepoint); 2642 } else { 2643 z_tmy(offset, polling_page_address, mask_profiling); 2644 } 2645 } 2646 2647 // Check whether z_instruction is a read access to the polling page 2648 // which was emitted by load_from_polling_page(..). 2649 bool MacroAssembler::is_load_from_polling_page(address instr_loc) { 2650 unsigned long z_instruction; 2651 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2652 2653 if (ilen == 2) { return false; } // It's none of the allowed instructions. 2654 2655 if (ilen == 4) { 2656 if (!is_z_tm(z_instruction)) { return false; } // It's len=4, but not a z_tm. fail. 2657 2658 int ms = inv_mask(z_instruction,8,32); // mask 2659 int ra = inv_reg(z_instruction,16,32); // base register 2660 int ds = inv_uimm12(z_instruction); // displacement 2661 2662 if (!(ds == 0 && ra != 0 && ms == mask_safepoint)) { 2663 return false; // It's not a z_tm(0, ra, mask_safepoint). Fail. 2664 } 2665 2666 } else { /* if (ilen == 6) */ 2667 2668 assert(!is_z_lg(z_instruction), "old form (LG) polling page access. Please fix and use TM(Y)."); 2669 2670 if (!is_z_tmy(z_instruction)) { return false; } // It's len=6, but not a z_tmy. fail. 2671 2672 int ms = inv_mask(z_instruction,8,48); // mask 2673 int ra = inv_reg(z_instruction,16,48); // base register 2674 int ds = inv_simm20(z_instruction); // displacement 2675 } 2676 2677 return true; 2678 } 2679 2680 // Extract poll address from instruction and ucontext. 2681 address MacroAssembler::get_poll_address(address instr_loc, void* ucontext) { 2682 assert(ucontext != nullptr, "must have ucontext"); 2683 ucontext_t* uc = (ucontext_t*) ucontext; 2684 unsigned long z_instruction; 2685 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2686 2687 if (ilen == 4 && is_z_tm(z_instruction)) { 2688 int ra = inv_reg(z_instruction, 16, 32); // base register 2689 int ds = inv_uimm12(z_instruction); // displacement 2690 address addr = (address)uc->uc_mcontext.gregs[ra]; 2691 return addr + ds; 2692 } else if (ilen == 6 && is_z_tmy(z_instruction)) { 2693 int ra = inv_reg(z_instruction, 16, 48); // base register 2694 int ds = inv_simm20(z_instruction); // displacement 2695 address addr = (address)uc->uc_mcontext.gregs[ra]; 2696 return addr + ds; 2697 } 2698 2699 ShouldNotReachHere(); 2700 return nullptr; 2701 } 2702 2703 // Extract poll register from instruction. 2704 uint MacroAssembler::get_poll_register(address instr_loc) { 2705 unsigned long z_instruction; 2706 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2707 2708 if (ilen == 4 && is_z_tm(z_instruction)) { 2709 return (uint)inv_reg(z_instruction, 16, 32); // base register 2710 } else if (ilen == 6 && is_z_tmy(z_instruction)) { 2711 return (uint)inv_reg(z_instruction, 16, 48); // base register 2712 } 2713 2714 ShouldNotReachHere(); 2715 return 0; 2716 } 2717 2718 void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) { 2719 const Address poll_byte_addr(Z_thread, in_bytes(JavaThread::polling_word_offset()) + 7 /* Big Endian */); 2720 // Armed page has poll_bit set. 2721 z_tm(poll_byte_addr, SafepointMechanism::poll_bit()); 2722 z_brnaz(slow_path); 2723 } 2724 2725 // Don't rely on register locking, always use Z_R1 as scratch register instead. 2726 void MacroAssembler::bang_stack_with_offset(int offset) { 2727 // Stack grows down, caller passes positive offset. 
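// The probe merely tests the byte at Z_SP - offset; TM/TMY do not modify memory.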
2728 assert(offset > 0, "must bang with positive offset"); 2729 if (Displacement::is_validDisp(-offset)) { 2730 z_tmy(-offset, Z_SP, mask_stackbang); 2731 } else { 2732 add2reg(Z_R1, -offset, Z_SP); // Do not destroy Z_SP!!! 2733 z_tm(0, Z_R1, mask_stackbang); // Just banging. 2734 } 2735 } 2736 2737 void MacroAssembler::reserved_stack_check(Register return_pc) { 2738 // Test if reserved zone needs to be enabled. 2739 Label no_reserved_zone_enabling; 2740 assert(return_pc == Z_R14, "Return pc must be in R14 before z_br() to StackOverflow stub."); 2741 BLOCK_COMMENT("reserved_stack_check {"); 2742 2743 z_clg(Z_SP, Address(Z_thread, JavaThread::reserved_stack_activation_offset())); 2744 z_brl(no_reserved_zone_enabling); 2745 2746 // Enable reserved zone again, throw stack overflow exception. 2747 save_return_pc(); 2748 push_frame_abi160(0); 2749 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), Z_thread); 2750 pop_frame(); 2751 restore_return_pc(); 2752 2753 load_const_optimized(Z_R1, StubRoutines::throw_delayed_StackOverflowError_entry()); 2754 // Don't use call() or z_basr(), they will invalidate Z_R14 which contains the return pc. 2755 z_br(Z_R1); 2756 2757 should_not_reach_here(); 2758 2759 bind(no_reserved_zone_enabling); 2760 BLOCK_COMMENT("} reserved_stack_check"); 2761 } 2762 2763 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 2764 void MacroAssembler::tlab_allocate(Register obj, 2765 Register var_size_in_bytes, 2766 int con_size_in_bytes, 2767 Register t1, 2768 Label& slow_case) { 2769 assert_different_registers(obj, var_size_in_bytes, t1); 2770 Register end = t1; 2771 Register thread = Z_thread; 2772 2773 z_lg(obj, Address(thread, JavaThread::tlab_top_offset())); 2774 if (var_size_in_bytes == noreg) { 2775 z_lay(end, Address(obj, con_size_in_bytes)); 2776 } else { 2777 z_lay(end, Address(obj, var_size_in_bytes)); 2778 } 2779 z_cg(end, Address(thread, JavaThread::tlab_end_offset())); 2780 branch_optimized(bcondHigh, slow_case); 2781 2782 // Update the tlab top pointer. 2783 z_stg(end, Address(thread, JavaThread::tlab_top_offset())); 2784 2785 // Recover var_size_in_bytes if necessary. 2786 if (var_size_in_bytes == end) { 2787 z_sgr(var_size_in_bytes, obj); 2788 } 2789 } 2790 2791 // Emitter for interface method lookup. 2792 // input: recv_klass, intf_klass, itable_index 2793 // output: method_result 2794 // kills: itable_index, temp1_reg, Z_R0, Z_R1 2795 // TODO: Temp2_reg is unused. we may use this emitter also in the itable stubs. 2796 // If the register is still not needed then, remove it. 2797 void MacroAssembler::lookup_interface_method(Register recv_klass, 2798 Register intf_klass, 2799 RegisterOrConstant itable_index, 2800 Register method_result, 2801 Register temp1_reg, 2802 Label& no_such_interface, 2803 bool return_method) { 2804 2805 const Register vtable_len = temp1_reg; // Used to compute itable_entry_addr. 2806 const Register itable_entry_addr = Z_R1_scratch; 2807 const Register itable_interface = Z_R0_scratch; 2808 2809 BLOCK_COMMENT("lookup_interface_method {"); 2810 2811 // Load start of itable entries into itable_entry_addr. 2812 z_llgf(vtable_len, Address(recv_klass, Klass::vtable_length_offset())); 2813 z_sllg(vtable_len, vtable_len, exact_log2(vtableEntry::size_in_bytes())); 2814 2815 // Loop over all itable entries until desired interfaceOop(Rinterface) found. 
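// Start address: itable_entry_addr = recv_klass + vtable_start_offset + interface_offset + vtable_len (scaled to bytes above).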
2816 add2reg_with_index(itable_entry_addr, 2817 in_bytes(Klass::vtable_start_offset() + itableOffsetEntry::interface_offset()), 2818 recv_klass, vtable_len); 2819 2820 const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize; 2821 Label search; 2822 2823 bind(search); 2824 2825 // Handle IncompatibleClassChangeError. 2826 // If the entry is null then we've reached the end of the table 2827 // without finding the expected interface, so throw an exception. 2828 load_and_test_long(itable_interface, Address(itable_entry_addr)); 2829 z_bre(no_such_interface); 2830 2831 add2reg(itable_entry_addr, itable_offset_search_inc); 2832 z_cgr(itable_interface, intf_klass); 2833 z_brne(search); 2834 2835 // Entry found and itable_entry_addr points to it, get offset of vtable for interface. 2836 if (return_method) { 2837 const int vtable_offset_offset = in_bytes(itableOffsetEntry::offset_offset() - 2838 itableOffsetEntry::interface_offset()) - 2839 itable_offset_search_inc; 2840 2841 // Compute itableMethodEntry and get method and entry point 2842 // we use addressing with index and displacement, since the formula 2843 // for computing the entry's offset has a fixed and a dynamic part, 2844 // the latter depending on the matched interface entry and on the case, 2845 // that the itable index has been passed as a register, not a constant value. 2846 int method_offset = in_bytes(itableMethodEntry::method_offset()); 2847 // Fixed part (displacement), common operand. 2848 Register itable_offset = method_result; // Dynamic part (index register). 2849 2850 if (itable_index.is_register()) { 2851 // Compute the method's offset in that register, for the formula, see the 2852 // else-clause below. 2853 z_sllg(itable_offset, itable_index.as_register(), exact_log2(itableMethodEntry::size() * wordSize)); 2854 z_agf(itable_offset, vtable_offset_offset, itable_entry_addr); 2855 } else { 2856 // Displacement increases. 2857 method_offset += itableMethodEntry::size() * wordSize * itable_index.as_constant(); 2858 2859 // Load index from itable. 2860 z_llgf(itable_offset, vtable_offset_offset, itable_entry_addr); 2861 } 2862 2863 // Finally load the method's oop. 2864 z_lg(method_result, method_offset, itable_offset, recv_klass); 2865 } 2866 BLOCK_COMMENT("} lookup_interface_method"); 2867 } 2868 2869 // Lookup for virtual method invocation. 2870 void MacroAssembler::lookup_virtual_method(Register recv_klass, 2871 RegisterOrConstant vtable_index, 2872 Register method_result) { 2873 assert_different_registers(recv_klass, vtable_index.register_or_noreg()); 2874 assert(vtableEntry::size() * wordSize == wordSize, 2875 "else adjust the scaling in the code below"); 2876 2877 BLOCK_COMMENT("lookup_virtual_method {"); 2878 2879 const int base = in_bytes(Klass::vtable_start_offset()); 2880 2881 if (vtable_index.is_constant()) { 2882 // Load with base + disp. 2883 Address vtable_entry_addr(recv_klass, 2884 vtable_index.as_constant() * wordSize + 2885 base + 2886 in_bytes(vtableEntry::method_offset())); 2887 2888 z_lg(method_result, vtable_entry_addr); 2889 } else { 2890 // Shift index properly and load with base + index + disp. 2891 Register vindex = vtable_index.as_register(); 2892 Address vtable_entry_addr(recv_klass, vindex, 2893 base + in_bytes(vtableEntry::method_offset())); 2894 2895 z_sllg(vindex, vindex, exact_log2(wordSize)); 2896 z_lg(method_result, vtable_entry_addr); 2897 } 2898 BLOCK_COMMENT("} lookup_virtual_method"); 2899 } 2900 2901 // Factor out code to call ic_miss_handler. 
2902 // Generate code to call the inline cache miss handler. 2903 // 2904 // In most cases, this code will be generated out-of-line. 2905 // The method parameters are intended to provide some variability. 2906 // ICM - Label which has to be bound to the start of useful code (past any traps). 2907 // trapMarker - Marking byte for the generated illtrap instructions (if any). 2908 // Any value except 0x00 is supported. 2909 // = 0x00 - do not generate illtrap instructions. 2910 // use nops to fill unused space. 2911 // requiredSize - required size of the generated code. If the actually 2912 // generated code is smaller, use padding instructions to fill up. 2913 // = 0 - no size requirement, no padding. 2914 // scratch - scratch register to hold branch target address. 2915 // 2916 // The method returns the code offset of the bound label. 2917 unsigned int MacroAssembler::call_ic_miss_handler(Label& ICM, int trapMarker, int requiredSize, Register scratch) { 2918 intptr_t startOffset = offset(); 2919 2920 // Prevent entry at content_begin(). 2921 if (trapMarker != 0) { 2922 z_illtrap(trapMarker); 2923 } 2924 2925 // Load address of inline cache miss code into scratch register 2926 // and branch to cache miss handler. 2927 BLOCK_COMMENT("IC miss handler {"); 2928 BIND(ICM); 2929 unsigned int labelOffset = offset(); 2930 AddressLiteral icmiss(SharedRuntime::get_ic_miss_stub()); 2931 2932 load_const_optimized(scratch, icmiss); 2933 z_br(scratch); 2934 2935 // Fill unused space. 2936 if (requiredSize > 0) { 2937 while ((offset() - startOffset) < requiredSize) { 2938 if (trapMarker == 0) { 2939 z_nop(); 2940 } else { 2941 z_illtrap(trapMarker); 2942 } 2943 } 2944 } 2945 BLOCK_COMMENT("} IC miss handler"); 2946 return labelOffset; 2947 } 2948 2949 void MacroAssembler::nmethod_UEP(Label& ic_miss) { 2950 Register ic_reg = Z_inline_cache; 2951 int klass_offset = oopDesc::klass_offset_in_bytes(); 2952 if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) { 2953 if (VM_Version::has_CompareBranch()) { 2954 z_cgij(Z_ARG1, 0, Assembler::bcondEqual, ic_miss); 2955 } else { 2956 z_ltgr(Z_ARG1, Z_ARG1); 2957 z_bre(ic_miss); 2958 } 2959 } 2960 // Compare cached class against klass from receiver. 2961 compare_klass_ptr(ic_reg, klass_offset, Z_ARG1, false); 2962 z_brne(ic_miss); 2963 } 2964 2965 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 2966 Register super_klass, 2967 Register temp1_reg, 2968 Label* L_success, 2969 Label* L_failure, 2970 Label* L_slow_path, 2971 RegisterOrConstant super_check_offset) { 2972 2973 const int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 2974 const int sco_offset = in_bytes(Klass::super_check_offset_offset()); 2975 2976 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); 2977 bool need_slow_path = (must_load_sco || 2978 super_check_offset.constant_or_zero() == sc_offset); 2979 2980 // Input registers must not overlap. 
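// temp1_reg is needed only when the super check offset must be loaded from super_klass (see must_load_sco above).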
2981 assert_different_registers(sub_klass, super_klass, temp1_reg); 2982 if (super_check_offset.is_register()) { 2983 assert_different_registers(sub_klass, super_klass, 2984 super_check_offset.as_register()); 2985 } else if (must_load_sco) { 2986 assert(temp1_reg != noreg, "supply either a temp or a register offset"); 2987 } 2988 2989 const Register Rsuper_check_offset = temp1_reg; 2990 2991 NearLabel L_fallthrough; 2992 int label_nulls = 0; 2993 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 2994 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 2995 if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; } 2996 assert(label_nulls <= 1 || 2997 (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path), 2998 "at most one null in the batch, usually"); 2999 3000 BLOCK_COMMENT("check_klass_subtype_fast_path {"); 3001 // If the pointers are equal, we are done (e.g., String[] elements). 3002 // This self-check enables sharing of secondary supertype arrays among 3003 // non-primary types such as array-of-interface. Otherwise, each such 3004 // type would need its own customized SSA. 3005 // We move this check to the front of the fast path because many 3006 // type checks are in fact trivially successful in this manner, 3007 // so we get a nicely predicted branch right at the start of the check. 3008 compare64_and_branch(sub_klass, super_klass, bcondEqual, *L_success); 3009 3010 // Check the supertype display, which is uint. 3011 if (must_load_sco) { 3012 z_llgf(Rsuper_check_offset, sco_offset, super_klass); 3013 super_check_offset = RegisterOrConstant(Rsuper_check_offset); 3014 } 3015 Address super_check_addr(sub_klass, super_check_offset, 0); 3016 z_cg(super_klass, super_check_addr); // compare w/ displayed supertype 3017 3018 // This check has worked decisively for primary supers. 3019 // Secondary supers are sought in the super_cache ('super_cache_addr'). 3020 // (Secondary supers are interfaces and very deeply nested subtypes.) 3021 // This works in the same check above because of a tricky aliasing 3022 // between the super_cache and the primary super display elements. 3023 // (The 'super_check_addr' can address either, as the case requires.) 3024 // Note that the cache is updated below if it does not help us find 3025 // what we need immediately. 3026 // So if it was a primary super, we can just fail immediately. 3027 // Otherwise, it's the slow path for us (no success at this point). 3028 3029 // Hacked jmp, which may only be used just before L_fallthrough. 3030 #define final_jmp(label) \ 3031 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 3032 else { branch_optimized(Assembler::bcondAlways, label); } /*omit semicolon*/ 3033 3034 if (super_check_offset.is_register()) { 3035 branch_optimized(Assembler::bcondEqual, *L_success); 3036 z_cfi(super_check_offset.as_register(), sc_offset); 3037 if (L_failure == &L_fallthrough) { 3038 branch_optimized(Assembler::bcondEqual, *L_slow_path); 3039 } else { 3040 branch_optimized(Assembler::bcondNotEqual, *L_failure); 3041 final_jmp(*L_slow_path); 3042 } 3043 } else if (super_check_offset.as_constant() == sc_offset) { 3044 // Need a slow path; fast failure is impossible. 3045 if (L_slow_path == &L_fallthrough) { 3046 branch_optimized(Assembler::bcondEqual, *L_success); 3047 } else { 3048 branch_optimized(Assembler::bcondNotEqual, *L_slow_path); 3049 final_jmp(*L_success); 3050 } 3051 } else { 3052 // No slow path; it's a fast decision. 
3053 if (L_failure == &L_fallthrough) { 3054 branch_optimized(Assembler::bcondEqual, *L_success); 3055 } else { 3056 branch_optimized(Assembler::bcondNotEqual, *L_failure); 3057 final_jmp(*L_success); 3058 } 3059 } 3060 3061 bind(L_fallthrough); 3062 #undef local_brc 3063 #undef final_jmp 3064 BLOCK_COMMENT("} check_klass_subtype_fast_path"); 3065 // fallthru (to slow path) 3066 } 3067 3068 void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass, 3069 Register Rsuperklass, 3070 Register Rarray_ptr, // tmp 3071 Register Rlength, // tmp 3072 Label* L_success, 3073 Label* L_failure) { 3074 // Input registers must not overlap. 3075 // Also check for R1 which is explicitly used here. 3076 assert_different_registers(Z_R1, Rsubklass, Rsuperklass, Rarray_ptr, Rlength); 3077 NearLabel L_fallthrough; 3078 int label_nulls = 0; 3079 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 3080 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 3081 assert(label_nulls <= 1, "at most one null in the batch"); 3082 3083 const int ss_offset = in_bytes(Klass::secondary_supers_offset()); 3084 const int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3085 3086 const int length_offset = Array<Klass*>::length_offset_in_bytes(); 3087 const int base_offset = Array<Klass*>::base_offset_in_bytes(); 3088 3089 // Hacked jmp, which may only be used just before L_fallthrough. 3090 #define final_jmp(label) \ 3091 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 3092 else branch_optimized(Assembler::bcondAlways, label) /*omit semicolon*/ 3093 3094 NearLabel loop_iterate, loop_count, match; 3095 3096 BLOCK_COMMENT("check_klass_subtype_slow_path {"); 3097 z_lg(Rarray_ptr, ss_offset, Rsubklass); 3098 3099 load_and_test_int(Rlength, Address(Rarray_ptr, length_offset)); 3100 branch_optimized(Assembler::bcondZero, *L_failure); 3101 3102 // Oops in table are NO MORE compressed. 3103 z_cg(Rsuperklass, base_offset, Rarray_ptr); // Check array element for match. 3104 z_bre(match); // Shortcut for array length = 1. 3105 3106 // No match yet, so we must walk the array's elements. 3107 z_lngfr(Rlength, Rlength); 3108 z_sllg(Rlength, Rlength, LogBytesPerWord); // -#bytes of cache array 3109 z_llill(Z_R1, BytesPerWord); // Set increment/end index. 3110 add2reg(Rlength, 2 * BytesPerWord); // start index = -(n-2)*BytesPerWord 3111 z_slgr(Rarray_ptr, Rlength); // start addr: += (n-2)*BytesPerWord 3112 z_bru(loop_count); 3113 3114 BIND(loop_iterate); 3115 z_cg(Rsuperklass, base_offset, Rlength, Rarray_ptr); // Check array element for match. 3116 z_bre(match); 3117 BIND(loop_count); 3118 z_brxlg(Rlength, Z_R1, loop_iterate); 3119 3120 // Rsuperklass not found among secondary super classes -> failure. 3121 branch_optimized(Assembler::bcondAlways, *L_failure); 3122 3123 // Got a hit. Return success (zero result). Set cache. 3124 // Cache load doesn't happen here. For speed it is directly emitted by the compiler. 3125 3126 BIND(match); 3127 3128 z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache. 3129 3130 final_jmp(*L_success); 3131 3132 // Exit to the surrounding code. 3133 BIND(L_fallthrough); 3134 #undef local_brc 3135 #undef final_jmp 3136 BLOCK_COMMENT("} check_klass_subtype_slow_path"); 3137 } 3138 3139 // Emitter for combining fast and slow path. 
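// Rough sketch of the combined control flow (for orientation only; see the two emitters above):
//   fast path:  sub_klass == super_klass, or primary-super hit          --> L_success
//               definite miss (constant offset, no slow case needed)    --> failure
//               otherwise fall through to the slow path
//   slow path:  scan the secondary supers array                         --> L_success on hit
//   failure:    bound right after the slow path; not a subtype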
3140 void MacroAssembler::check_klass_subtype(Register sub_klass, 3141 Register super_klass, 3142 Register temp1_reg, 3143 Register temp2_reg, 3144 Label& L_success) { 3145 NearLabel failure; 3146 BLOCK_COMMENT(err_msg("check_klass_subtype(%s subclass of %s) {", sub_klass->name(), super_klass->name())); 3147 check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg, 3148 &L_success, &failure, nullptr); 3149 check_klass_subtype_slow_path(sub_klass, super_klass, 3150 temp1_reg, temp2_reg, &L_success, nullptr); 3151 BIND(failure); 3152 BLOCK_COMMENT("} check_klass_subtype"); 3153 } 3154 3155 void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) { 3156 assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required"); 3157 3158 Label L_fallthrough; 3159 if (L_fast_path == nullptr) { 3160 L_fast_path = &L_fallthrough; 3161 } else if (L_slow_path == nullptr) { 3162 L_slow_path = &L_fallthrough; 3163 } 3164 3165 // Fast path check: class is fully initialized 3166 z_cli(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized); 3167 z_bre(*L_fast_path); 3168 3169 // Fast path check: current thread is initializer thread 3170 z_cg(thread, Address(klass, InstanceKlass::init_thread_offset())); 3171 if (L_slow_path == &L_fallthrough) { 3172 z_bre(*L_fast_path); 3173 } else if (L_fast_path == &L_fallthrough) { 3174 z_brne(*L_slow_path); 3175 } else { 3176 Unimplemented(); 3177 } 3178 3179 bind(L_fallthrough); 3180 } 3181 3182 // Increment a counter at counter_address when the eq condition code is 3183 // set. Kills registers tmp1_reg and tmp2_reg and preserves the condition code. 3184 void MacroAssembler::increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg) { 3185 Label l; 3186 z_brne(l); 3187 load_const(tmp1_reg, counter_address); 3188 add2mem_32(Address(tmp1_reg), 1, tmp2_reg); 3189 z_cr(tmp1_reg, tmp1_reg); // Set cc to eq. 3190 bind(l); 3191 } 3192 3193 void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2) { 3194 Register displacedHeader = temp1; 3195 Register currentHeader = temp1; 3196 Register temp = temp2; 3197 NearLabel done, object_has_monitor; 3198 3199 const int hdr_offset = oopDesc::mark_offset_in_bytes(); 3200 3201 assert_different_registers(temp1, temp2, oop, box); 3202 3203 BLOCK_COMMENT("compiler_fast_lock_object {"); 3204 3205 // Load markWord from oop into mark. 3206 z_lg(displacedHeader, hdr_offset, oop); 3207 3208 if (DiagnoseSyncOnValueBasedClasses != 0) { 3209 load_klass(temp, oop); 3210 testbit(Address(temp, Klass::access_flags_offset()), exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS)); 3211 z_btrue(done); 3212 } 3213 3214 // Handle existing monitor. 3215 // The object has an existing monitor iff (mark & monitor_value) != 0. 3216 guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word"); 3217 z_tmll(displacedHeader, markWord::monitor_value); 3218 z_brnaz(object_has_monitor); 3219 3220 if (LockingMode == LM_MONITOR) { 3221 // Set NE to indicate 'failure' -> take slow-path 3222 // From loading the markWord, we know that oop != nullptr 3223 z_ltgr(oop, oop); 3224 z_bru(done); 3225 } else if (LockingMode == LM_LEGACY) { 3226 // Set mark to markWord | markWord::unlocked_value. 3227 z_oill(displacedHeader, markWord::unlocked_value); 3228 3229 // Load Compare Value application register. 3230 3231 // Initialize the box (must happen before we update the object mark). 
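// Protocol sketch for LM_LEGACY stack locking: the unlocked markWord is saved into the
// on-stack BasicLock (the "displaced header"); the CSG below then attempts to install the
// box address into the object's markWord. A markWord pointing into the owning thread's
// stack is what identifies a stack-locked object.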
3232 z_stg(displacedHeader, BasicLock::displaced_header_offset_in_bytes(), box);
3233
3234 // Compare object markWord with mark and if equal, exchange box with object markWord.
3235 // If the compare-and-swap succeeds, then we found an unlocked object and have now locked it.
3236 z_csg(displacedHeader, box, hdr_offset, oop);
3237 assert(currentHeader == displacedHeader, "must be same register"); // currentHeader and displacedHeader alias the same register.
3238 z_bre(done);
3239
3240 // We did not see an unlocked object.
3241 // currentHeader contains what is currently stored in the oop's markWord.
3242 // We might have a recursive case. Verify by checking if the owner is self.
3243 // To do so, compare the value in the markWord (currentHeader) with the stack pointer.
3244 z_sgr(currentHeader, Z_SP);
3245 load_const_optimized(temp, (~(os::vm_page_size() - 1) | markWord::lock_mask_in_place));
3246
3247 z_ngr(currentHeader, temp);
3248
3249 // result zero: owner is self -> recursive lock. Indicate that by storing 0 in the box.
3250 // result not-zero: attempt failed. We don't hold the lock -> go for slow case.
3251
3252 z_stg(currentHeader/*==0 or not 0*/, BasicLock::displaced_header_offset_in_bytes(), box);
3253
3254 z_bru(done);
3255 } else {
3256 assert(LockingMode == LM_LIGHTWEIGHT, "must be");
3257 lightweight_lock(oop, displacedHeader, temp, done);
3258 z_bru(done);
3259 }
3260
3261 bind(object_has_monitor);
3262
3263 Register zero = temp;
3264 Register monitor_tagged = displacedHeader; // Tagged with markWord::monitor_value.
3265 // The object's monitor m is unlocked iff m->owner is null,
3266 // otherwise m->owner may contain a thread or a stack address.
3267
3268 // Try to CAS m->owner from null to current thread.
3269 // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
3270 // Otherwise, register zero is filled with the current owner.
3271 z_lghi(zero, 0);
3272 z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged);
3273 if (LockingMode != LM_LIGHTWEIGHT) {
3274 // Store a non-null value into the box.
3275 z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box);
3276 }
3277
3278 z_bre(done); // acquired the lock for the first time.
3279
3280 BLOCK_COMMENT("fast_path_recursive_lock {");
3281 // Check if we are already the owner (recursive lock).
3282 z_cgr(Z_thread, zero); // owner is stored in zero by "z_csg" above
3283 z_brne(done); // not a recursive lock
3284
3285 // Current thread already owns the lock. Just increment recursion count.
3286 z_agsi(Address(monitor_tagged, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 1ll);
3287 z_cgr(zero, zero); // set the CC to EQUAL
3288 BLOCK_COMMENT("} fast_path_recursive_lock");
3289 bind(done);
3290
3291 BLOCK_COMMENT("} compiler_fast_lock_object");
3292 // If locking was successful, CR should indicate 'EQ'.
3293 // The compiler or the native wrapper generates a branch to the runtime call
3294 // _complete_monitor_locking_Java.
3295 } 3296 3297 void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2) { 3298 Register displacedHeader = temp1; 3299 Register currentHeader = temp2; 3300 Register temp = temp1; 3301 3302 const int hdr_offset = oopDesc::mark_offset_in_bytes(); 3303 3304 assert_different_registers(temp1, temp2, oop, box); 3305 3306 Label done, object_has_monitor, not_recursive; 3307 3308 BLOCK_COMMENT("compiler_fast_unlock_object {"); 3309 3310 if (LockingMode == LM_LEGACY) { 3311 // Find the lock address and load the displaced header from the stack. 3312 // if the displaced header is zero, we have a recursive unlock. 3313 load_and_test_long(displacedHeader, Address(box, BasicLock::displaced_header_offset_in_bytes())); 3314 z_bre(done); 3315 } 3316 3317 // Handle existing monitor. 3318 // The object has an existing monitor iff (mark & monitor_value) != 0. 3319 z_lg(currentHeader, hdr_offset, oop); 3320 guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word"); 3321 3322 z_tmll(currentHeader, markWord::monitor_value); 3323 z_brnaz(object_has_monitor); 3324 3325 if (LockingMode == LM_MONITOR) { 3326 // Set NE to indicate 'failure' -> take slow-path 3327 z_ltgr(oop, oop); 3328 z_bru(done); 3329 } else if (LockingMode == LM_LEGACY) { 3330 // Check if it is still a lightweight lock, this is true if we see 3331 // the stack address of the basicLock in the markWord of the object 3332 // copy box to currentHeader such that csg does not kill it. 3333 z_lgr(currentHeader, box); 3334 z_csg(currentHeader, displacedHeader, hdr_offset, oop); 3335 z_bru(done); // csg sets CR as desired. 3336 } else { 3337 assert(LockingMode == LM_LIGHTWEIGHT, "must be"); 3338 3339 lightweight_unlock(oop, currentHeader, displacedHeader, done); 3340 z_bru(done); 3341 } 3342 3343 // In case of LM_LIGHTWEIGHT, we may reach here with (temp & ObjectMonitor::ANONYMOUS_OWNER) != 0. 3344 // This is handled like owner thread mismatches: We take the slow path. 3345 3346 // Handle existing monitor. 3347 bind(object_has_monitor); 3348 3349 z_cg(Z_thread, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 3350 z_brne(done); 3351 3352 BLOCK_COMMENT("fast_path_recursive_unlock {"); 3353 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); 3354 z_bre(not_recursive); // if 0 then jump, it's not recursive locking 3355 3356 // Recursive inflated unlock 3357 z_agsi(Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), -1ll); 3358 z_cgr(currentHeader, currentHeader); // set the CC to EQUAL 3359 BLOCK_COMMENT("} fast_path_recursive_unlock"); 3360 z_bru(done); 3361 3362 bind(not_recursive); 3363 3364 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); 3365 z_brne(done); 3366 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); 3367 z_brne(done); 3368 z_release(); 3369 z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader); 3370 3371 bind(done); 3372 3373 BLOCK_COMMENT("} compiler_fast_unlock_object"); 3374 // flag == EQ indicates success 3375 // flag == NE indicates failure 3376 } 3377 3378 void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) { 3379 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 3380 bs->resolve_jobject(this, value, tmp1, tmp2); 3381 } 3382 3383 // Last_Java_sp must comply to the rules in frame_s390.hpp. 
3384 void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation) {
3385 BLOCK_COMMENT("set_last_Java_frame {");
3386
3387 // Always set last_Java_pc and flags first because once last_Java_sp
3388 // is visible, has_last_Java_frame is true and users will look at the
3389 // rest of the fields. (Note: flags should always be zero before we
3390 // get here, so they don't need to be set.)
3391
3392 // Verify that last_Java_pc was zeroed on return to Java.
3393 if (allow_relocation) {
3394 asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()),
3395 Z_thread,
3396 "last_Java_pc not zeroed before leaving Java",
3397 0x200);
3398 } else {
3399 asm_assert_mem8_is_zero_static(in_bytes(JavaThread::last_Java_pc_offset()),
3400 Z_thread,
3401 "last_Java_pc not zeroed before leaving Java",
3402 0x200);
3403 }
3404
3405 // When returning from a call out of Java, the frame anchor's
3406 // last_Java_pc will always be set to null. It is set here so that,
3407 // if we are doing a call to native (not VM) code, we capture the
3408 // known pc and don't have to rely on the native call having a
3409 // standard frame linkage where we can find the pc.
3410 if (last_Java_pc != noreg) {
3411 z_stg(last_Java_pc, Address(Z_thread, JavaThread::last_Java_pc_offset()));
3412 }
3413
3414 // This membar release is not required on z/Architecture, since the sequence of stores
3415 // is maintained. Nevertheless, we leave it in to document the required ordering.
3416 // The implementation of z_release() should be empty.
3417 // z_release();
3418
3419 z_stg(last_Java_sp, Address(Z_thread, JavaThread::last_Java_sp_offset()));
3420 BLOCK_COMMENT("} set_last_Java_frame");
3421 }
3422
3423 void MacroAssembler::reset_last_Java_frame(bool allow_relocation) {
3424 BLOCK_COMMENT("reset_last_Java_frame {");
3425
3426 if (allow_relocation) {
3427 asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()),
3428 Z_thread,
3429 "SP was not set, still zero",
3430 0x202);
3431 } else {
3432 asm_assert_mem8_isnot_zero_static(in_bytes(JavaThread::last_Java_sp_offset()),
3433 Z_thread,
3434 "SP was not set, still zero",
3435 0x202);
3436 }
3437
3438 // _last_Java_sp = 0
3439 // Clearing storage must be atomic here, so don't use clear_mem()!
3440 store_const(Address(Z_thread, JavaThread::last_Java_sp_offset()), 0);
3441
3442 // _last_Java_pc = 0
3443 store_const(Address(Z_thread, JavaThread::last_Java_pc_offset()), 0);
3444
3445 BLOCK_COMMENT("} reset_last_Java_frame");
3446 return;
3447 }
3448
3449 void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, bool allow_relocation) {
3450 assert_different_registers(sp, tmp1);
3451
3452 // We cannot trust that code generated by the C++ compiler saves R14
3453 // to z_abi_160.return_pc, because sometimes it spills R14 using stmg at
3454 // z_abi_160.gpr14 (e.g. InterpreterRuntime::_new()).
3455 // Therefore we load the PC into tmp1 and let set_last_Java_frame() save
3456 // it into the frame anchor.
3457 get_PC(tmp1); 3458 set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1, allow_relocation); 3459 } 3460 3461 void MacroAssembler::set_thread_state(JavaThreadState new_state) { 3462 z_release(); 3463 3464 assert(Immediate::is_uimm16(_thread_max_state), "enum value out of range for instruction"); 3465 assert(sizeof(JavaThreadState) == sizeof(int), "enum value must have base type int"); 3466 store_const(Address(Z_thread, JavaThread::thread_state_offset()), new_state, Z_R0, false); 3467 } 3468 3469 void MacroAssembler::get_vm_result(Register oop_result) { 3470 z_lg(oop_result, Address(Z_thread, JavaThread::vm_result_offset())); 3471 clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(void*)); 3472 3473 verify_oop(oop_result, FILE_AND_LINE); 3474 } 3475 3476 void MacroAssembler::get_vm_result_2(Register result) { 3477 z_lg(result, Address(Z_thread, JavaThread::vm_result_2_offset())); 3478 clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(void*)); 3479 } 3480 3481 // We require that C code which does not return a value in vm_result will 3482 // leave it undisturbed. 3483 void MacroAssembler::set_vm_result(Register oop_result) { 3484 z_stg(oop_result, Address(Z_thread, JavaThread::vm_result_offset())); 3485 } 3486 3487 // Explicit null checks (used for method handle code). 3488 void MacroAssembler::null_check(Register reg, Register tmp, int64_t offset) { 3489 if (!ImplicitNullChecks) { 3490 NearLabel ok; 3491 3492 compare64_and_branch(reg, (intptr_t) 0, Assembler::bcondNotEqual, ok); 3493 3494 // We just put the address into reg if it was 0 (tmp==Z_R0 is allowed so we can't use it for the address). 3495 address exception_entry = Interpreter::throw_NullPointerException_entry(); 3496 load_absolute_address(reg, exception_entry); 3497 z_br(reg); 3498 3499 bind(ok); 3500 } else { 3501 if (needs_explicit_null_check((intptr_t)offset)) { 3502 // Provoke OS null exception if reg is null by 3503 // accessing M[reg] w/o changing any registers. 3504 z_lg(tmp, 0, reg); 3505 } 3506 // else 3507 // Nothing to do, (later) access of M[reg + offset] 3508 // will provoke OS null exception if reg is null. 3509 } 3510 } 3511 3512 //------------------------------------- 3513 // Compressed Klass Pointers 3514 //------------------------------------- 3515 3516 // Klass oop manipulations if compressed. 3517 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 3518 Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided. (dst == src) also possible. 3519 address base = CompressedKlassPointers::base(); 3520 int shift = CompressedKlassPointers::shift(); 3521 bool need_zero_extend = base != 0; 3522 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3523 3524 BLOCK_COMMENT("cKlass encoder {"); 3525 3526 #ifdef ASSERT 3527 Label ok; 3528 z_tmll(current, KlassAlignmentInBytes-1); // Check alignment. 3529 z_brc(Assembler::bcondAllZero, ok); 3530 // The plain disassembler does not recognize illtrap. It instead displays 3531 // a 32-bit value. Issuing two illtraps assures the disassembler finds 3532 // the proper beginning of the next instruction. 3533 z_illtrap(0xee); 3534 z_illtrap(0xee); 3535 bind(ok); 3536 #endif 3537 3538 // Scale down the incoming klass pointer first. 3539 // We then can be sure we calculate an offset that fits into 32 bit. 3540 // More generally speaking: all subsequent calculations are purely 32-bit. 
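// Worked example with made-up values (not the actual encoding base), for illustration:
//   base = 0x00007f8000000000, shift = 3, klass ptr = 0x00007f8012345678
//   scaled klass = klass >> 3 = 0x00000ff002468acf
//   scaled base  = base  >> 3 = 0x00000ff000000000
//   encoded      = scaled klass - scaled base = 0x02468acf   (fits into 32 bits)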
3541 if (shift != 0) {
3542 assert (LogKlassAlignmentInBytes == shift, "decode alg wrong");
3543 z_srlg(dst, current, shift);
3544 current = dst;
3545 }
3546
3547 if (base != nullptr) {
3548 // Use scaled-down base address parts to match scaled-down klass pointer.
3549 unsigned int base_h = ((unsigned long)base)>>(32+shift);
3550 unsigned int base_l = (unsigned int)(((unsigned long)base)>>shift);
3551
3552 // General considerations:
3553 // - when calculating (current_h - base_h), all digits must cancel (become 0).
3554 // Otherwise, we would end up with a compressed klass pointer which doesn't
3555 // fit into 32 bits.
3556 // - Only bit#33 of the difference could potentially be non-zero. For that
3557 // to happen, (current_l < base_l) must hold. In this case, the subtraction
3558 // will create a borrow out of bit#32, nicely killing bit#33.
3559 // - With the above, we only need to consider current_l and base_l to
3560 // calculate the result.
3561 // - Both values are treated as unsigned. The unsigned subtraction is
3562 // replaced by adding (unsigned) the 2's complement of the subtrahend.
3563
3564 if (base_l == 0) {
3565 // - By theory, the calculation to be performed here (current_h - base_h) MUST
3566 // cancel all high-word bits. Otherwise, we would end up with an offset
3567 // (i.e. compressed klass pointer) that does not fit into 32 bits.
3568 // - current_l remains unchanged.
3569 // - Therefore, we can replace the whole calculation with just a
3570 // zero-extending 32-to-64-bit load.
3571 // - Even that can be replaced with a conditional load if dst != current.
3572 // (This is a local view; the shift step may already have requested zero-extension.)
3573 } else {
3574 if ((base_h == 0) && is_uimm(base_l, 31)) {
3575 // If we happen to find that (base_h == 0), and that base_l is within the range
3576 // which can be represented by a signed int, then we can use a 64-bit signed add with
3577 // (-base_l) as 32-bit signed immediate operand. The add will take care of the
3578 // upper 32 bits of the result, saving us the need of an extra zero extension.
3579 // For base_l to be in the required range, it must not have the most significant
3580 // bit (aka sign bit) set.
3581 lgr_if_needed(dst, current); // no zero/sign extension in this case!
3582 z_agfi(dst, -(int)base_l); // base_l must be passed as signed.
3583 need_zero_extend = false;
3584 current = dst;
3585 } else {
3586 // To begin with, we may need to copy and/or zero-extend the register operand.
3587 // We have to calculate (current_l - base_l). Because there is no unsigned
3588 // subtract instruction with immediate operand, we add the 2's complement of base_l.
3589 if (need_zero_extend) {
3590 z_llgfr(dst, current);
3591 need_zero_extend = false;
3592 } else {
3593 llgfr_if_needed(dst, current);
3594 }
3595 current = dst;
3596 z_alfi(dst, -base_l);
3597 }
3598 }
3599 }
3600
3601 if (need_zero_extend) {
3602 // We must zero-extend the calculated result. It may have some leftover bits in
3603 // the hi-word because we only did optimized calculations.
3604 z_llgfr(dst, current);
3605 } else {
3606 llgfr_if_needed(dst, current); // zero-extension while copying comes at no extra cost.
3607 }
3608
3609 BLOCK_COMMENT("} cKlass encoder");
3610 }
3611
3612 // This function calculates the size of the code generated by
3613 //   decode_klass_not_null(Register dst, Register src)
3614 // when Universe::heap() isn't null. Hence, if the instructions
3615 // it generates change, then this method needs to be updated.
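// Example (hypothetical encoding): with shift != 0 and a base that has set bits in both
// halves, the decoder emits  sllg (6) + load_const (load_const_size()) + algr (4)  bytes,
// plus the alignment check in ASSERT builds - exactly the sum computed below.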
3616 int MacroAssembler::instr_size_for_decode_klass_not_null() { 3617 address base = CompressedKlassPointers::base(); 3618 int shift_size = CompressedKlassPointers::shift() == 0 ? 0 : 6; /* sllg */ 3619 int addbase_size = 0; 3620 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3621 3622 if (base != nullptr) { 3623 unsigned int base_h = ((unsigned long)base)>>32; 3624 unsigned int base_l = (unsigned int)((unsigned long)base); 3625 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3626 addbase_size += 6; /* aih */ 3627 } else if ((base_h == 0) && (base_l != 0)) { 3628 addbase_size += 6; /* algfi */ 3629 } else { 3630 addbase_size += load_const_size(); 3631 addbase_size += 4; /* algr */ 3632 } 3633 } 3634 #ifdef ASSERT 3635 addbase_size += 10; 3636 addbase_size += 2; // Extra sigill. 3637 #endif 3638 return addbase_size + shift_size; 3639 } 3640 3641 // !!! If the instructions that get generated here change 3642 // then function instr_size_for_decode_klass_not_null() 3643 // needs to get updated. 3644 // This variant of decode_klass_not_null() must generate predictable code! 3645 // The code must only depend on globally known parameters. 3646 void MacroAssembler::decode_klass_not_null(Register dst) { 3647 address base = CompressedKlassPointers::base(); 3648 int shift = CompressedKlassPointers::shift(); 3649 int beg_off = offset(); 3650 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3651 3652 BLOCK_COMMENT("cKlass decoder (const size) {"); 3653 3654 if (shift != 0) { // Shift required? 3655 z_sllg(dst, dst, shift); 3656 } 3657 if (base != nullptr) { 3658 unsigned int base_h = ((unsigned long)base)>>32; 3659 unsigned int base_l = (unsigned int)((unsigned long)base); 3660 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3661 z_aih(dst, base_h); // Base has no set bits in lower half. 3662 } else if ((base_h == 0) && (base_l != 0)) { 3663 z_algfi(dst, base_l); // Base has no set bits in upper half. 3664 } else { 3665 load_const(Z_R0, base); // Base has set bits everywhere. 3666 z_algr(dst, Z_R0); 3667 } 3668 } 3669 3670 #ifdef ASSERT 3671 Label ok; 3672 z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment. 3673 z_brc(Assembler::bcondAllZero, ok); 3674 // The plain disassembler does not recognize illtrap. It instead displays 3675 // a 32-bit value. Issuing two illtraps assures the disassembler finds 3676 // the proper beginning of the next instruction. 3677 z_illtrap(0xd1); 3678 z_illtrap(0xd1); 3679 bind(ok); 3680 #endif 3681 assert(offset() == beg_off + instr_size_for_decode_klass_not_null(), "Code gen mismatch."); 3682 3683 BLOCK_COMMENT("} cKlass decoder (const size)"); 3684 } 3685 3686 // This variant of decode_klass_not_null() is for cases where 3687 // 1) the size of the generated instructions may vary 3688 // 2) the result is (potentially) stored in a register different from the source. 3689 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { 3690 address base = CompressedKlassPointers::base(); 3691 int shift = CompressedKlassPointers::shift(); 3692 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3693 3694 BLOCK_COMMENT("cKlass decoder {"); 3695 3696 if (src == noreg) src = dst; 3697 3698 if (shift != 0) { // Shift or at least move required? 
3699 z_sllg(dst, src, shift); 3700 } else { 3701 lgr_if_needed(dst, src); 3702 } 3703 3704 if (base != nullptr) { 3705 unsigned int base_h = ((unsigned long)base)>>32; 3706 unsigned int base_l = (unsigned int)((unsigned long)base); 3707 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3708 z_aih(dst, base_h); // Base has not set bits in lower half. 3709 } else if ((base_h == 0) && (base_l != 0)) { 3710 z_algfi(dst, base_l); // Base has no set bits in upper half. 3711 } else { 3712 load_const_optimized(Z_R0, base); // Base has set bits everywhere. 3713 z_algr(dst, Z_R0); 3714 } 3715 } 3716 3717 #ifdef ASSERT 3718 Label ok; 3719 z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment. 3720 z_brc(Assembler::bcondAllZero, ok); 3721 // The plain disassembler does not recognize illtrap. It instead displays 3722 // a 32-bit value. Issuing two illtraps assures the disassembler finds 3723 // the proper beginning of the next instruction. 3724 z_illtrap(0xd2); 3725 z_illtrap(0xd2); 3726 bind(ok); 3727 #endif 3728 BLOCK_COMMENT("} cKlass decoder"); 3729 } 3730 3731 void MacroAssembler::load_klass(Register klass, Address mem) { 3732 if (UseCompressedClassPointers) { 3733 z_llgf(klass, mem); 3734 // Attention: no null check here! 3735 decode_klass_not_null(klass); 3736 } else { 3737 z_lg(klass, mem); 3738 } 3739 } 3740 3741 void MacroAssembler::load_klass(Register klass, Register src_oop) { 3742 if (UseCompressedClassPointers) { 3743 z_llgf(klass, oopDesc::klass_offset_in_bytes(), src_oop); 3744 // Attention: no null check here! 3745 decode_klass_not_null(klass); 3746 } else { 3747 z_lg(klass, oopDesc::klass_offset_in_bytes(), src_oop); 3748 } 3749 } 3750 3751 void MacroAssembler::store_klass(Register klass, Register dst_oop, Register ck) { 3752 if (UseCompressedClassPointers) { 3753 assert_different_registers(dst_oop, klass, Z_R0); 3754 if (ck == noreg) ck = klass; 3755 encode_klass_not_null(ck, klass); 3756 z_st(ck, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 3757 } else { 3758 z_stg(klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 3759 } 3760 } 3761 3762 void MacroAssembler::store_klass_gap(Register s, Register d) { 3763 if (UseCompressedClassPointers) { 3764 assert(s != d, "not enough registers"); 3765 // Support s = noreg. 3766 if (s != noreg) { 3767 z_st(s, Address(d, oopDesc::klass_gap_offset_in_bytes())); 3768 } else { 3769 z_mvhi(Address(d, oopDesc::klass_gap_offset_in_bytes()), 0); 3770 } 3771 } 3772 } 3773 3774 // Compare klass ptr in memory against klass ptr in register. 3775 // 3776 // Rop1 - klass in register, always uncompressed. 3777 // disp - Offset of klass in memory, compressed/uncompressed, depending on runtime flag. 3778 // Rbase - Base address of cKlass in memory. 3779 // maybenull - True if Rop1 possibly is a null. 3780 void MacroAssembler::compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybenull) { 3781 3782 BLOCK_COMMENT("compare klass ptr {"); 3783 3784 if (UseCompressedClassPointers) { 3785 const int shift = CompressedKlassPointers::shift(); 3786 address base = CompressedKlassPointers::base(); 3787 3788 assert((shift == 0) || (shift == LogKlassAlignmentInBytes), "cKlass encoder detected bad shift"); 3789 assert_different_registers(Rop1, Z_R0); 3790 assert_different_registers(Rop1, Rbase, Z_R1); 3791 3792 // First encode register oop and then compare with cOop in memory. 3793 // This sequence saves an unnecessary cOop load and decode. 
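// In pseudocode: compare32( encode(Rop1), *(uint32_t*)(Rbase + disp) ), where encode()
// is the shift and/or base subtraction performed by the cKlass encoder above.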
3794 if (base == nullptr) { 3795 if (shift == 0) { 3796 z_cl(Rop1, disp, Rbase); // Unscaled 3797 } else { 3798 z_srlg(Z_R0, Rop1, shift); // ZeroBased 3799 z_cl(Z_R0, disp, Rbase); 3800 } 3801 } else { // HeapBased 3802 #ifdef ASSERT 3803 bool used_R0 = true; 3804 bool used_R1 = true; 3805 #endif 3806 Register current = Rop1; 3807 Label done; 3808 3809 if (maybenull) { // null pointer must be preserved! 3810 z_ltgr(Z_R0, current); 3811 z_bre(done); 3812 current = Z_R0; 3813 } 3814 3815 unsigned int base_h = ((unsigned long)base)>>32; 3816 unsigned int base_l = (unsigned int)((unsigned long)base); 3817 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3818 lgr_if_needed(Z_R0, current); 3819 z_aih(Z_R0, -((int)base_h)); // Base has no set bits in lower half. 3820 } else if ((base_h == 0) && (base_l != 0)) { 3821 lgr_if_needed(Z_R0, current); 3822 z_agfi(Z_R0, -(int)base_l); 3823 } else { 3824 int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base)); 3825 add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); // Subtract base by adding complement. 3826 } 3827 3828 if (shift != 0) { 3829 z_srlg(Z_R0, Z_R0, shift); 3830 } 3831 bind(done); 3832 z_cl(Z_R0, disp, Rbase); 3833 #ifdef ASSERT 3834 if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2); 3835 if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2); 3836 #endif 3837 } 3838 } else { 3839 z_clg(Rop1, disp, Z_R0, Rbase); 3840 } 3841 BLOCK_COMMENT("} compare klass ptr"); 3842 } 3843 3844 //--------------------------- 3845 // Compressed oops 3846 //--------------------------- 3847 3848 void MacroAssembler::encode_heap_oop(Register oop) { 3849 oop_encoder(oop, oop, true /*maybe null*/); 3850 } 3851 3852 void MacroAssembler::encode_heap_oop_not_null(Register oop) { 3853 oop_encoder(oop, oop, false /*not null*/); 3854 } 3855 3856 // Called with something derived from the oop base. e.g. oop_base>>3. 3857 int MacroAssembler::get_oop_base_pow2_offset(uint64_t oop_base) { 3858 unsigned int oop_base_ll = ((unsigned int)(oop_base >> 0)) & 0xffff; 3859 unsigned int oop_base_lh = ((unsigned int)(oop_base >> 16)) & 0xffff; 3860 unsigned int oop_base_hl = ((unsigned int)(oop_base >> 32)) & 0xffff; 3861 unsigned int oop_base_hh = ((unsigned int)(oop_base >> 48)) & 0xffff; 3862 unsigned int n_notzero_parts = (oop_base_ll == 0 ? 0:1) 3863 + (oop_base_lh == 0 ? 0:1) 3864 + (oop_base_hl == 0 ? 0:1) 3865 + (oop_base_hh == 0 ? 0:1); 3866 3867 assert(oop_base != 0, "This is for HeapBased cOops only"); 3868 3869 if (n_notzero_parts != 1) { // Check if oop_base is just a few pages shy of a power of 2. 3870 uint64_t pow2_offset = 0x10000 - oop_base_ll; 3871 if (pow2_offset < 0x8000) { // This might not be necessary. 3872 uint64_t oop_base2 = oop_base + pow2_offset; 3873 3874 oop_base_ll = ((unsigned int)(oop_base2 >> 0)) & 0xffff; 3875 oop_base_lh = ((unsigned int)(oop_base2 >> 16)) & 0xffff; 3876 oop_base_hl = ((unsigned int)(oop_base2 >> 32)) & 0xffff; 3877 oop_base_hh = ((unsigned int)(oop_base2 >> 48)) & 0xffff; 3878 n_notzero_parts = (oop_base_ll == 0 ? 0:1) + 3879 (oop_base_lh == 0 ? 0:1) + 3880 (oop_base_hl == 0 ? 0:1) + 3881 (oop_base_hh == 0 ? 0:1); 3882 if (n_notzero_parts == 1) { 3883 assert(-(int64_t)pow2_offset != (int64_t)-1, "We use -1 to signal uninitialized base register"); 3884 return -pow2_offset; 3885 } 3886 } 3887 } 3888 return 0; 3889 } 3890 3891 // If base address is offset from a straight power of two by just a few pages, 3892 // return this offset to the caller for a possible later composite add. 
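// Worked example (made-up base): oop_base = 0x3ffff000 has two non-zero 16-bit parts
// (0x3fff and 0xf000). Adding pow2_offset = 0x10000 - 0xf000 = 0x1000 gives 0x40000000,
// which has only one non-zero part, so get_oop_base_pow2_offset() returns -0x1000;
// get_oop_base() then loads the nicer 0x40000000 and the caller folds the returned
// -0x1000 back in as a displacement later.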
3893 // TODO/FIX: will only work correctly for 4k pages.
3894 int MacroAssembler::get_oop_base(Register Rbase, uint64_t oop_base) {
3895 int pow2_offset = get_oop_base_pow2_offset(oop_base);
3896
3897 load_const_optimized(Rbase, oop_base - pow2_offset); // Best job possible.
3898
3899 return pow2_offset;
3900 }
3901
3902 int MacroAssembler::get_oop_base_complement(Register Rbase, uint64_t oop_base) {
3903 int offset = get_oop_base(Rbase, oop_base);
3904 z_lcgr(Rbase, Rbase);
3905 return -offset;
3906 }
3907
3908 // Compare compressed oop in memory against oop in register.
3909 // Rop1 - Oop in register.
3910 // mem  - Address of the cOop in memory (base, index, and displacement).
3911 // maybenull - True if Rop1 possibly is null.
3912 // Flow control always continues with the compare: a null Rop1 encodes to zero
3913 // and is compared as-is, so no separate branch target is needed.
3914 void MacroAssembler::compare_heap_oop(Register Rop1, Address mem, bool maybenull) {
3915 Register Rbase = mem.baseOrR0();
3916 Register Rindex = mem.indexOrR0();
3917 int64_t disp = mem.disp();
3918
3919 const int shift = CompressedOops::shift();
3920 address base = CompressedOops::base();
3921
3922 assert(UseCompressedOops, "must be on to call this method");
3923 assert(Universe::heap() != nullptr, "java heap must be initialized to call this method");
3924 assert((shift == 0) || (shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift");
3925 assert_different_registers(Rop1, Z_R0);
3926 assert_different_registers(Rop1, Rbase, Z_R1);
3927 assert_different_registers(Rop1, Rindex, Z_R1);
3928
3929 BLOCK_COMMENT("compare heap oop {");
3930
3931 // First encode register oop and then compare with cOop in memory.
3932 // This sequence saves an unnecessary cOop load and decode.
3933 if (base == nullptr) {
3934 if (shift == 0) {
3935 z_cl(Rop1, disp, Rindex, Rbase); // Unscaled
3936 } else {
3937 z_srlg(Z_R0, Rop1, shift); // ZeroBased
3938 z_cl(Z_R0, disp, Rindex, Rbase);
3939 }
3940 } else { // HeapBased
3941 #ifdef ASSERT
3942 bool used_R0 = true;
3943 bool used_R1 = true;
3944 #endif
3945 Label done;
3946 int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base));
3947
3948 if (maybenull) { // null pointer must be preserved!
3949 z_ltgr(Z_R0, Rop1); 3950 z_bre(done); 3951 } 3952 3953 add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); 3954 z_srlg(Z_R0, Z_R0, shift); 3955 3956 bind(done); 3957 z_cl(Z_R0, disp, Rindex, Rbase); 3958 #ifdef ASSERT 3959 if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2); 3960 if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2); 3961 #endif 3962 } 3963 BLOCK_COMMENT("} compare heap oop"); 3964 } 3965 3966 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, 3967 const Address& addr, Register val, 3968 Register tmp1, Register tmp2, Register tmp3) { 3969 assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL | 3970 ON_UNKNOWN_OOP_REF)) == 0, "unsupported decorator"); 3971 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 3972 decorators = AccessInternal::decorator_fixup(decorators, type); 3973 bool as_raw = (decorators & AS_RAW) != 0; 3974 if (as_raw) { 3975 bs->BarrierSetAssembler::store_at(this, decorators, type, 3976 addr, val, 3977 tmp1, tmp2, tmp3); 3978 } else { 3979 bs->store_at(this, decorators, type, 3980 addr, val, 3981 tmp1, tmp2, tmp3); 3982 } 3983 } 3984 3985 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, 3986 const Address& addr, Register dst, 3987 Register tmp1, Register tmp2, Label *is_null) { 3988 assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL | 3989 ON_PHANTOM_OOP_REF | ON_WEAK_OOP_REF)) == 0, "unsupported decorator"); 3990 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 3991 decorators = AccessInternal::decorator_fixup(decorators, type); 3992 bool as_raw = (decorators & AS_RAW) != 0; 3993 if (as_raw) { 3994 bs->BarrierSetAssembler::load_at(this, decorators, type, 3995 addr, dst, 3996 tmp1, tmp2, is_null); 3997 } else { 3998 bs->load_at(this, decorators, type, 3999 addr, dst, 4000 tmp1, tmp2, is_null); 4001 } 4002 } 4003 4004 void MacroAssembler::load_heap_oop(Register dest, const Address &a, 4005 Register tmp1, Register tmp2, 4006 DecoratorSet decorators, Label *is_null) { 4007 access_load_at(T_OBJECT, IN_HEAP | decorators, a, dest, tmp1, tmp2, is_null); 4008 } 4009 4010 void MacroAssembler::store_heap_oop(Register Roop, const Address &a, 4011 Register tmp1, Register tmp2, Register tmp3, 4012 DecoratorSet decorators) { 4013 access_store_at(T_OBJECT, IN_HEAP | decorators, a, Roop, tmp1, tmp2, tmp3); 4014 } 4015 4016 //------------------------------------------------- 4017 // Encode compressed oop. Generally usable encoder. 4018 //------------------------------------------------- 4019 // Rsrc - contains regular oop on entry. It remains unchanged. 4020 // Rdst - contains compressed oop on exit. 4021 // Rdst and Rsrc may indicate same register, in which case Rsrc does not remain unchanged. 4022 // 4023 // Rdst must not indicate scratch register Z_R1 (Z_R1_scratch) for functionality. 4024 // Rdst should not indicate scratch register Z_R0 (Z_R0_scratch) for performance. 4025 // 4026 // only32bitValid is set, if later code only uses the lower 32 bits. In this 4027 // case we must not fix the upper 32 bits. 
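// In essence, for the general (heap-based) case the emitted code computes
//   Rdst = (Rsrc - oop_base) >> oop_shift
// with the subtraction done by adding the precomputed complement of the base, and with a
// null Rsrc passed through unchanged when maybenull is set.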
4028 void MacroAssembler::oop_encoder(Register Rdst, Register Rsrc, bool maybenull, 4029 Register Rbase, int pow2_offset, bool only32bitValid) { 4030 4031 const address oop_base = CompressedOops::base(); 4032 const int oop_shift = CompressedOops::shift(); 4033 const bool disjoint = CompressedOops::base_disjoint(); 4034 4035 assert(UseCompressedOops, "must be on to call this method"); 4036 assert(Universe::heap() != nullptr, "java heap must be initialized to call this encoder"); 4037 assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift"); 4038 4039 if (disjoint || (oop_base == nullptr)) { 4040 BLOCK_COMMENT("cOop encoder zeroBase {"); 4041 if (oop_shift == 0) { 4042 if (oop_base != nullptr && !only32bitValid) { 4043 z_llgfr(Rdst, Rsrc); // Clear upper bits in case the register will be decoded again. 4044 } else { 4045 lgr_if_needed(Rdst, Rsrc); 4046 } 4047 } else { 4048 z_srlg(Rdst, Rsrc, oop_shift); 4049 if (oop_base != nullptr && !only32bitValid) { 4050 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 4051 } 4052 } 4053 BLOCK_COMMENT("} cOop encoder zeroBase"); 4054 return; 4055 } 4056 4057 bool used_R0 = false; 4058 bool used_R1 = false; 4059 4060 BLOCK_COMMENT("cOop encoder general {"); 4061 assert_different_registers(Rdst, Z_R1); 4062 assert_different_registers(Rsrc, Rbase); 4063 if (maybenull) { 4064 Label done; 4065 // We reorder shifting and subtracting, so that we can compare 4066 // and shift in parallel: 4067 // 4068 // cycle 0: potential LoadN, base = <const> 4069 // cycle 1: base = !base dst = src >> 3, cmp cr = (src != 0) 4070 // cycle 2: if (cr) br, dst = dst + base + offset 4071 4072 // Get oop_base components. 4073 if (pow2_offset == -1) { 4074 if (Rdst == Rbase) { 4075 if (Rdst == Z_R1 || Rsrc == Z_R1) { 4076 Rbase = Z_R0; 4077 used_R0 = true; 4078 } else { 4079 Rdst = Z_R1; 4080 used_R1 = true; 4081 } 4082 } 4083 if (Rbase == Z_R1) { 4084 used_R1 = true; 4085 } 4086 pow2_offset = get_oop_base_complement(Rbase, ((uint64_t)(intptr_t)oop_base) >> oop_shift); 4087 } 4088 assert_different_registers(Rdst, Rbase); 4089 4090 // Check for null oop (must be left alone) and shift. 4091 if (oop_shift != 0) { // Shift out alignment bits 4092 if (((intptr_t)oop_base&0xc000000000000000L) == 0L) { // We are sure: no single address will have the leftmost bit set. 4093 z_srag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4094 } else { 4095 z_srlg(Rdst, Rsrc, oop_shift); 4096 z_ltgr(Rsrc, Rsrc); // This is the recommended way of testing for zero. 4097 // This probably is faster, as it does not write a register. No! 4098 // z_cghi(Rsrc, 0); 4099 } 4100 } else { 4101 z_ltgr(Rdst, Rsrc); // Move null to result register. 4102 } 4103 z_bre(done); 4104 4105 // Subtract oop_base components. 4106 if ((Rdst == Z_R0) || (Rbase == Z_R0)) { 4107 z_algr(Rdst, Rbase); 4108 if (pow2_offset != 0) { add2reg(Rdst, pow2_offset); } 4109 } else { 4110 add2reg_with_index(Rdst, pow2_offset, Rbase, Rdst); 4111 } 4112 if (!only32bitValid) { 4113 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 4114 } 4115 bind(done); 4116 4117 } else { // not null 4118 // Get oop_base components. 4119 if (pow2_offset == -1) { 4120 pow2_offset = get_oop_base_complement(Rbase, (uint64_t)(intptr_t)oop_base); 4121 } 4122 4123 // Subtract oop_base components and shift. 4124 if (Rdst == Z_R0 || Rsrc == Z_R0 || Rbase == Z_R0) { 4125 // Don't use lay instruction. 
4126 if (Rdst == Rsrc) { 4127 z_algr(Rdst, Rbase); 4128 } else { 4129 lgr_if_needed(Rdst, Rbase); 4130 z_algr(Rdst, Rsrc); 4131 } 4132 if (pow2_offset != 0) add2reg(Rdst, pow2_offset); 4133 } else { 4134 add2reg_with_index(Rdst, pow2_offset, Rbase, Rsrc); 4135 } 4136 if (oop_shift != 0) { // Shift out alignment bits. 4137 z_srlg(Rdst, Rdst, oop_shift); 4138 } 4139 if (!only32bitValid) { 4140 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 4141 } 4142 } 4143 #ifdef ASSERT 4144 if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb01bUL, 2); } 4145 if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb02bUL, 2); } 4146 #endif 4147 BLOCK_COMMENT("} cOop encoder general"); 4148 } 4149 4150 //------------------------------------------------- 4151 // decode compressed oop. Generally usable decoder. 4152 //------------------------------------------------- 4153 // Rsrc - contains compressed oop on entry. 4154 // Rdst - contains regular oop on exit. 4155 // Rdst and Rsrc may indicate same register. 4156 // Rdst must not be the same register as Rbase, if Rbase was preloaded (before call). 4157 // Rdst can be the same register as Rbase. Then, either Z_R0 or Z_R1 must be available as scratch. 4158 // Rbase - register to use for the base 4159 // pow2_offset - offset of base to nice value. If -1, base must be loaded. 4160 // For performance, it is good to 4161 // - avoid Z_R0 for any of the argument registers. 4162 // - keep Rdst and Rsrc distinct from Rbase. Rdst == Rsrc is ok for performance. 4163 // - avoid Z_R1 for Rdst if Rdst == Rbase. 4164 void MacroAssembler::oop_decoder(Register Rdst, Register Rsrc, bool maybenull, Register Rbase, int pow2_offset) { 4165 4166 const address oop_base = CompressedOops::base(); 4167 const int oop_shift = CompressedOops::shift(); 4168 const bool disjoint = CompressedOops::base_disjoint(); 4169 4170 assert(UseCompressedOops, "must be on to call this method"); 4171 assert(Universe::heap() != nullptr, "java heap must be initialized to call this decoder"); 4172 assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), 4173 "cOop encoder detected bad shift"); 4174 4175 // cOops are always loaded zero-extended from memory. No explicit zero-extension necessary. 4176 4177 if (oop_base != nullptr) { 4178 unsigned int oop_base_hl = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xffff; 4179 unsigned int oop_base_hh = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 48)) & 0xffff; 4180 unsigned int oop_base_hf = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xFFFFffff; 4181 if (disjoint && (oop_base_hl == 0 || oop_base_hh == 0)) { 4182 BLOCK_COMMENT("cOop decoder disjointBase {"); 4183 // We do not need to load the base. Instead, we can install the upper bits 4184 // with an OR instead of an ADD. 4185 Label done; 4186 4187 // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set. 4188 if (maybenull) { // null pointer must be preserved! 4189 z_slag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4190 z_bre(done); 4191 } else { 4192 z_sllg(Rdst, Rsrc, oop_shift); // Logical shift leaves condition code alone. 
4193 } 4194 if ((oop_base_hl != 0) && (oop_base_hh != 0)) { 4195 z_oihf(Rdst, oop_base_hf); 4196 } else if (oop_base_hl != 0) { 4197 z_oihl(Rdst, oop_base_hl); 4198 } else { 4199 assert(oop_base_hh != 0, "not heapbased mode"); 4200 z_oihh(Rdst, oop_base_hh); 4201 } 4202 bind(done); 4203 BLOCK_COMMENT("} cOop decoder disjointBase"); 4204 } else { 4205 BLOCK_COMMENT("cOop decoder general {"); 4206 // There are three decode steps: 4207 // scale oop offset (shift left) 4208 // get base (in reg) and pow2_offset (constant) 4209 // add base, pow2_offset, and oop offset 4210 // The following register overlap situations may exist: 4211 // Rdst == Rsrc, Rbase any other 4212 // not a problem. Scaling in-place leaves Rbase undisturbed. 4213 // Loading Rbase does not impact the scaled offset. 4214 // Rdst == Rbase, Rsrc any other 4215 // scaling would destroy a possibly preloaded Rbase. Loading Rbase 4216 // would destroy the scaled offset. 4217 // Remedy: use Rdst_tmp if Rbase has been preloaded. 4218 // use Rbase_tmp if base has to be loaded. 4219 // Rsrc == Rbase, Rdst any other 4220 // Only possible without preloaded Rbase. 4221 // Loading Rbase does not destroy compressed oop because it was scaled into Rdst before. 4222 // Rsrc == Rbase, Rdst == Rbase 4223 // Only possible without preloaded Rbase. 4224 // Loading Rbase would destroy compressed oop. Scaling in-place is ok. 4225 // Remedy: use Rbase_tmp. 4226 // 4227 Label done; 4228 Register Rdst_tmp = Rdst; 4229 Register Rbase_tmp = Rbase; 4230 bool used_R0 = false; 4231 bool used_R1 = false; 4232 bool base_preloaded = pow2_offset >= 0; 4233 guarantee(!(base_preloaded && (Rsrc == Rbase)), "Register clash, check caller"); 4234 assert(oop_shift != 0, "room for optimization"); 4235 4236 // Check if we need to use scratch registers. 4237 if (Rdst == Rbase) { 4238 assert(!(((Rdst == Z_R0) && (Rsrc == Z_R1)) || ((Rdst == Z_R1) && (Rsrc == Z_R0))), "need a scratch reg"); 4239 if (Rdst != Rsrc) { 4240 if (base_preloaded) { Rdst_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; } 4241 else { Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; } 4242 } else { 4243 Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; 4244 } 4245 } 4246 if (base_preloaded) lgr_if_needed(Rbase_tmp, Rbase); 4247 4248 // Scale oop and check for null. 4249 // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set. 4250 if (maybenull) { // null pointer must be preserved! 4251 z_slag(Rdst_tmp, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4252 z_bre(done); 4253 } else { 4254 z_sllg(Rdst_tmp, Rsrc, oop_shift); // Logical shift leaves condition code alone. 4255 } 4256 4257 // Get oop_base components. 4258 if (!base_preloaded) { 4259 pow2_offset = get_oop_base(Rbase_tmp, (uint64_t)(intptr_t)oop_base); 4260 } 4261 4262 // Add up all components. 
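// Z_R0 cannot act as a base or index register in address generation (it reads as zero),
// so fall back to explicit adds whenever it is involved.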
4263 if ((Rbase_tmp == Z_R0) || (Rdst_tmp == Z_R0)) { 4264 z_algr(Rdst_tmp, Rbase_tmp); 4265 if (pow2_offset != 0) { add2reg(Rdst_tmp, pow2_offset); } 4266 } else { 4267 add2reg_with_index(Rdst_tmp, pow2_offset, Rbase_tmp, Rdst_tmp); 4268 } 4269 4270 bind(done); 4271 lgr_if_needed(Rdst, Rdst_tmp); 4272 #ifdef ASSERT 4273 if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb03bUL, 2); } 4274 if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb04bUL, 2); } 4275 #endif 4276 BLOCK_COMMENT("} cOop decoder general"); 4277 } 4278 } else { 4279 BLOCK_COMMENT("cOop decoder zeroBase {"); 4280 if (oop_shift == 0) { 4281 lgr_if_needed(Rdst, Rsrc); 4282 } else { 4283 z_sllg(Rdst, Rsrc, oop_shift); 4284 } 4285 BLOCK_COMMENT("} cOop decoder zeroBase"); 4286 } 4287 } 4288 4289 // ((OopHandle)result).resolve(); 4290 void MacroAssembler::resolve_oop_handle(Register result) { 4291 // OopHandle::resolve is an indirection. 4292 z_lg(result, 0, result); 4293 } 4294 4295 void MacroAssembler::load_mirror_from_const_method(Register mirror, Register const_method) { 4296 mem2reg_opt(mirror, Address(const_method, ConstMethod::constants_offset())); 4297 mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset())); 4298 mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset())); 4299 resolve_oop_handle(mirror); 4300 } 4301 4302 void MacroAssembler::load_method_holder(Register holder, Register method) { 4303 mem2reg_opt(holder, Address(method, Method::const_offset())); 4304 mem2reg_opt(holder, Address(holder, ConstMethod::constants_offset())); 4305 mem2reg_opt(holder, Address(holder, ConstantPool::pool_holder_offset())); 4306 } 4307 4308 //--------------------------------------------------------------- 4309 //--- Operations on arrays. 4310 //--------------------------------------------------------------- 4311 4312 // Compiler ensures base is doubleword aligned and cnt is #doublewords. 4313 // Emitter does not KILL cnt and base arguments, since they need to be copied to 4314 // work registers anyway. 4315 // Actually, only r0, r1, and r5 are killed. 4316 unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register odd_tmp_reg) { 4317 4318 int block_start = offset(); 4319 Register dst_len = Z_R1; // Holds dst len for MVCLE. 4320 Register dst_addr = Z_R0; // Holds dst addr for MVCLE. 4321 4322 Label doXC, doMVCLE, done; 4323 4324 BLOCK_COMMENT("Clear_Array {"); 4325 4326 // Check for zero len and convert to long. 4327 z_ltgfr(odd_tmp_reg, cnt_arg); 4328 z_bre(done); // Nothing to do if len == 0. 4329 4330 // Prefetch data to be cleared. 4331 if (VM_Version::has_Prefetch()) { 4332 z_pfd(0x02, 0, Z_R0, base_pointer_arg); 4333 z_pfd(0x02, 256, Z_R0, base_pointer_arg); 4334 } 4335 4336 z_sllg(dst_len, odd_tmp_reg, 3); // #bytes to clear. 4337 z_cghi(odd_tmp_reg, 32); // Check for len <= 256 bytes (<=32 DW). 4338 z_brnh(doXC); // If so, use executed XC to clear. 4339 4340 // MVCLE: initialize long arrays (general case). 4341 bind(doMVCLE); 4342 z_lgr(dst_addr, base_pointer_arg); 4343 // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0. 4344 // The even register of the register pair is not killed. 4345 clear_reg(odd_tmp_reg, true, false); 4346 MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding()-1), 0); 4347 z_bru(done); 4348 4349 // XC: initialize short arrays. 4350 Label XC_template; // Instr template, never exec directly! 
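// The XC below is a template only: it is never reached by fall-through but executed via
// EX/EXRL, which supplies the actual length (dst_len - 1) by OR-ing it into the length
// field of the target instruction. An XC of a storage range with itself zeroes the range.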
4351 bind(XC_template); 4352 z_xc(0,0,base_pointer_arg,0,base_pointer_arg); 4353 4354 bind(doXC); 4355 add2reg(dst_len, -1); // Get #bytes-1 for EXECUTE. 4356 if (VM_Version::has_ExecuteExtensions()) { 4357 z_exrl(dst_len, XC_template); // Execute XC with var. len. 4358 } else { 4359 z_larl(odd_tmp_reg, XC_template); 4360 z_ex(dst_len,0,Z_R0,odd_tmp_reg); // Execute XC with var. len. 4361 } 4362 // z_bru(done); // fallthru 4363 4364 bind(done); 4365 4366 BLOCK_COMMENT("} Clear_Array"); 4367 4368 int block_end = offset(); 4369 return block_end - block_start; 4370 } 4371 4372 // Compiler ensures base is doubleword aligned and cnt is count of doublewords. 4373 // Emitter does not KILL any arguments nor work registers. 4374 // Emitter generates up to 16 XC instructions, depending on the array length. 4375 unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) { 4376 int block_start = offset(); 4377 int off; 4378 int lineSize_Bytes = AllocatePrefetchStepSize; 4379 int lineSize_DW = AllocatePrefetchStepSize>>LogBytesPerWord; 4380 bool doPrefetch = VM_Version::has_Prefetch(); 4381 int XC_maxlen = 256; 4382 int numXCInstr = cnt > 0 ? (cnt*BytesPerWord-1)/XC_maxlen+1 : 0; 4383 4384 BLOCK_COMMENT("Clear_Array_Const {"); 4385 assert(cnt*BytesPerWord <= 4096, "ClearArrayConst can handle 4k only"); 4386 4387 // Do less prefetching for very short arrays. 4388 if (numXCInstr > 0) { 4389 // Prefetch only some cache lines, then begin clearing. 4390 if (doPrefetch) { 4391 if (cnt*BytesPerWord <= lineSize_Bytes/4) { // If less than 1/4 of a cache line to clear, 4392 z_pfd(0x02, 0, Z_R0, base); // prefetch just the first cache line. 4393 } else { 4394 assert(XC_maxlen == lineSize_Bytes, "ClearArrayConst needs 256B cache lines"); 4395 for (off = 0; (off < AllocatePrefetchLines) && (off <= numXCInstr); off ++) { 4396 z_pfd(0x02, off*lineSize_Bytes, Z_R0, base); 4397 } 4398 } 4399 } 4400 4401 for (off=0; off<(numXCInstr-1); off++) { 4402 z_xc(off*XC_maxlen, XC_maxlen-1, base, off*XC_maxlen, base); 4403 4404 // Prefetch some cache lines in advance. 4405 if (doPrefetch && (off <= numXCInstr-AllocatePrefetchLines)) { 4406 z_pfd(0x02, (off+AllocatePrefetchLines)*lineSize_Bytes, Z_R0, base); 4407 } 4408 } 4409 if (off*XC_maxlen < cnt*BytesPerWord) { 4410 z_xc(off*XC_maxlen, (cnt*BytesPerWord-off*XC_maxlen)-1, base, off*XC_maxlen, base); 4411 } 4412 } 4413 BLOCK_COMMENT("} Clear_Array_Const"); 4414 4415 int block_end = offset(); 4416 return block_end - block_start; 4417 } 4418 4419 // Compiler ensures base is doubleword aligned and cnt is #doublewords. 4420 // Emitter does not KILL cnt and base arguments, since they need to be copied to 4421 // work registers anyway. 4422 // Actually, only r0, r1, (which are work registers) and odd_tmp_reg are killed. 4423 // 4424 // For very large arrays, exploit MVCLE H/W support. 4425 // MVCLE instruction automatically exploits H/W-optimized page mover. 4426 // - Bytes up to next page boundary are cleared with a series of XC to self. 4427 // - All full pages are cleared with the page mover H/W assist. 4428 // - Remaining bytes are again cleared by a series of XC to self. 4429 // 4430 unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register odd_tmp_reg) { 4431 4432 int block_start = offset(); 4433 Register dst_len = Z_R1; // Holds dst len for MVCLE. 4434 Register dst_addr = Z_R0; // Holds dst addr for MVCLE. 4435 4436 BLOCK_COMMENT("Clear_Array_Const_Big {"); 4437 4438 // Get len to clear. 
4439 load_const_optimized(dst_len, (long)cnt*8L); // in Bytes = #DW*8 4440 4441 // Prepare other args to MVCLE. 4442 z_lgr(dst_addr, base_pointer_arg); 4443 // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0. 4444 // The even register of the register pair is not killed. 4445 (void) clear_reg(odd_tmp_reg, true, false); // Src len of MVCLE is zero. 4446 MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding() - 1), 0); 4447 BLOCK_COMMENT("} Clear_Array_Const_Big"); 4448 4449 int block_end = offset(); 4450 return block_end - block_start; 4451 } 4452 4453 // Allocator. 4454 unsigned int MacroAssembler::CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg, 4455 Register cnt_reg, 4456 Register tmp1_reg, Register tmp2_reg) { 4457 // Tmp1 is oddReg. 4458 // Tmp2 is evenReg. 4459 4460 int block_start = offset(); 4461 Label doMVC, doMVCLE, done, MVC_template; 4462 4463 BLOCK_COMMENT("CopyRawMemory_AlignedDisjoint {"); 4464 4465 // Check for zero len and convert to long. 4466 z_ltgfr(cnt_reg, cnt_reg); // Remember casted value for doSTG case. 4467 z_bre(done); // Nothing to do if len == 0. 4468 4469 z_sllg(Z_R1, cnt_reg, 3); // Dst len in bytes. calc early to have the result ready. 4470 4471 z_cghi(cnt_reg, 32); // Check for len <= 256 bytes (<=32 DW). 4472 z_brnh(doMVC); // If so, use executed MVC to clear. 4473 4474 bind(doMVCLE); // A lot of data (more than 256 bytes). 4475 // Prep dest reg pair. 4476 z_lgr(Z_R0, dst_reg); // dst addr 4477 // Dst len already in Z_R1. 4478 // Prep src reg pair. 4479 z_lgr(tmp2_reg, src_reg); // src addr 4480 z_lgr(tmp1_reg, Z_R1); // Src len same as dst len. 4481 4482 // Do the copy. 4483 move_long_ext(Z_R0, tmp2_reg, 0xb0); // Bypass cache. 4484 z_bru(done); // All done. 4485 4486 bind(MVC_template); // Just some data (not more than 256 bytes). 4487 z_mvc(0, 0, dst_reg, 0, src_reg); 4488 4489 bind(doMVC); 4490 4491 if (VM_Version::has_ExecuteExtensions()) { 4492 add2reg(Z_R1, -1); 4493 } else { 4494 add2reg(tmp1_reg, -1, Z_R1); 4495 z_larl(Z_R1, MVC_template); 4496 } 4497 4498 if (VM_Version::has_Prefetch()) { 4499 z_pfd(1, 0,Z_R0,src_reg); 4500 z_pfd(2, 0,Z_R0,dst_reg); 4501 // z_pfd(1,256,Z_R0,src_reg); // Assume very short copy. 4502 // z_pfd(2,256,Z_R0,dst_reg); 4503 } 4504 4505 if (VM_Version::has_ExecuteExtensions()) { 4506 z_exrl(Z_R1, MVC_template); 4507 } else { 4508 z_ex(tmp1_reg, 0, Z_R0, Z_R1); 4509 } 4510 4511 bind(done); 4512 4513 BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint"); 4514 4515 int block_end = offset(); 4516 return block_end - block_start; 4517 } 4518 4519 //------------------------------------------------- 4520 // Constants (scalar and oop) in constant pool 4521 //------------------------------------------------- 4522 4523 // Add a non-relocated constant to the CP. 4524 int MacroAssembler::store_const_in_toc(AddressLiteral& val) { 4525 long value = val.value(); 4526 address tocPos = long_constant(value); 4527 4528 if (tocPos != nullptr) { 4529 int tocOffset = (int)(tocPos - code()->consts()->start()); 4530 return tocOffset; 4531 } 4532 // Address_constant returned null, so no constant entry has been created. 4533 // In that case, we return a "fatal" offset, just in case that subsequently 4534 // generated access code is executed. 4535 return -1; 4536 } 4537 4538 // Returns the TOC offset where the address is stored. 4539 // Add a relocated constant to the CP. 
4540 int MacroAssembler::store_oop_in_toc(AddressLiteral& oop) { 4541 // Use RelocationHolder::none for the constant pool entry. 4542 // Otherwise we will end up with a failing NativeCall::verify(x), 4543 // where x is the address of the constant pool entry. 4544 address tocPos = address_constant((address)oop.value(), RelocationHolder::none); 4545 4546 if (tocPos != nullptr) { 4547 int tocOffset = (int)(tocPos - code()->consts()->start()); 4548 RelocationHolder rsp = oop.rspec(); 4549 Relocation *rel = rsp.reloc(); 4550 4551 // Store toc_offset in relocation, used by call_far_patchable. 4552 if ((relocInfo::relocType)rel->type() == relocInfo::runtime_call_w_cp_type) { 4553 ((runtime_call_w_cp_Relocation *)(rel))->set_constant_pool_offset(tocOffset); 4554 } 4555 // Relocate at the load's pc. 4556 relocate(rsp); 4557 4558 return tocOffset; 4559 } 4560 // Address_constant returned null, so no constant entry has been created 4561 // in that case, we return a "fatal" offset, just in case that subsequently 4562 // generated access code is executed. 4563 return -1; 4564 } 4565 4566 bool MacroAssembler::load_const_from_toc(Register dst, AddressLiteral& a, Register Rtoc) { 4567 int tocOffset = store_const_in_toc(a); 4568 if (tocOffset == -1) return false; 4569 address tocPos = tocOffset + code()->consts()->start(); 4570 assert((address)code()->consts()->start() != nullptr, "Please add CP address"); 4571 relocate(a.rspec()); 4572 load_long_pcrelative(dst, tocPos); 4573 return true; 4574 } 4575 4576 bool MacroAssembler::load_oop_from_toc(Register dst, AddressLiteral& a, Register Rtoc) { 4577 int tocOffset = store_oop_in_toc(a); 4578 if (tocOffset == -1) return false; 4579 address tocPos = tocOffset + code()->consts()->start(); 4580 assert((address)code()->consts()->start() != nullptr, "Please add CP address"); 4581 4582 load_addr_pcrelative(dst, tocPos); 4583 return true; 4584 } 4585 4586 // If the instruction sequence at the given pc is a load_const_from_toc 4587 // sequence, return the value currently stored at the referenced position 4588 // in the TOC. 4589 intptr_t MacroAssembler::get_const_from_toc(address pc) { 4590 4591 assert(is_load_const_from_toc(pc), "must be load_const_from_pool"); 4592 4593 long offset = get_load_const_from_toc_offset(pc); 4594 address dataLoc = nullptr; 4595 if (is_load_const_from_toc_pcrelative(pc)) { 4596 dataLoc = pc + offset; 4597 } else { 4598 CodeBlob* cb = CodeCache::find_blob(pc); 4599 assert(cb && cb->is_nmethod(), "sanity"); 4600 nmethod* nm = (nmethod*)cb; 4601 dataLoc = nm->ctable_begin() + offset; 4602 } 4603 return *(intptr_t *)dataLoc; 4604 } 4605 4606 // If the instruction sequence at the given pc is a load_const_from_toc 4607 // sequence, copy the passed-in new_data value into the referenced 4608 // position in the TOC. 4609 void MacroAssembler::set_const_in_toc(address pc, unsigned long new_data, CodeBlob *cb) { 4610 assert(is_load_const_from_toc(pc), "must be load_const_from_pool"); 4611 4612 long offset = MacroAssembler::get_load_const_from_toc_offset(pc); 4613 address dataLoc = nullptr; 4614 if (is_load_const_from_toc_pcrelative(pc)) { 4615 dataLoc = pc+offset; 4616 } else { 4617 nmethod* nm = CodeCache::find_nmethod(pc); 4618 assert((cb == nullptr) || (nm == (nmethod*)cb), "instruction address should be in CodeBlob"); 4619 dataLoc = nm->ctable_begin() + offset; 4620 } 4621 if (*(unsigned long *)dataLoc != new_data) { // Prevent cache invalidation: update only if necessary. 
4622 *(unsigned long *)dataLoc = new_data;
4623 }
4624 }
4625
4626 // Dynamic TOC. Getter must only be called if "a" is a load_const_from_toc
4627 // site. Verify by calling is_load_const_from_toc() before!!
4628 // Offset is +/- 2**32 -> use long.
4629 long MacroAssembler::get_load_const_from_toc_offset(address a) {
4630 assert(is_load_const_from_toc_pcrelative(a), "expected pc relative load");
4631 // expected code sequence:
4632 // z_lgrl(t, simm32); len = 6
4633 unsigned long inst;
4634 unsigned int len = get_instruction(a, &inst);
4635 return get_pcrel_offset(inst);
4636 }
4637
4638 //**********************************************************************************
4639 // inspection of generated instruction sequences for a particular pattern
4640 //**********************************************************************************
4641
4642 bool MacroAssembler::is_load_const_from_toc_pcrelative(address a) {
4643 #ifdef ASSERT
4644 unsigned long inst;
4645 unsigned int len = get_instruction(a+2, &inst);
4646 if ((len == 6) && is_load_pcrelative_long(a) && is_call_pcrelative_long(inst)) {
4647 const int range = 128;
4648 Assembler::dump_code_range(tty, a, range, "instr(a) == z_lgrl && instr(a+2) == z_brasl");
4649 VM_Version::z_SIGSEGV();
4650 }
4651 #endif
4652 // expected code sequence:
4653 // z_lgrl(t, relAddr32); len = 6
4654 // TODO: verify accessed data is in CP, if possible.
4655 return is_load_pcrelative_long(a); // TODO: might be too general. Currently, only lgrl is used.
4656 }
4657
4658 bool MacroAssembler::is_load_const_from_toc_call(address a) {
4659 return is_load_const_from_toc(a) && is_call_byregister(a + load_const_from_toc_size());
4660 }
4661
4662 bool MacroAssembler::is_load_const_call(address a) {
4663 return is_load_const(a) && is_call_byregister(a + load_const_size());
4664 }
4665
4666 //-------------------------------------------------
4667 // Emitters for some really CISC instructions
4668 //-------------------------------------------------
4669
4670 void MacroAssembler::move_long_ext(Register dst, Register src, unsigned int pad) {
4671 assert(dst->encoding()%2==0, "must be an even/odd register pair");
4672 assert(src->encoding()%2==0, "must be an even/odd register pair");
4673 assert(pad<256, "must be a padding BYTE");
4674
4675 Label retry;
4676 bind(retry);
4677 Assembler::z_mvcle(dst, src, pad);
4678 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
4679 }
4680
4681 void MacroAssembler::compare_long_ext(Register left, Register right, unsigned int pad) {
4682 assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
4683 assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
4684 assert(pad<256, "must be a padding BYTE");
4685
4686 Label retry;
4687 bind(retry);
4688 Assembler::z_clcle(left, right, pad, Z_R0);
4689 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
4690 }
4691
4692 void MacroAssembler::compare_long_uni(Register left, Register right, unsigned int pad) {
4693 assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
4694 assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
4695 assert(pad<=0xfff, "must be a padding HALFWORD");
4696 assert(VM_Version::has_ETF2(), "instruction must be available");
4697
4698 Label retry;
4699 bind(retry);
4700 Assembler::z_clclu(left, right, pad, Z_R0);
4701 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
4702 }
4703
4704 void MacroAssembler::search_string(Register end, Register
start) { 4705 assert(end->encoding() != 0, "end address must not be in R0"); 4706 assert(start->encoding() != 0, "start address must not be in R0"); 4707 4708 Label retry; 4709 bind(retry); 4710 Assembler::z_srst(end, start); 4711 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4712 } 4713 4714 void MacroAssembler::search_string_uni(Register end, Register start) { 4715 assert(end->encoding() != 0, "end address must not be in R0"); 4716 assert(start->encoding() != 0, "start address must not be in R0"); 4717 assert(VM_Version::has_ETF3(), "instruction must be available"); 4718 4719 Label retry; 4720 bind(retry); 4721 Assembler::z_srstu(end, start); 4722 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4723 } 4724 4725 void MacroAssembler::kmac(Register srcBuff) { 4726 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4727 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 4728 4729 Label retry; 4730 bind(retry); 4731 Assembler::z_kmac(Z_R0, srcBuff); 4732 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4733 } 4734 4735 void MacroAssembler::kimd(Register srcBuff) { 4736 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4737 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 4738 4739 Label retry; 4740 bind(retry); 4741 Assembler::z_kimd(Z_R0, srcBuff); 4742 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4743 } 4744 4745 void MacroAssembler::klmd(Register srcBuff) { 4746 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4747 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 4748 4749 Label retry; 4750 bind(retry); 4751 Assembler::z_klmd(Z_R0, srcBuff); 4752 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4753 } 4754 4755 void MacroAssembler::km(Register dstBuff, Register srcBuff) { 4756 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 4757 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 4758 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4759 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 4760 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4761 4762 Label retry; 4763 bind(retry); 4764 Assembler::z_km(dstBuff, srcBuff); 4765 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4766 } 4767 4768 void MacroAssembler::kmc(Register dstBuff, Register srcBuff) { 4769 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 4770 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 4771 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4772 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 4773 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4774 4775 Label retry; 4776 bind(retry); 4777 Assembler::z_kmc(dstBuff, srcBuff); 4778 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4779 } 4780 4781 void MacroAssembler::kmctr(Register dstBuff, Register ctrBuff, Register srcBuff) { 4782 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 
4783 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 4784 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4785 assert(dstBuff->encoding() != 0, "dst buffer address can't be in Z_R0"); 4786 assert(ctrBuff->encoding() != 0, "ctr buffer address can't be in Z_R0"); 4787 assert(ctrBuff->encoding() % 2 == 0, "ctr buffer addr must be an even register"); 4788 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 4789 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4790 4791 Label retry; 4792 bind(retry); 4793 Assembler::z_kmctr(dstBuff, ctrBuff, srcBuff); 4794 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4795 } 4796 4797 void MacroAssembler::cksm(Register crcBuff, Register srcBuff) { 4798 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4799 4800 Label retry; 4801 bind(retry); 4802 Assembler::z_cksm(crcBuff, srcBuff); 4803 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4804 } 4805 4806 void MacroAssembler::translate_oo(Register r1, Register r2, uint m3) { 4807 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4808 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4809 4810 Label retry; 4811 bind(retry); 4812 Assembler::z_troo(r1, r2, m3); 4813 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4814 } 4815 4816 void MacroAssembler::translate_ot(Register r1, Register r2, uint m3) { 4817 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4818 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4819 4820 Label retry; 4821 bind(retry); 4822 Assembler::z_trot(r1, r2, m3); 4823 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4824 } 4825 4826 void MacroAssembler::translate_to(Register r1, Register r2, uint m3) { 4827 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4828 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4829 4830 Label retry; 4831 bind(retry); 4832 Assembler::z_trto(r1, r2, m3); 4833 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4834 } 4835 4836 void MacroAssembler::translate_tt(Register r1, Register r2, uint m3) { 4837 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4838 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4839 4840 Label retry; 4841 bind(retry); 4842 Assembler::z_trtt(r1, r2, m3); 4843 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4844 } 4845 4846 //--------------------------------------- 4847 // Helpers for Intrinsic Emitters 4848 //--------------------------------------- 4849 4850 /** 4851 * uint32_t crc; 4852 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); 4853 */ 4854 void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, Register tmp) { 4855 assert_different_registers(crc, table, tmp); 4856 assert_different_registers(val, table); 4857 if (crc == val) { // Must rotate first to use the unmodified value. 4858 rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. 4859 z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits. 4860 } else { 4861 z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits. 
4862 rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. 4863 } 4864 z_x(crc, Address(table, tmp, 0)); 4865 } 4866 4867 /** 4868 * uint32_t crc; 4869 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); 4870 */ 4871 void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) { 4872 fold_byte_crc32(crc, crc, table, tmp); 4873 } 4874 4875 /** 4876 * Emits code to update CRC-32 with a byte value according to constants in table. 4877 * 4878 * @param [in,out]crc Register containing the crc. 4879 * @param [in]val Register containing the byte to fold into the CRC. 4880 * @param [in]table Register containing the table of crc constants. 4881 * 4882 * uint32_t crc; 4883 * val = crc_table[(val ^ crc) & 0xFF]; 4884 * crc = val ^ (crc >> 8); 4885 */ 4886 void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { 4887 z_xr(val, crc); 4888 fold_byte_crc32(crc, val, table, val); 4889 } 4890 4891 4892 /** 4893 * @param crc register containing existing CRC (32-bit) 4894 * @param buf register pointing to input byte buffer (byte*) 4895 * @param len register containing number of bytes 4896 * @param table register pointing to CRC table 4897 */ 4898 void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, Register data) { 4899 assert_different_registers(crc, buf, len, table, data); 4900 4901 Label L_mainLoop, L_done; 4902 const int mainLoop_stepping = 1; 4903 4904 // Process all bytes in a single-byte loop. 4905 z_ltr(len, len); 4906 z_brnh(L_done); 4907 4908 bind(L_mainLoop); 4909 z_llgc(data, Address(buf, (intptr_t)0));// Current byte of input buffer (zero extended). Avoids garbage in upper half of register. 4910 add2reg(buf, mainLoop_stepping); // Advance buffer position. 4911 update_byte_crc32(crc, data, table); 4912 z_brct(len, L_mainLoop); // Iterate. 4913 4914 bind(L_done); 4915 } 4916 4917 /** 4918 * Emits code to update CRC-32 with a 4-byte value according to constants in table. 4919 * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c. 4920 * 4921 */ 4922 void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc, 4923 Register t0, Register t1, Register t2, Register t3) { 4924 // This is what we implement (the DOBIG4 part): 4925 // 4926 // #define DOBIG4 c ^= *++buf4; \ 4927 // c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ 4928 // crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] 4929 // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 4930 // Pre-calculate (constant) column offsets, use columns 4..7 for big-endian. 4931 const int ix0 = 4*(4*CRC32_COLUMN_SIZE); 4932 const int ix1 = 5*(4*CRC32_COLUMN_SIZE); 4933 const int ix2 = 6*(4*CRC32_COLUMN_SIZE); 4934 const int ix3 = 7*(4*CRC32_COLUMN_SIZE); 4935 4936 // XOR crc with next four bytes of buffer. 4937 lgr_if_needed(t0, crc); 4938 z_x(t0, Address(buf, bufDisp)); 4939 if (bufInc != 0) { 4940 add2reg(buf, bufInc); 4941 } 4942 4943 // Chop crc into 4 single-byte pieces, shifted left 2 bits, to form the table indices. 
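  // (The "<< 2" scales each byte value to a byte offset: table entries are 4-byte
  // (uint32_t) constants, and ix0..ix3 below select the per-byte-position columns.)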
4944 rotate_then_insert(t3, t0, 56-2, 63-2, 2, true); // ((c >> 0) & 0xff) << 2
4945 rotate_then_insert(t2, t0, 56-2, 63-2, 2-8, true); // ((c >> 8) & 0xff) << 2
4946 rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true); // ((c >> 16) & 0xff) << 2
4947 rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true); // ((c >> 24) & 0xff) << 2
4948
4949 // XOR indexed table values to calculate updated crc.
4950 z_ly(t2, Address(table, t2, (intptr_t)ix1));
4951 z_ly(t0, Address(table, t0, (intptr_t)ix3));
4952 z_xy(t2, Address(table, t3, (intptr_t)ix0));
4953 z_xy(t0, Address(table, t1, (intptr_t)ix2));
4954 z_xr(t0, t2); // Now t0 contains the updated CRC value.
4955 lgr_if_needed(crc, t0);
4956 }
4957
4958 /**
4959 * @param crc register containing existing CRC (32-bit)
4960 * @param buf register pointing to input byte buffer (byte*)
4961 * @param len register containing number of bytes
4962 * @param table register pointing to CRC table
4963 *
4964 * uses Z_R10..Z_R13 as work registers. Must be saved/restored by caller!
4965 */
4966 void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
4967 Register t0, Register t1, Register t2, Register t3,
4968 bool invertCRC) {
4969 assert_different_registers(crc, buf, len, table);
4970
4971 Label L_mainLoop, L_tail;
4972 Register data = t0;
4973 Register ctr = Z_R0;
4974 const int mainLoop_stepping = 4;
4975 const int log_stepping = exact_log2(mainLoop_stepping);
4976
4977 // Don't test for len <= 0 here. This pathological case should not occur anyway.
4978 // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
4979 // The situation itself is detected and handled correctly by the conditional branches
4980 // following aghi(len, -stepping) and aghi(len, +stepping).
4981
4982 if (invertCRC) {
4983 not_(crc, noreg, false); // 1s complement of crc
4984 }
4985
4986 // Check for short (<4 bytes) buffer.
4987 z_srag(ctr, len, log_stepping);
4988 z_brnh(L_tail);
4989
4990 z_lrvr(crc, crc); // Reverse byte order because we are dealing with big-endian data.
4991 rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop
4992
4993 BIND(L_mainLoop);
4994 update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3);
4995 z_brct(ctr, L_mainLoop); // Iterate.
4996
4997 z_lrvr(crc, crc); // Reverse byte order back to original.
4998
4999 // Process last few (<4) bytes of buffer.
5000 BIND(L_tail); 5001 update_byteLoop_crc32(crc, buf, len, table, data); 5002 5003 if (invertCRC) { 5004 not_(crc, noreg, false); // 1s complement of crc 5005 } 5006 } 5007 5008 /** 5009 * @param crc register containing existing CRC (32-bit) 5010 * @param buf register pointing to input byte buffer (byte*) 5011 * @param len register containing number of bytes 5012 * @param table register pointing to CRC table 5013 */ 5014 void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table, 5015 Register t0, Register t1, Register t2, Register t3, 5016 bool invertCRC) { 5017 assert_different_registers(crc, buf, len, table); 5018 Register data = t0; 5019 5020 if (invertCRC) { 5021 not_(crc, noreg, false); // 1s complement of crc 5022 } 5023 5024 update_byteLoop_crc32(crc, buf, len, table, data); 5025 5026 if (invertCRC) { 5027 not_(crc, noreg, false); // 1s complement of crc 5028 } 5029 } 5030 5031 void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp, 5032 bool invertCRC) { 5033 assert_different_registers(crc, buf, len, table, tmp); 5034 5035 if (invertCRC) { 5036 not_(crc, noreg, false); // 1s complement of crc 5037 } 5038 5039 z_llgc(tmp, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register. 5040 update_byte_crc32(crc, tmp, table); 5041 5042 if (invertCRC) { 5043 not_(crc, noreg, false); // 1s complement of crc 5044 } 5045 } 5046 5047 void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table, 5048 bool invertCRC) { 5049 assert_different_registers(crc, val, table); 5050 5051 if (invertCRC) { 5052 not_(crc, noreg, false); // 1s complement of crc 5053 } 5054 5055 update_byte_crc32(crc, val, table); 5056 5057 if (invertCRC) { 5058 not_(crc, noreg, false); // 1s complement of crc 5059 } 5060 } 5061 5062 // 5063 // Code for BigInteger::multiplyToLen() intrinsic. 5064 // 5065 5066 // dest_lo += src1 + src2 5067 // dest_hi += carry1 + carry2 5068 // Z_R7 is destroyed ! 5069 void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo, 5070 Register src1, Register src2) { 5071 clear_reg(Z_R7); 5072 z_algr(dest_lo, src1); 5073 z_alcgr(dest_hi, Z_R7); 5074 z_algr(dest_lo, src2); 5075 z_alcgr(dest_hi, Z_R7); 5076 } 5077 5078 // Multiply 64 bit by 64 bit first loop. 5079 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, 5080 Register x_xstart, 5081 Register y, Register y_idx, 5082 Register z, 5083 Register carry, 5084 Register product, 5085 Register idx, Register kdx) { 5086 // jlong carry, x[], y[], z[]; 5087 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 5088 // huge_128 product = y[idx] * x[xstart] + carry; 5089 // z[kdx] = (jlong)product; 5090 // carry = (jlong)(product >>> 64); 5091 // } 5092 // z[xstart] = carry; 5093 5094 Label L_first_loop, L_first_loop_exit; 5095 Label L_one_x, L_one_y, L_multiply; 5096 5097 z_aghi(xstart, -1); 5098 z_brl(L_one_x); // Special case: length of x is 1. 5099 5100 // Load next two integers of x. 5101 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5102 mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0)); 5103 5104 5105 bind(L_first_loop); 5106 5107 z_aghi(idx, -1); 5108 z_brl(L_first_loop_exit); 5109 z_aghi(idx, -1); 5110 z_brl(L_one_y); 5111 5112 // Load next two integers of y. 
5113 z_sllg(Z_R1_scratch, idx, LogBytesPerInt); 5114 mem2reg_opt(y_idx, Address(y, Z_R1_scratch, 0)); 5115 5116 5117 bind(L_multiply); 5118 5119 Register multiplicand = product->successor(); 5120 Register product_low = multiplicand; 5121 5122 lgr_if_needed(multiplicand, x_xstart); 5123 z_mlgr(product, y_idx); // multiplicand * y_idx -> product::multiplicand 5124 clear_reg(Z_R7); 5125 z_algr(product_low, carry); // Add carry to result. 5126 z_alcgr(product, Z_R7); // Add carry of the last addition. 5127 add2reg(kdx, -2); 5128 5129 // Store result. 5130 z_sllg(Z_R7, kdx, LogBytesPerInt); 5131 reg2mem_opt(product_low, Address(z, Z_R7, 0)); 5132 lgr_if_needed(carry, product); 5133 z_bru(L_first_loop); 5134 5135 5136 bind(L_one_y); // Load one 32 bit portion of y as (0,value). 5137 5138 clear_reg(y_idx); 5139 mem2reg_opt(y_idx, Address(y, (intptr_t) 0), false); 5140 z_bru(L_multiply); 5141 5142 5143 bind(L_one_x); // Load one 32 bit portion of x as (0,value). 5144 5145 clear_reg(x_xstart); 5146 mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false); 5147 z_bru(L_first_loop); 5148 5149 bind(L_first_loop_exit); 5150 } 5151 5152 // Multiply 64 bit by 64 bit and add 128 bit. 5153 void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y, 5154 Register z, 5155 Register yz_idx, Register idx, 5156 Register carry, Register product, 5157 int offset) { 5158 // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry; 5159 // z[kdx] = (jlong)product; 5160 5161 Register multiplicand = product->successor(); 5162 Register product_low = multiplicand; 5163 5164 z_sllg(Z_R7, idx, LogBytesPerInt); 5165 mem2reg_opt(yz_idx, Address(y, Z_R7, offset)); 5166 5167 lgr_if_needed(multiplicand, x_xstart); 5168 z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand 5169 mem2reg_opt(yz_idx, Address(z, Z_R7, offset)); 5170 5171 add2_with_carry(product, product_low, carry, yz_idx); 5172 5173 z_sllg(Z_R7, idx, LogBytesPerInt); 5174 reg2mem_opt(product_low, Address(z, Z_R7, offset)); 5175 5176 } 5177 5178 // Multiply 128 bit by 128 bit. Unrolled inner loop. 
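// Each round of the unrolled loop below consumes two 64-bit limbs of y (idx is
// decremented by 4 ints) and performs two 64x64->128 bit multiply-accumulate steps;
// the remaining (at most three) 32-bit digits are handled after the loop exit,
// see the pseudo code below.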
5179 void MacroAssembler::multiply_128_x_128_loop(Register x_xstart, 5180 Register y, Register z, 5181 Register yz_idx, Register idx, 5182 Register jdx, 5183 Register carry, Register product, 5184 Register carry2) { 5185 // jlong carry, x[], y[], z[]; 5186 // int kdx = ystart+1; 5187 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop 5188 // huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry; 5189 // z[kdx+idx+1] = (jlong)product; 5190 // jlong carry2 = (jlong)(product >>> 64); 5191 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry2; 5192 // z[kdx+idx] = (jlong)product; 5193 // carry = (jlong)(product >>> 64); 5194 // } 5195 // idx += 2; 5196 // if (idx > 0) { 5197 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry; 5198 // z[kdx+idx] = (jlong)product; 5199 // carry = (jlong)(product >>> 64); 5200 // } 5201 5202 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; 5203 5204 // scale the index 5205 lgr_if_needed(jdx, idx); 5206 and_imm(jdx, 0xfffffffffffffffcL); 5207 rshift(jdx, 2); 5208 5209 5210 bind(L_third_loop); 5211 5212 z_aghi(jdx, -1); 5213 z_brl(L_third_loop_exit); 5214 add2reg(idx, -4); 5215 5216 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8); 5217 lgr_if_needed(carry2, product); 5218 5219 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0); 5220 lgr_if_needed(carry, product); 5221 z_bru(L_third_loop); 5222 5223 5224 bind(L_third_loop_exit); // Handle any left-over operand parts. 5225 5226 and_imm(idx, 0x3); 5227 z_brz(L_post_third_loop_done); 5228 5229 Label L_check_1; 5230 5231 z_aghi(idx, -2); 5232 z_brl(L_check_1); 5233 5234 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0); 5235 lgr_if_needed(carry, product); 5236 5237 5238 bind(L_check_1); 5239 5240 add2reg(idx, 0x2); 5241 and_imm(idx, 0x1); 5242 z_aghi(idx, -1); 5243 z_brl(L_post_third_loop_done); 5244 5245 Register multiplicand = product->successor(); 5246 Register product_low = multiplicand; 5247 5248 z_sllg(Z_R7, idx, LogBytesPerInt); 5249 clear_reg(yz_idx); 5250 mem2reg_opt(yz_idx, Address(y, Z_R7, 0), false); 5251 lgr_if_needed(multiplicand, x_xstart); 5252 z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand 5253 clear_reg(yz_idx); 5254 mem2reg_opt(yz_idx, Address(z, Z_R7, 0), false); 5255 5256 add2_with_carry(product, product_low, yz_idx, carry); 5257 5258 z_sllg(Z_R7, idx, LogBytesPerInt); 5259 reg2mem_opt(product_low, Address(z, Z_R7, 0), false); 5260 rshift(product_low, 32); 5261 5262 lshift(product, 32); 5263 z_ogr(product_low, product); 5264 lgr_if_needed(carry, product_low); 5265 5266 bind(L_post_third_loop_done); 5267 } 5268 5269 void MacroAssembler::multiply_to_len(Register x, Register xlen, 5270 Register y, Register ylen, 5271 Register z, 5272 Register tmp1, Register tmp2, 5273 Register tmp3, Register tmp4, 5274 Register tmp5) { 5275 ShortBranchVerifier sbv(this); 5276 5277 assert_different_registers(x, xlen, y, ylen, z, 5278 tmp1, tmp2, tmp3, tmp4, tmp5, Z_R1_scratch, Z_R7); 5279 assert_different_registers(x, xlen, y, ylen, z, 5280 tmp1, tmp2, tmp3, tmp4, tmp5, Z_R8); 5281 5282 z_stmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP); 5283 5284 const Register idx = tmp1; 5285 const Register kdx = tmp2; 5286 const Register xstart = tmp3; 5287 5288 const Register y_idx = tmp4; 5289 const Register carry = tmp5; 5290 const Register product = Z_R0_scratch; 5291 const Register x_xstart = Z_R8; 5292 5293 // First Loop. 
5294 // 5295 // final static long LONG_MASK = 0xffffffffL; 5296 // int xstart = xlen - 1; 5297 // int ystart = ylen - 1; 5298 // long carry = 0; 5299 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx-, kdx--) { 5300 // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry; 5301 // z[kdx] = (int)product; 5302 // carry = product >>> 32; 5303 // } 5304 // z[xstart] = (int)carry; 5305 // 5306 5307 lgr_if_needed(idx, ylen); // idx = ylen 5308 z_agrk(kdx, xlen, ylen); // kdx = xlen + ylen 5309 clear_reg(carry); // carry = 0 5310 5311 Label L_done; 5312 5313 lgr_if_needed(xstart, xlen); 5314 z_aghi(xstart, -1); 5315 z_brl(L_done); 5316 5317 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 5318 5319 NearLabel L_second_loop; 5320 compare64_and_branch(kdx, RegisterOrConstant((intptr_t) 0), bcondEqual, L_second_loop); 5321 5322 NearLabel L_carry; 5323 z_aghi(kdx, -1); 5324 z_brz(L_carry); 5325 5326 // Store lower 32 bits of carry. 5327 z_sllg(Z_R1_scratch, kdx, LogBytesPerInt); 5328 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5329 rshift(carry, 32); 5330 z_aghi(kdx, -1); 5331 5332 5333 bind(L_carry); 5334 5335 // Store upper 32 bits of carry. 5336 z_sllg(Z_R1_scratch, kdx, LogBytesPerInt); 5337 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5338 5339 // Second and third (nested) loops. 5340 // 5341 // for (int i = xstart-1; i >= 0; i--) { // Second loop 5342 // carry = 0; 5343 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop 5344 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + 5345 // (z[k] & LONG_MASK) + carry; 5346 // z[k] = (int)product; 5347 // carry = product >>> 32; 5348 // } 5349 // z[i] = (int)carry; 5350 // } 5351 // 5352 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx 5353 5354 const Register jdx = tmp1; 5355 5356 bind(L_second_loop); 5357 5358 clear_reg(carry); // carry = 0; 5359 lgr_if_needed(jdx, ylen); // j = ystart+1 5360 5361 z_aghi(xstart, -1); // i = xstart-1; 5362 z_brl(L_done); 5363 5364 // Use free slots in the current stackframe instead of push/pop. 5365 Address zsave(Z_SP, _z_abi(carg_1)); 5366 reg2mem_opt(z, zsave); 5367 5368 5369 Label L_last_x; 5370 5371 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5372 load_address(z, Address(z, Z_R1_scratch, 4)); // z = z + k - j 5373 z_aghi(xstart, -1); // i = xstart-1; 5374 z_brl(L_last_x); 5375 5376 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5377 mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0)); 5378 5379 5380 Label L_third_loop_prologue; 5381 5382 bind(L_third_loop_prologue); 5383 5384 Address xsave(Z_SP, _z_abi(carg_2)); 5385 Address xlensave(Z_SP, _z_abi(carg_3)); 5386 Address ylensave(Z_SP, _z_abi(carg_4)); 5387 5388 reg2mem_opt(x, xsave); 5389 reg2mem_opt(xstart, xlensave); 5390 reg2mem_opt(ylen, ylensave); 5391 5392 5393 multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x); 5394 5395 mem2reg_opt(z, zsave); 5396 mem2reg_opt(x, xsave); 5397 mem2reg_opt(xlen, xlensave); // This is the decrement of the loop counter! 5398 mem2reg_opt(ylen, ylensave); 5399 5400 add2reg(tmp3, 1, xlen); 5401 z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt); 5402 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5403 z_aghi(tmp3, -1); 5404 z_brl(L_done); 5405 5406 rshift(carry, 32); 5407 z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt); 5408 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5409 z_bru(L_second_loop); 5410 5411 // Next infrequent code is moved outside loops. 
5412 bind(L_last_x); 5413 5414 clear_reg(x_xstart); 5415 mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false); 5416 z_bru(L_third_loop_prologue); 5417 5418 bind(L_done); 5419 5420 z_lmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP); 5421 } 5422 5423 void MacroAssembler::asm_assert(branch_condition cond, const char* msg, int id, bool is_static) { 5424 #ifdef ASSERT 5425 Label ok; 5426 z_brc(cond, ok); 5427 is_static ? stop_static(msg, id) : stop(msg, id); 5428 bind(ok); 5429 #endif // ASSERT 5430 } 5431 5432 // Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false). 5433 void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) { 5434 #ifdef ASSERT 5435 asm_assert(check_equal ? bcondEqual : bcondNotEqual, msg, id); 5436 #endif // ASSERT 5437 } 5438 5439 void MacroAssembler::asm_assert_mems_zero(bool check_equal, bool allow_relocation, int size, int64_t mem_offset, 5440 Register mem_base, const char* msg, int id) { 5441 #ifdef ASSERT 5442 switch (size) { 5443 case 4: 5444 load_and_test_int(Z_R0, Address(mem_base, mem_offset)); 5445 break; 5446 case 8: 5447 load_and_test_long(Z_R0, Address(mem_base, mem_offset)); 5448 break; 5449 default: 5450 ShouldNotReachHere(); 5451 } 5452 // if relocation is not allowed then stop_static() will be called otherwise call stop() 5453 asm_assert(check_equal ? bcondEqual : bcondNotEqual, msg, id, !allow_relocation); 5454 #endif // ASSERT 5455 } 5456 5457 // Check the condition 5458 // expected_size == FP - SP 5459 // after transformation: 5460 // expected_size - FP + SP == 0 5461 // Destroys Register expected_size if no tmp register is passed. 5462 void MacroAssembler::asm_assert_frame_size(Register expected_size, Register tmp, const char* msg, int id) { 5463 #ifdef ASSERT 5464 lgr_if_needed(tmp, expected_size); 5465 z_algr(tmp, Z_SP); 5466 z_slg(tmp, 0, Z_R0, Z_SP); 5467 asm_assert(bcondEqual, msg, id); 5468 #endif // ASSERT 5469 } 5470 5471 // Save and restore functions: Exclude Z_R0. 
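// The caller provides the save area at dst/src + offset. With both include_fp and
// include_flags set it must hold (5 + 8 + 1) * BytesPerWord bytes: five GPRs
// (Z_R1..Z_R5), eight FPRs (Z_F0..Z_F7), and one slot for a one-byte encoding of the
// condition code (2 = equal, 4 = high, 1 = low).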
5472 void MacroAssembler::save_volatile_regs(Register dst, int offset, bool include_fp, bool include_flags) { 5473 z_stmg(Z_R1, Z_R5, offset, dst); offset += 5 * BytesPerWord; 5474 if (include_fp) { 5475 z_std(Z_F0, Address(dst, offset)); offset += BytesPerWord; 5476 z_std(Z_F1, Address(dst, offset)); offset += BytesPerWord; 5477 z_std(Z_F2, Address(dst, offset)); offset += BytesPerWord; 5478 z_std(Z_F3, Address(dst, offset)); offset += BytesPerWord; 5479 z_std(Z_F4, Address(dst, offset)); offset += BytesPerWord; 5480 z_std(Z_F5, Address(dst, offset)); offset += BytesPerWord; 5481 z_std(Z_F6, Address(dst, offset)); offset += BytesPerWord; 5482 z_std(Z_F7, Address(dst, offset)); offset += BytesPerWord; 5483 } 5484 if (include_flags) { 5485 Label done; 5486 z_mvi(Address(dst, offset), 2); // encoding: equal 5487 z_bre(done); 5488 z_mvi(Address(dst, offset), 4); // encoding: higher 5489 z_brh(done); 5490 z_mvi(Address(dst, offset), 1); // encoding: lower 5491 bind(done); 5492 } 5493 } 5494 void MacroAssembler::restore_volatile_regs(Register src, int offset, bool include_fp, bool include_flags) { 5495 z_lmg(Z_R1, Z_R5, offset, src); offset += 5 * BytesPerWord; 5496 if (include_fp) { 5497 z_ld(Z_F0, Address(src, offset)); offset += BytesPerWord; 5498 z_ld(Z_F1, Address(src, offset)); offset += BytesPerWord; 5499 z_ld(Z_F2, Address(src, offset)); offset += BytesPerWord; 5500 z_ld(Z_F3, Address(src, offset)); offset += BytesPerWord; 5501 z_ld(Z_F4, Address(src, offset)); offset += BytesPerWord; 5502 z_ld(Z_F5, Address(src, offset)); offset += BytesPerWord; 5503 z_ld(Z_F6, Address(src, offset)); offset += BytesPerWord; 5504 z_ld(Z_F7, Address(src, offset)); offset += BytesPerWord; 5505 } 5506 if (include_flags) { 5507 z_cli(Address(src, offset), 2); // see encoding above 5508 } 5509 } 5510 5511 // Plausibility check for oops. 
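// Emits a call to the verify_oop stub subroutine (entry loaded via
// StubRoutines::verify_oop_subroutine_entry_address()). Volatile registers and the
// condition code are saved and restored around the call, so the check is transparent
// to the surrounding code. No code is emitted unless +VerifyOops is set.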
5512 void MacroAssembler::verify_oop(Register oop, const char* msg) {
5513 if (!VerifyOops) return;
5514
5515 BLOCK_COMMENT("verify_oop {");
5516 unsigned int nbytes_save = (5 + 8 + 1) * BytesPerWord;
5517 address entry_addr = StubRoutines::verify_oop_subroutine_entry_address();
5518
5519 save_return_pc();
5520
5521 // Push frame, but preserve flags
5522 z_lgr(Z_R0, Z_SP);
5523 z_lay(Z_SP, -((int64_t)nbytes_save + frame::z_abi_160_size), Z_SP);
5524 z_stg(Z_R0, _z_abi(callers_sp), Z_SP);
5525
5526 save_volatile_regs(Z_SP, frame::z_abi_160_size, true, true);
5527
5528 lgr_if_needed(Z_ARG2, oop);
5529 load_const_optimized(Z_ARG1, (address)msg);
5530 load_const_optimized(Z_R1, entry_addr);
5531 z_lg(Z_R1, 0, Z_R1);
5532 call_c(Z_R1);
5533
5534 restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, true);
5535 pop_frame();
5536 restore_return_pc();
5537
5538 BLOCK_COMMENT("} verify_oop ");
5539 }
5540
5541 void MacroAssembler::verify_oop_addr(Address addr, const char* msg) {
5542 if (!VerifyOops) return;
5543
5544 BLOCK_COMMENT("verify_oop_addr {");
5545 unsigned int nbytes_save = (5 + 8) * BytesPerWord;
5546 address entry_addr = StubRoutines::verify_oop_subroutine_entry_address();
5547
5548 save_return_pc();
5549 unsigned int frame_size = push_frame_abi160(nbytes_save); // kills Z_R0
5550 save_volatile_regs(Z_SP, frame::z_abi_160_size, true, false);
5551
5552 z_lg(Z_ARG2, addr.plus_disp(frame_size));
5553 load_const_optimized(Z_ARG1, (address)msg);
5554 load_const_optimized(Z_R1, entry_addr);
5555 z_lg(Z_R1, 0, Z_R1);
5556 call_c(Z_R1);
5557
5558 restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, false);
5559 pop_frame();
5560 restore_return_pc();
5561
5562 BLOCK_COMMENT("} verify_oop_addr ");
5563 }
5564
5565 const char* MacroAssembler::stop_types[] = {
5566 "stop",
5567 "untested",
5568 "unimplemented",
5569 "shouldnotreachhere"
5570 };
5571
5572 static void stop_on_request(const char* tp, const char* msg) {
5573 tty->print("Z assembly code requires stop: (%s) %s\n", tp, msg);
5574 guarantee(false, "Z assembly code requires stop: %s", msg);
5575 }
5576
5577 void MacroAssembler::stop(int type, const char* msg, int id) {
5578 BLOCK_COMMENT(err_msg("stop: %s {", msg));
5579
5580 // Setup arguments.
5581 load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
5582 load_const(Z_ARG2, (void*) msg);
5583 get_PC(Z_R14); // Following code pushes a frame without entering a new function. Use current pc as return address.
5584 save_return_pc(); // Saves return pc Z_R14.
5585 push_frame_abi160(0);
5586 call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
5587 // The plain disassembler does not recognize illtrap. It instead displays
5588 // a 32-bit value. Issuing two illtraps ensures the disassembler finds
5589 // the proper beginning of the next instruction.
5590 z_illtrap(id); // Illegal instruction.
5591 z_illtrap(id); // Illegal instruction.
5592
5593 BLOCK_COMMENT(" } stop");
5594 }
5595
5596 // Special version of stop() for code size reduction.
5597 // Reuses the previously generated call sequence, if any.
5598 // Generates the call sequence on its own, if necessary.
5599 // Note: This code will work only in non-relocatable code!
5600 // The relative address of the data elements (arg1, arg2) must not change.
5601 // The reentry point must not move relative to its users. This prerequisite
5602 // holds for "hand-written" code, provided all chain calls are in the same code blob.
5603 // Generated code must not undergo any transformation, e.g. ShortenBranches, to be safe.
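// Rough usage sketch (the first call materializes the stop call sequence; subsequent
// calls with the returned, non-null reentry point just branch back to it):
//   address reentry = nullptr;
//   reentry = stop_chain(reentry, type, "first failure",  id1, false);
//   reentry = stop_chain(reentry, type, "second failure", id2, false);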
5604 address MacroAssembler::stop_chain(address reentry, int type, const char* msg, int id, bool allow_relocation) { 5605 BLOCK_COMMENT(err_msg("stop_chain(%s,%s): %s {", reentry==nullptr?"init":"cont", allow_relocation?"reloc ":"static", msg)); 5606 5607 // Setup arguments. 5608 if (allow_relocation) { 5609 // Relocatable version (for comparison purposes). Remove after some time. 5610 load_const(Z_ARG1, (void*) stop_types[type%stop_end]); 5611 load_const(Z_ARG2, (void*) msg); 5612 } else { 5613 load_absolute_address(Z_ARG1, (address)stop_types[type%stop_end]); 5614 load_absolute_address(Z_ARG2, (address)msg); 5615 } 5616 if ((reentry != nullptr) && RelAddr::is_in_range_of_RelAddr16(reentry, pc())) { 5617 BLOCK_COMMENT("branch to reentry point:"); 5618 z_brc(bcondAlways, reentry); 5619 } else { 5620 BLOCK_COMMENT("reentry point:"); 5621 reentry = pc(); // Re-entry point for subsequent stop calls. 5622 save_return_pc(); // Saves return pc Z_R14. 5623 push_frame_abi160(0); 5624 if (allow_relocation) { 5625 reentry = nullptr; // Prevent reentry if code relocation is allowed. 5626 call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2); 5627 } else { 5628 call_VM_leaf_static(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2); 5629 } 5630 z_illtrap(id); // Illegal instruction as emergency stop, should the above call return. 5631 } 5632 BLOCK_COMMENT(" } stop_chain"); 5633 5634 return reentry; 5635 } 5636 5637 // Special version of stop() for code size reduction. 5638 // Assumes constant relative addresses for data and runtime call. 5639 void MacroAssembler::stop_static(int type, const char* msg, int id) { 5640 stop_chain(nullptr, type, msg, id, false); 5641 } 5642 5643 void MacroAssembler::stop_subroutine() { 5644 unimplemented("stop_subroutine", 710); 5645 } 5646 5647 // Prints msg to stdout from within generated code.. 5648 void MacroAssembler::warn(const char* msg) { 5649 RegisterSaver::save_live_registers(this, RegisterSaver::all_registers, Z_R14); 5650 load_absolute_address(Z_R1, (address) warning); 5651 load_absolute_address(Z_ARG1, (address) msg); 5652 (void) call(Z_R1); 5653 RegisterSaver::restore_live_registers(this, RegisterSaver::all_registers); 5654 } 5655 5656 #ifndef PRODUCT 5657 5658 // Write pattern 0x0101010101010101 in region [low-before, high+after]. 
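// 'before' and 'after' are counted in words (scaled by BytesPerWord below). This is a
// debug-only helper and does nothing unless +ZapEmptyStackFields is set.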
5659 void MacroAssembler::zap_from_to(Register low, Register high, Register val, Register addr, int before, int after) { 5660 if (!ZapEmptyStackFields) return; 5661 BLOCK_COMMENT("zap memory region {"); 5662 load_const_optimized(val, 0x0101010101010101); 5663 int size = before + after; 5664 if (low == high && size < 5 && size > 0) { 5665 int offset = -before*BytesPerWord; 5666 for (int i = 0; i < size; ++i) { 5667 z_stg(val, Address(low, offset)); 5668 offset +=(1*BytesPerWord); 5669 } 5670 } else { 5671 add2reg(addr, -before*BytesPerWord, low); 5672 if (after) { 5673 #ifdef ASSERT 5674 jlong check = after * BytesPerWord; 5675 assert(Immediate::is_simm32(check) && Immediate::is_simm32(-check), "value not encodable !"); 5676 #endif 5677 add2reg(high, after * BytesPerWord); 5678 } 5679 NearLabel loop; 5680 bind(loop); 5681 z_stg(val, Address(addr)); 5682 add2reg(addr, 8); 5683 compare64_and_branch(addr, high, bcondNotHigh, loop); 5684 if (after) { 5685 add2reg(high, -after * BytesPerWord); 5686 } 5687 } 5688 BLOCK_COMMENT("} zap memory region"); 5689 } 5690 #endif // !PRODUCT 5691 5692 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value, Register _rscratch) { 5693 _masm = masm; 5694 _masm->load_absolute_address(_rscratch, (address)flag_addr); 5695 _masm->load_and_test_int(_rscratch, Address(_rscratch)); 5696 if (value) { 5697 _masm->z_brne(_label); // Skip if true, i.e. != 0. 5698 } else { 5699 _masm->z_bre(_label); // Skip if false, i.e. == 0. 5700 } 5701 } 5702 5703 SkipIfEqual::~SkipIfEqual() { 5704 _masm->bind(_label); 5705 } 5706 5707 // Implements lightweight-locking. 5708 // Branches to slow upon failure to lock the object. 5709 // Falls through upon success. 5710 // 5711 // - obj: the object to be locked, contents preserved. 5712 // - hdr: the header, already loaded from obj, contents destroyed. 5713 // Note: make sure Z_R1 is not manipulated here when C2 compiler is in play 5714 void MacroAssembler::lightweight_lock(Register obj, Register hdr, Register temp, Label& slow_case) { 5715 5716 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); 5717 assert_different_registers(obj, hdr, temp); 5718 5719 // First we need to check if the lock-stack has room for pushing the object reference. 5720 z_lgf(temp, Address(Z_thread, JavaThread::lock_stack_top_offset())); 5721 5722 compareU32_and_branch(temp, (unsigned)LockStack::end_offset()-1, bcondHigh, slow_case); 5723 5724 // attempting a lightweight_lock 5725 // Load (object->mark() | 1) into hdr 5726 z_oill(hdr, markWord::unlocked_value); 5727 5728 z_lgr(temp, hdr); 5729 5730 // Clear lock-bits from hdr (locked state) 5731 z_xilf(temp, markWord::unlocked_value); 5732 5733 z_csg(hdr, temp, oopDesc::mark_offset_in_bytes(), obj); 5734 branch_optimized(Assembler::bcondNotEqual, slow_case); 5735 5736 // After successful lock, push object on lock-stack 5737 z_lgf(temp, Address(Z_thread, JavaThread::lock_stack_top_offset())); 5738 z_stg(obj, Address(Z_thread, temp)); 5739 z_ahi(temp, oopSize); 5740 z_st(temp, Address(Z_thread, JavaThread::lock_stack_top_offset())); 5741 5742 // as locking was successful, set CC to EQ 5743 z_cr(temp, temp); 5744 } 5745 5746 // Implements lightweight-unlocking. 5747 // Branches to slow upon failure. 5748 // Falls through upon success. 
5749 // 5750 // - obj: the object to be unlocked 5751 // - hdr: the (pre-loaded) header of the object, will be destroyed 5752 // - Z_R1_scratch: will be killed in case of Interpreter & C1 Compiler 5753 void MacroAssembler::lightweight_unlock(Register obj, Register hdr, Register tmp, Label& slow) { 5754 5755 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); 5756 assert_different_registers(obj, hdr, tmp); 5757 5758 #ifdef ASSERT 5759 { 5760 // Check that hdr is lightweight-locked. 5761 Label hdr_ok; 5762 z_lgr(tmp, hdr); 5763 z_nill(tmp, markWord::lock_mask_in_place); 5764 z_bre(hdr_ok); 5765 stop("Header is not lightweight-locked"); 5766 bind(hdr_ok); 5767 } 5768 { 5769 // The following checks rely on the fact that LockStack is only ever modified by 5770 // its owning thread, even if the lock got inflated concurrently; removal of LockStack 5771 // entries after inflation will happen delayed in that case. 5772 5773 // Check for lock-stack underflow. 5774 Label stack_ok; 5775 z_lgf(tmp, Address(Z_thread, JavaThread::lock_stack_top_offset())); 5776 compareU32_and_branch(tmp, (unsigned)LockStack::start_offset(), Assembler::bcondHigh, stack_ok); 5777 stop("Lock-stack underflow"); 5778 bind(stack_ok); 5779 } 5780 { 5781 // Check if the top of the lock-stack matches the unlocked object. 5782 Label tos_ok; 5783 z_aghi(tmp, -oopSize); 5784 z_lg(tmp, Address(Z_thread, tmp)); 5785 compare64_and_branch(tmp, obj, Assembler::bcondEqual, tos_ok); 5786 stop("Top of lock-stack does not match the unlocked object"); 5787 bind(tos_ok); 5788 } 5789 #endif // ASSERT 5790 5791 z_lgr(tmp, hdr); 5792 z_oill(tmp, markWord::unlocked_value); 5793 z_csg(hdr, tmp, oopDesc::mark_offset_in_bytes(), obj); 5794 branch_optimized(Assembler::bcondNotEqual, slow); 5795 5796 // After successful unlock, pop object from lock-stack 5797 #ifdef ASSERT 5798 z_lgf(tmp, Address(Z_thread, JavaThread::lock_stack_top_offset())); 5799 z_aghi(tmp, -oopSize); 5800 z_agr(tmp, Z_thread); 5801 z_xc(0, oopSize-1, tmp, 0, tmp); // wipe out lock-stack entry 5802 #endif 5803 z_alsi(in_bytes(JavaThread::lock_stack_top_offset()), Z_thread, -oopSize); // pop object 5804 z_cr(tmp, tmp); // set CC to EQ 5805 } 5806 5807 void MacroAssembler::pop_count_int(Register r_dst, Register r_src, Register r_tmp) { 5808 BLOCK_COMMENT("pop_count_int {"); 5809 5810 assert(r_tmp != noreg, "temp register required for pop_count_int, as code may run on machine older than z15"); 5811 assert_different_registers(r_dst, r_tmp); // if r_src is same as r_tmp, it should be fine 5812 5813 if (VM_Version::has_MiscInstrExt3()) { 5814 pop_count_int_with_ext3(r_dst, r_src); 5815 } else { 5816 pop_count_int_without_ext3(r_dst, r_src, r_tmp); 5817 } 5818 5819 BLOCK_COMMENT("} pop_count_int"); 5820 } 5821 5822 void MacroAssembler::pop_count_long(Register r_dst, Register r_src, Register r_tmp) { 5823 BLOCK_COMMENT("pop_count_long {"); 5824 5825 assert(r_tmp != noreg, "temp register required for pop_count_long, as code may run on machine older than z15"); 5826 assert_different_registers(r_dst, r_tmp); // if r_src is same as r_tmp, it should be fine 5827 5828 if (VM_Version::has_MiscInstrExt3()) { 5829 pop_count_long_with_ext3(r_dst, r_src); 5830 } else { 5831 pop_count_long_without_ext3(r_dst, r_src, r_tmp); 5832 } 5833 5834 BLOCK_COMMENT("} pop_count_long"); 5835 } 5836 5837 void MacroAssembler::pop_count_int_without_ext3(Register r_dst, Register r_src, Register r_tmp) { 5838 BLOCK_COMMENT("pop_count_int_without_ext3 {"); 5839 5840 assert(r_tmp != noreg, 
"temp register required for popcnt, for machines < z15"); 5841 assert_different_registers(r_dst, r_tmp); // if r_src is same as r_tmp, it should be fine 5842 5843 z_popcnt(r_dst, r_src, 0); 5844 z_srlg(r_tmp, r_dst, 16); 5845 z_alr(r_dst, r_tmp); 5846 z_srlg(r_tmp, r_dst, 8); 5847 z_alr(r_dst, r_tmp); 5848 z_llgcr(r_dst, r_dst); 5849 5850 BLOCK_COMMENT("} pop_count_int_without_ext3"); 5851 } 5852 5853 void MacroAssembler::pop_count_long_without_ext3(Register r_dst, Register r_src, Register r_tmp) { 5854 BLOCK_COMMENT("pop_count_long_without_ext3 {"); 5855 5856 assert(r_tmp != noreg, "temp register required for popcnt, for machines < z15"); 5857 assert_different_registers(r_dst, r_tmp); // if r_src is same as r_tmp, it should be fine 5858 5859 z_popcnt(r_dst, r_src, 0); 5860 z_ahhlr(r_dst, r_dst, r_dst); 5861 z_sllg(r_tmp, r_dst, 16); 5862 z_algr(r_dst, r_tmp); 5863 z_sllg(r_tmp, r_dst, 8); 5864 z_algr(r_dst, r_tmp); 5865 z_srlg(r_dst, r_dst, 56); 5866 5867 BLOCK_COMMENT("} pop_count_long_without_ext3"); 5868 } 5869 5870 void MacroAssembler::pop_count_long_with_ext3(Register r_dst, Register r_src) { 5871 BLOCK_COMMENT("pop_count_long_with_ext3 {"); 5872 5873 guarantee(VM_Version::has_MiscInstrExt3(), 5874 "this hardware doesn't support miscellaneous-instruction-extensions facility 3, still pop_count_long_with_ext3 is used"); 5875 z_popcnt(r_dst, r_src, 8); 5876 5877 BLOCK_COMMENT("} pop_count_long_with_ext3"); 5878 } 5879 5880 void MacroAssembler::pop_count_int_with_ext3(Register r_dst, Register r_src) { 5881 BLOCK_COMMENT("pop_count_int_with_ext3 {"); 5882 5883 guarantee(VM_Version::has_MiscInstrExt3(), 5884 "this hardware doesn't support miscellaneous-instruction-extensions facility 3, still pop_count_long_with_ext3 is used"); 5885 z_llgfr(r_dst, r_src); 5886 z_popcnt(r_dst, r_dst, 8); 5887 5888 BLOCK_COMMENT("} pop_count_int_with_ext3"); 5889 }