1 /* 2 * Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2016, 2024 SAP SE. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26 #include "precompiled.hpp" 27 #include "asm/codeBuffer.hpp" 28 #include "asm/macroAssembler.inline.hpp" 29 #include "code/compiledIC.hpp" 30 #include "compiler/disassembler.hpp" 31 #include "gc/shared/barrierSet.hpp" 32 #include "gc/shared/barrierSetAssembler.hpp" 33 #include "gc/shared/collectedHeap.inline.hpp" 34 #include "interpreter/interpreter.hpp" 35 #include "gc/shared/cardTableBarrierSet.hpp" 36 #include "memory/resourceArea.hpp" 37 #include "memory/universe.hpp" 38 #include "oops/accessDecorators.hpp" 39 #include "oops/compressedKlass.inline.hpp" 40 #include "oops/compressedOops.inline.hpp" 41 #include "oops/klass.inline.hpp" 42 #include "prims/methodHandles.hpp" 43 #include "registerSaver_s390.hpp" 44 #include "runtime/icache.hpp" 45 #include "runtime/interfaceSupport.inline.hpp" 46 #include "runtime/objectMonitor.hpp" 47 #include "runtime/os.hpp" 48 #include "runtime/safepoint.hpp" 49 #include "runtime/safepointMechanism.hpp" 50 #include "runtime/sharedRuntime.hpp" 51 #include "runtime/stubRoutines.hpp" 52 #include "utilities/events.hpp" 53 #include "utilities/macros.hpp" 54 #include "utilities/powerOfTwo.hpp" 55 56 #include <ucontext.h> 57 58 #define BLOCK_COMMENT(str) block_comment(str) 59 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 60 61 // Move 32-bit register if destination and source are different. 62 void MacroAssembler::lr_if_needed(Register rd, Register rs) { 63 if (rs != rd) { z_lr(rd, rs); } 64 } 65 66 // Move register if destination and source are different. 67 void MacroAssembler::lgr_if_needed(Register rd, Register rs) { 68 if (rs != rd) { z_lgr(rd, rs); } 69 } 70 71 // Zero-extend 32-bit register into 64-bit register if destination and source are different. 72 void MacroAssembler::llgfr_if_needed(Register rd, Register rs) { 73 if (rs != rd) { z_llgfr(rd, rs); } 74 } 75 76 // Move float register if destination and source are different. 77 void MacroAssembler::ldr_if_needed(FloatRegister rd, FloatRegister rs) { 78 if (rs != rd) { z_ldr(rd, rs); } 79 } 80 81 // Move integer register if destination and source are different. 82 // It is assumed that shorter-than-int types are already 83 // appropriately sign-extended. 
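// A few illustrative cases (not exhaustive), as handled by the type switch below:
//   move_reg_if_needed(d, T_LONG,    s, T_INT)  -> LGFR  (sign-extend 32 to 64 bit)
//   move_reg_if_needed(d, T_ADDRESS, s, T_INT)  -> LLGFR (zero-extend 32 to 64 bit)
//   move_reg_if_needed(d, T_INT,     s, T_LONG) -> LR    (copy low 32 bits only)
//   identical dst_type/src_type                 -> LGR, and only if dst != src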
84 void MacroAssembler::move_reg_if_needed(Register dst, BasicType dst_type, Register src, 85 BasicType src_type) { 86 assert((dst_type != T_FLOAT) && (dst_type != T_DOUBLE), "use move_freg for float types"); 87 assert((src_type != T_FLOAT) && (src_type != T_DOUBLE), "use move_freg for float types"); 88 89 if (dst_type == src_type) { 90 lgr_if_needed(dst, src); // Just move all 64 bits. 91 return; 92 } 93 94 switch (dst_type) { 95 // Do not support these types for now. 96 // case T_BOOLEAN: 97 case T_BYTE: // signed byte 98 switch (src_type) { 99 case T_INT: 100 z_lgbr(dst, src); 101 break; 102 default: 103 ShouldNotReachHere(); 104 } 105 return; 106 107 case T_CHAR: 108 case T_SHORT: 109 switch (src_type) { 110 case T_INT: 111 if (dst_type == T_CHAR) { 112 z_llghr(dst, src); 113 } else { 114 z_lghr(dst, src); 115 } 116 break; 117 default: 118 ShouldNotReachHere(); 119 } 120 return; 121 122 case T_INT: 123 switch (src_type) { 124 case T_BOOLEAN: 125 case T_BYTE: 126 case T_CHAR: 127 case T_SHORT: 128 case T_INT: 129 case T_LONG: 130 case T_OBJECT: 131 case T_ARRAY: 132 case T_VOID: 133 case T_ADDRESS: 134 lr_if_needed(dst, src); 135 // llgfr_if_needed(dst, src); // zero-extend (in case we need to find a bug). 136 return; 137 138 default: 139 assert(false, "non-integer src type"); 140 return; 141 } 142 case T_LONG: 143 switch (src_type) { 144 case T_BOOLEAN: 145 case T_BYTE: 146 case T_CHAR: 147 case T_SHORT: 148 case T_INT: 149 z_lgfr(dst, src); // sign extension 150 return; 151 152 case T_LONG: 153 case T_OBJECT: 154 case T_ARRAY: 155 case T_VOID: 156 case T_ADDRESS: 157 lgr_if_needed(dst, src); 158 return; 159 160 default: 161 assert(false, "non-integer src type"); 162 return; 163 } 164 return; 165 case T_OBJECT: 166 case T_ARRAY: 167 case T_VOID: 168 case T_ADDRESS: 169 switch (src_type) { 170 // These types don't make sense to be converted to pointers: 171 // case T_BOOLEAN: 172 // case T_BYTE: 173 // case T_CHAR: 174 // case T_SHORT: 175 176 case T_INT: 177 z_llgfr(dst, src); // zero extension 178 return; 179 180 case T_LONG: 181 case T_OBJECT: 182 case T_ARRAY: 183 case T_VOID: 184 case T_ADDRESS: 185 lgr_if_needed(dst, src); 186 return; 187 188 default: 189 assert(false, "non-integer src type"); 190 return; 191 } 192 return; 193 default: 194 assert(false, "non-integer dst type"); 195 return; 196 } 197 } 198 199 // Move float register if destination and source are different. 200 void MacroAssembler::move_freg_if_needed(FloatRegister dst, BasicType dst_type, 201 FloatRegister src, BasicType src_type) { 202 assert((dst_type == T_FLOAT) || (dst_type == T_DOUBLE), "use move_reg for int types"); 203 assert((src_type == T_FLOAT) || (src_type == T_DOUBLE), "use move_reg for int types"); 204 if (dst_type == src_type) { 205 ldr_if_needed(dst, src); // Just move all 64 bits. 206 } else { 207 switch (dst_type) { 208 case T_FLOAT: 209 assert(src_type == T_DOUBLE, "invalid float type combination"); 210 z_ledbr(dst, src); 211 return; 212 case T_DOUBLE: 213 assert(src_type == T_FLOAT, "invalid float type combination"); 214 z_ldebr(dst, src); 215 return; 216 default: 217 assert(false, "non-float dst type"); 218 return; 219 } 220 } 221 } 222 223 // Optimized emitter for reg to mem operations. 224 // Uses modern instructions if running on modern hardware, classic instructions 225 // otherwise. Prefers (usually shorter) classic instructions if applicable. 226 // Data register (reg) cannot be used as work register. 
227 // 228 // Don't rely on register locking, instead pass a scratch register (Z_R0 by default). 229 // CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs! 230 void MacroAssembler::freg2mem_opt(FloatRegister reg, 231 int64_t disp, 232 Register index, 233 Register base, 234 void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register), 235 void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register), 236 Register scratch) { 237 index = (index == noreg) ? Z_R0 : index; 238 if (Displacement::is_shortDisp(disp)) { 239 (this->*classic)(reg, disp, index, base); 240 } else { 241 if (Displacement::is_validDisp(disp)) { 242 (this->*modern)(reg, disp, index, base); 243 } else { 244 if (scratch != Z_R0 && scratch != Z_R1) { 245 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 246 } else { 247 if (scratch != Z_R0) { // scratch == Z_R1 248 if ((scratch == index) || (index == base)) { 249 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 250 } else { 251 add2reg(scratch, disp, base); 252 (this->*classic)(reg, 0, index, scratch); 253 if (base == scratch) { 254 add2reg(base, -disp); // Restore base. 255 } 256 } 257 } else { // scratch == Z_R0 258 z_lgr(scratch, base); 259 add2reg(base, disp); 260 (this->*classic)(reg, 0, index, base); 261 z_lgr(base, scratch); // Restore base. 262 } 263 } 264 } 265 } 266 } 267 268 void MacroAssembler::freg2mem_opt(FloatRegister reg, const Address &a, bool is_double) { 269 if (is_double) { 270 freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stdy), CLASSIC_FFUN(z_std)); 271 } else { 272 freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stey), CLASSIC_FFUN(z_ste)); 273 } 274 } 275 276 // Optimized emitter for mem to reg operations. 277 // Uses modern instructions if running on modern hardware, classic instructions 278 // otherwise. Prefers (usually shorter) classic instructions if applicable. 279 // data register (reg) cannot be used as work register. 280 // 281 // Don't rely on register locking, instead pass a scratch register (Z_R0 by default). 282 // CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs! 283 void MacroAssembler::mem2freg_opt(FloatRegister reg, 284 int64_t disp, 285 Register index, 286 Register base, 287 void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register), 288 void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register), 289 Register scratch) { 290 index = (index == noreg) ? Z_R0 : index; 291 if (Displacement::is_shortDisp(disp)) { 292 (this->*classic)(reg, disp, index, base); 293 } else { 294 if (Displacement::is_validDisp(disp)) { 295 (this->*modern)(reg, disp, index, base); 296 } else { 297 if (scratch != Z_R0 && scratch != Z_R1) { 298 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 299 } else { 300 if (scratch != Z_R0) { // scratch == Z_R1 301 if ((scratch == index) || (index == base)) { 302 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 303 } else { 304 add2reg(scratch, disp, base); 305 (this->*classic)(reg, 0, index, scratch); 306 if (base == scratch) { 307 add2reg(base, -disp); // Restore base. 308 } 309 } 310 } else { // scratch == Z_R0 311 z_lgr(scratch, base); 312 add2reg(base, disp); 313 (this->*classic)(reg, 0, index, base); 314 z_lgr(base, scratch); // Restore base. 
315 } 316 } 317 } 318 } 319 } 320 321 void MacroAssembler::mem2freg_opt(FloatRegister reg, const Address &a, bool is_double) { 322 if (is_double) { 323 mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ldy), CLASSIC_FFUN(z_ld)); 324 } else { 325 mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ley), CLASSIC_FFUN(z_le)); 326 } 327 } 328 329 // Optimized emitter for reg to mem operations. 330 // Uses modern instructions if running on modern hardware, classic instructions 331 // otherwise. Prefers (usually shorter) classic instructions if applicable. 332 // Data register (reg) cannot be used as work register. 333 // 334 // Don't rely on register locking, instead pass a scratch register 335 // (Z_R0 by default) 336 // CAUTION! passing registers >= Z_R2 may produce bad results on old CPUs! 337 void MacroAssembler::reg2mem_opt(Register reg, 338 int64_t disp, 339 Register index, 340 Register base, 341 void (MacroAssembler::*modern) (Register, int64_t, Register, Register), 342 void (MacroAssembler::*classic)(Register, int64_t, Register, Register), 343 Register scratch) { 344 index = (index == noreg) ? Z_R0 : index; 345 if (Displacement::is_shortDisp(disp)) { 346 (this->*classic)(reg, disp, index, base); 347 } else { 348 if (Displacement::is_validDisp(disp)) { 349 (this->*modern)(reg, disp, index, base); 350 } else { 351 if (scratch != Z_R0 && scratch != Z_R1) { 352 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 353 } else { 354 if (scratch != Z_R0) { // scratch == Z_R1 355 if ((scratch == index) || (index == base)) { 356 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 357 } else { 358 add2reg(scratch, disp, base); 359 (this->*classic)(reg, 0, index, scratch); 360 if (base == scratch) { 361 add2reg(base, -disp); // Restore base. 362 } 363 } 364 } else { // scratch == Z_R0 365 if ((scratch == reg) || (scratch == base) || (reg == base)) { 366 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 367 } else { 368 z_lgr(scratch, base); 369 add2reg(base, disp); 370 (this->*classic)(reg, 0, index, base); 371 z_lgr(base, scratch); // Restore base. 372 } 373 } 374 } 375 } 376 } 377 } 378 379 int MacroAssembler::reg2mem_opt(Register reg, const Address &a, bool is_double) { 380 int store_offset = offset(); 381 if (is_double) { 382 reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_stg), CLASSIC_IFUN(z_stg)); 383 } else { 384 reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_sty), CLASSIC_IFUN(z_st)); 385 } 386 return store_offset; 387 } 388 389 // Optimized emitter for mem to reg operations. 390 // Uses modern instructions if running on modern hardware, classic instructions 391 // otherwise. Prefers (usually shorter) classic instructions if applicable. 392 // Data register (reg) will be used as work register where possible. 393 void MacroAssembler::mem2reg_opt(Register reg, 394 int64_t disp, 395 Register index, 396 Register base, 397 void (MacroAssembler::*modern) (Register, int64_t, Register, Register), 398 void (MacroAssembler::*classic)(Register, int64_t, Register, Register)) { 399 index = (index == noreg) ? 
Z_R0 : index; 400 if (Displacement::is_shortDisp(disp)) { 401 (this->*classic)(reg, disp, index, base); 402 } else { 403 if (Displacement::is_validDisp(disp)) { 404 (this->*modern)(reg, disp, index, base); 405 } else { 406 if ((reg == index) && (reg == base)) { 407 z_sllg(reg, reg, 1); 408 add2reg(reg, disp); 409 (this->*classic)(reg, 0, noreg, reg); 410 } else if ((reg == index) && (reg != Z_R0)) { 411 add2reg(reg, disp); 412 (this->*classic)(reg, 0, reg, base); 413 } else if (reg == base) { 414 add2reg(reg, disp); 415 (this->*classic)(reg, 0, index, reg); 416 } else if (reg != Z_R0) { 417 add2reg(reg, disp, base); 418 (this->*classic)(reg, 0, index, reg); 419 } else { // reg == Z_R0 && reg != base here 420 add2reg(base, disp); 421 (this->*classic)(reg, 0, index, base); 422 add2reg(base, -disp); 423 } 424 } 425 } 426 } 427 428 void MacroAssembler::mem2reg_opt(Register reg, const Address &a, bool is_double) { 429 if (is_double) { 430 z_lg(reg, a); 431 } else { 432 mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_ly), CLASSIC_IFUN(z_l)); 433 } 434 } 435 436 void MacroAssembler::mem2reg_signed_opt(Register reg, const Address &a) { 437 mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_lgf), CLASSIC_IFUN(z_lgf)); 438 } 439 440 void MacroAssembler::and_imm(Register r, long mask, 441 Register tmp /* = Z_R0 */, 442 bool wide /* = false */) { 443 assert(wide || Immediate::is_simm32(mask), "mask value too large"); 444 445 if (!wide) { 446 z_nilf(r, mask); 447 return; 448 } 449 450 assert(r != tmp, " need a different temporary register !"); 451 load_const_optimized(tmp, mask); 452 z_ngr(r, tmp); 453 } 454 455 // Calculate the 1's complement. 456 // Note: The condition code is neither preserved nor correctly set by this code!!! 457 // Note: (wide == false) does not protect the high order half of the target register 458 // from alteration. It only serves as optimization hint for 32-bit results. 459 void MacroAssembler::not_(Register r1, Register r2, bool wide) { 460 461 if ((r2 == noreg) || (r2 == r1)) { // Calc 1's complement in place. 462 z_xilf(r1, -1); 463 if (wide) { 464 z_xihf(r1, -1); 465 } 466 } else { // Distinct src and dst registers. 467 load_const_optimized(r1, -1); 468 z_xgr(r1, r2); 469 } 470 } 471 472 unsigned long MacroAssembler::create_mask(int lBitPos, int rBitPos) { 473 assert(lBitPos >= 0, "zero is leftmost bit position"); 474 assert(rBitPos <= 63, "63 is rightmost bit position"); 475 assert(lBitPos <= rBitPos, "inverted selection interval"); 476 return (lBitPos == 0 ? (unsigned long)(-1L) : ((1UL<<(63-lBitPos+1))-1)) & (~((1UL<<(63-rBitPos))-1)); 477 } 478 479 // Helper function for the "Rotate_then_<logicalOP>" emitters. 480 // Rotate src, then mask register contents such that only bits in range survive. 481 // For oneBits == false, all bits not in range are set to 0. Useful for deleting all bits outside range. 482 // For oneBits == true, all bits not in range are set to 1. Useful for preserving all bits outside range. 483 // The caller must ensure that the selected range only contains bits with defined value. 484 void MacroAssembler::rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos, 485 int nRotate, bool src32bit, bool dst32bit, bool oneBits) { 486 assert(!(dst32bit && lBitPos < 32), "selection interval out of range for int destination"); 487 bool sll4rll = (nRotate >= 0) && (nRotate <= (63-rBitPos)); // Substitute SLL(G) for RLL(G). 
  bool srl4rll = (nRotate <  0) && (-nRotate <= lBitPos);   // Substitute SRL(G) for RLL(G).
  // Pre-determine which parts of dst will be zero after shift/rotate.
  bool llZero  =  sll4rll && (nRotate >= 16);
  bool lhZero  = (sll4rll && (nRotate >= 32)) || (srl4rll && (nRotate <= -48));
  bool lfZero  = llZero && lhZero;
  bool hlZero  = (sll4rll && (nRotate >= 48)) || (srl4rll && (nRotate <= -32));
  bool hhZero  =                                 (srl4rll && (nRotate <= -16));
  bool hfZero  = hlZero && hhZero;

  // rotate then mask src operand.
  // if oneBits == true,  all bits outside selected range are 1s.
  // if oneBits == false, all bits outside selected range are 0s.
  if (src32bit) {   // There might be garbage in the upper 32 bits which will get masked away.
    if (dst32bit) {
      z_rll(dst, src, nRotate);   // Copy and rotate, upper half of reg remains undisturbed.
    } else {
      if      (sll4rll) { z_sllg(dst, src,  nRotate); }
      else if (srl4rll) { z_srlg(dst, src, -nRotate); }
      else              { z_rllg(dst, src,  nRotate); }
    }
  } else {
    if      (sll4rll) { z_sllg(dst, src,  nRotate); }
    else if (srl4rll) { z_srlg(dst, src, -nRotate); }
    else              { z_rllg(dst, src,  nRotate); }
  }

  unsigned long  range_mask    = create_mask(lBitPos, rBitPos);
  unsigned int   range_mask_h  = (unsigned int)(range_mask >> 32);
  unsigned int   range_mask_l  = (unsigned int)range_mask;
  unsigned short range_mask_hh = (unsigned short)(range_mask >> 48);
  unsigned short range_mask_hl = (unsigned short)(range_mask >> 32);
  unsigned short range_mask_lh = (unsigned short)(range_mask >> 16);
  unsigned short range_mask_ll = (unsigned short)range_mask;
  // Works for z9 and newer H/W.
  if (oneBits) {
    if ((~range_mask_l) != 0)                { z_oilf(dst, ~range_mask_l); } // All bits outside range become 1s.
    if (((~range_mask_h) != 0) && !dst32bit) { z_oihf(dst, ~range_mask_h); }
  } else {
    // All bits outside range become 0s
    if (((~range_mask_l) != 0) &&              !lfZero) {
      z_nilf(dst, range_mask_l);
    }
    if (((~range_mask_h) != 0) && !dst32bit && !hfZero) {
      z_nihf(dst, range_mask_h);
    }
  }
}

// Rotate src, then insert selected range from rotated src into dst.
// Clear dst before, if requested.
void MacroAssembler::rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos,
                                        int nRotate, bool clear_dst) {
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                        // For risbg, pretend it's an unsigned value.
  z_risbg(dst, src, lBitPos, rBitPos, nRotate, clear_dst);  // Rotate, then insert selected, clear the rest.
}

// Rotate src, then and selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos,
                                     int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                        // For risbg, pretend it's an unsigned value.
  z_rnsbg(dst, src, lBitPos, rBitPos, nRotate, test_only);  // Rotate, then and selected.
}

// Rotate src, then or selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_or(Register dst, Register src, int lBitPos, int rBitPos,
                                    int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                        // For risbg, pretend it's an unsigned value.
  z_rosbg(dst, src, lBitPos, rBitPos, nRotate, test_only);  // Rotate, then or selected.
}

// Rotate src, then xor selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_xor(Register dst, Register src, int lBitPos, int rBitPos,
                                     int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                        // For risbg, pretend it's an unsigned value.
  z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only);  // Rotate, then xor selected.
}

void MacroAssembler::add64(Register r1, RegisterOrConstant inc) {
  if (inc.is_register()) {
    z_agr(r1, inc.as_register());
  } else { // constant
    intptr_t imm = inc.as_constant();
    add2reg(r1, imm);
  }
}

// Helper function to multiply the 64bit contents of a register by a 16bit constant.
// The optimization tries to avoid the mghi instruction, since it uses the FPU for
// calculation and is thus rather slow.
//
// There is no handling for special cases, e.g. cval==0 or cval==1.
//
// Returns len of generated code block.
unsigned int MacroAssembler::mul_reg64_const16(Register rval, Register work, int cval) {
  int block_start = offset();

  bool sign_flip = cval < 0;
  cval = sign_flip ? -cval : cval;

  BLOCK_COMMENT("Reg64*Con16 {");

  int bit1 = cval & -cval;
  if (bit1 == cval) {
    z_sllg(rval, rval, exact_log2(bit1));
    if (sign_flip) { z_lcgr(rval, rval); }
  } else {
    int bit2 = (cval-bit1) & -(cval-bit1);
    if ((bit1+bit2) == cval) {
      z_sllg(work, rval, exact_log2(bit1));
      z_sllg(rval, rval, exact_log2(bit2));
      z_agr(rval, work);
      if (sign_flip) { z_lcgr(rval, rval); }
    } else {
      if (sign_flip) { z_mghi(rval, -cval); }
      else           { z_mghi(rval,  cval); }
    }
  }
  BLOCK_COMMENT("} Reg64*Con16");

  int block_end = offset();
  return block_end - block_start;
}

// Generic operation r1 := r2 + imm.
//
// Should produce the best code for each supported CPU version.
// r2 == noreg yields r1 := r1 + imm
// imm == 0 emits either no instruction or r1 := r2 !
// NOTES: 1) Don't use this function where fixed sized
//           instruction sequences are required!!!
//        2) Don't use this function if condition code
//           setting is required!
//        3) Despite being declared as int64_t, the parameter imm
//           must be a simm_32 value (= signed 32-bit integer).
void MacroAssembler::add2reg(Register r1, int64_t imm, Register r2) {
  assert(Immediate::is_simm32(imm), "probably an implicit conversion went wrong");

  if (r2 == noreg) { r2 = r1; }

  // Handle special case imm == 0.
  if (imm == 0) {
    lgr_if_needed(r1, r2);
    // Nothing else to do.
643 return; 644 } 645 646 if (!PreferLAoverADD || (r2 == Z_R0)) { 647 bool distinctOpnds = VM_Version::has_DistinctOpnds(); 648 649 // Can we encode imm in 16 bits signed? 650 if (Immediate::is_simm16(imm)) { 651 if (r1 == r2) { 652 z_aghi(r1, imm); 653 return; 654 } 655 if (distinctOpnds) { 656 z_aghik(r1, r2, imm); 657 return; 658 } 659 z_lgr(r1, r2); 660 z_aghi(r1, imm); 661 return; 662 } 663 } else { 664 // Can we encode imm in 12 bits unsigned? 665 if (Displacement::is_shortDisp(imm)) { 666 z_la(r1, imm, r2); 667 return; 668 } 669 // Can we encode imm in 20 bits signed? 670 if (Displacement::is_validDisp(imm)) { 671 // Always use LAY instruction, so we don't need the tmp register. 672 z_lay(r1, imm, r2); 673 return; 674 } 675 676 } 677 678 // Can handle it (all possible values) with long immediates. 679 lgr_if_needed(r1, r2); 680 z_agfi(r1, imm); 681 } 682 683 // Generic operation r := b + x + d 684 // 685 // Addition of several operands with address generation semantics - sort of: 686 // - no restriction on the registers. Any register will do for any operand. 687 // - x == noreg: operand will be disregarded. 688 // - b == noreg: will use (contents of) result reg as operand (r := r + d). 689 // - x == Z_R0: just disregard 690 // - b == Z_R0: use as operand. This is not address generation semantics!!! 691 // 692 // The same restrictions as on add2reg() are valid!!! 693 void MacroAssembler::add2reg_with_index(Register r, int64_t d, Register x, Register b) { 694 assert(Immediate::is_simm32(d), "probably an implicit conversion went wrong"); 695 696 if (x == noreg) { x = Z_R0; } 697 if (b == noreg) { b = r; } 698 699 // Handle special case x == R0. 700 if (x == Z_R0) { 701 // Can simply add the immediate value to the base register. 702 add2reg(r, d, b); 703 return; 704 } 705 706 if (!PreferLAoverADD || (b == Z_R0)) { 707 bool distinctOpnds = VM_Version::has_DistinctOpnds(); 708 // Handle special case d == 0. 709 if (d == 0) { 710 if (b == x) { z_sllg(r, b, 1); return; } 711 if (r == x) { z_agr(r, b); return; } 712 if (r == b) { z_agr(r, x); return; } 713 if (distinctOpnds) { z_agrk(r, x, b); return; } 714 z_lgr(r, b); 715 z_agr(r, x); 716 } else { 717 if (x == b) { z_sllg(r, x, 1); } 718 else if (r == x) { z_agr(r, b); } 719 else if (r == b) { z_agr(r, x); } 720 else if (distinctOpnds) { z_agrk(r, x, b); } 721 else { 722 z_lgr(r, b); 723 z_agr(r, x); 724 } 725 add2reg(r, d); 726 } 727 } else { 728 // Can we encode imm in 12 bits unsigned? 729 if (Displacement::is_shortDisp(d)) { 730 z_la(r, d, x, b); 731 return; 732 } 733 // Can we encode imm in 20 bits signed? 734 if (Displacement::is_validDisp(d)) { 735 z_lay(r, d, x, b); 736 return; 737 } 738 z_la(r, 0, x, b); 739 add2reg(r, d); 740 } 741 } 742 743 // Generic emitter (32bit) for direct memory increment. 744 // For optimal code, do not specify Z_R0 as temp register. 
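// For illustration, the expected emission (depends on CPU features, not a guarantee):
//   mem-with-immediate facility available and simm8 constant -> a single ASI (or AGSI for the
//   64-bit variant) on the memory operand;
//   otherwise -> load into tmp (LGF/LG), add the immediate via add2reg, store back (ST/STG).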
745 void MacroAssembler::add2mem_32(const Address &a, int64_t imm, Register tmp) { 746 if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) { 747 z_asi(a, imm); 748 } else { 749 z_lgf(tmp, a); 750 add2reg(tmp, imm); 751 z_st(tmp, a); 752 } 753 } 754 755 void MacroAssembler::add2mem_64(const Address &a, int64_t imm, Register tmp) { 756 if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) { 757 z_agsi(a, imm); 758 } else { 759 z_lg(tmp, a); 760 add2reg(tmp, imm); 761 z_stg(tmp, a); 762 } 763 } 764 765 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 766 switch (size_in_bytes) { 767 case 8: z_lg(dst, src); break; 768 case 4: is_signed ? z_lgf(dst, src) : z_llgf(dst, src); break; 769 case 2: is_signed ? z_lgh(dst, src) : z_llgh(dst, src); break; 770 case 1: is_signed ? z_lgb(dst, src) : z_llgc(dst, src); break; 771 default: ShouldNotReachHere(); 772 } 773 } 774 775 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) { 776 switch (size_in_bytes) { 777 case 8: z_stg(src, dst); break; 778 case 4: z_st(src, dst); break; 779 case 2: z_sth(src, dst); break; 780 case 1: z_stc(src, dst); break; 781 default: ShouldNotReachHere(); 782 } 783 } 784 785 // Split a si20 offset (20bit, signed) into an ui12 offset (12bit, unsigned) and 786 // a high-order summand in register tmp. 787 // 788 // return value: < 0: No split required, si20 actually has property uimm12. 789 // >= 0: Split performed. Use return value as uimm12 displacement and 790 // tmp as index register. 791 int MacroAssembler::split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate) { 792 assert(Immediate::is_simm20(si20_offset), "sanity"); 793 int lg_off = (int)si20_offset & 0x0fff; // Punch out low-order 12 bits, always positive. 794 int ll_off = (int)si20_offset & ~0x0fff; // Force low-order 12 bits to zero. 795 assert((Displacement::is_shortDisp(si20_offset) && (ll_off == 0)) || 796 !Displacement::is_shortDisp(si20_offset), "unexpected offset values"); 797 assert((lg_off+ll_off) == si20_offset, "offset splitup error"); 798 799 Register work = accumulate? Z_R0 : tmp; 800 801 if (fixed_codelen) { // Len of code = 10 = 4 + 6. 802 z_lghi(work, ll_off>>12); // Implicit sign extension. 803 z_slag(work, work, 12); 804 } else { // Len of code = 0..10. 805 if (ll_off == 0) { return -1; } 806 // ll_off has 8 significant bits (at most) plus sign. 807 if ((ll_off & 0x0000f000) == 0) { // Non-zero bits only in upper halfbyte. 808 z_llilh(work, ll_off >> 16); 809 if (ll_off < 0) { // Sign-extension required. 810 z_lgfr(work, work); 811 } 812 } else { 813 if ((ll_off & 0x000f0000) == 0) { // Non-zero bits only in lower halfbyte. 814 z_llill(work, ll_off); 815 } else { // Non-zero bits in both halfbytes. 816 z_lghi(work, ll_off>>12); // Implicit sign extension. 817 z_slag(work, work, 12); 818 } 819 } 820 } 821 if (accumulate) { z_algr(tmp, work); } // len of code += 4 822 return lg_off; 823 } 824 825 void MacroAssembler::load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) { 826 if (Displacement::is_validDisp(si20)) { 827 z_ley(t, si20, a); 828 } else { 829 // Fixed_codelen = true is a simple way to ensure that the size of load_float_largeoffset 830 // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant 831 // pool loads). 
    bool accumulate    = true;
    bool fixed_codelen = true;
    Register work;

    if (fixed_codelen) {
      z_lgr(tmp, a);  // Lgr_if_needed not applicable due to fixed_codelen.
    } else {
      accumulate = (a == tmp);
    }
    work = tmp;

    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
    if (disp12 < 0) {
      z_le(t, si20, work);
    } else {
      if (accumulate) {
        z_le(t, disp12, work);
      } else {
        z_le(t, disp12, work, a);
      }
    }
  }
}

void MacroAssembler::load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
  if (Displacement::is_validDisp(si20)) {
    z_ldy(t, si20, a);
  } else {
    // Fixed_codelen = true is a simple way to ensure that the size of load_double_largeoffset
    // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
    // pool loads).
    bool accumulate    = true;
    bool fixed_codelen = true;
    Register work;

    if (fixed_codelen) {
      z_lgr(tmp, a);  // Lgr_if_needed not applicable due to fixed_codelen.
    } else {
      accumulate = (a == tmp);
    }
    work = tmp;

    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
    if (disp12 < 0) {
      z_ld(t, si20, work);
    } else {
      if (accumulate) {
        z_ld(t, disp12, work);
      } else {
        z_ld(t, disp12, work, a);
      }
    }
  }
}

// PCrelative TOC access.
// Returns distance (in bytes) from current position to start of consts section.
// Returns 0 (zero) if no consts section exists or if it has size zero.
long MacroAssembler::toc_distance() {
  CodeSection* cs = code()->consts();
  return (long)((cs != nullptr) ? cs->start()-pc() : 0);
}

// Implementation on x86/sparc assumes that constant and instruction section are
// adjacent, but this doesn't hold. Two special situations may occur, that we must
// be able to handle:
//   1. const section may be located apart from the inst section.
//   2. const section may be empty
// In both cases, we use the const section's start address to compute the "TOC",
// this seems to occur only temporarily; in the final step we always seem to end up
// with the pc-relative variant.
//
// PC-relative offset could be +/-2**32 -> use long for disp
// Furthermore: makes no sense to have special code for
// adjacent const and inst sections.
void MacroAssembler::load_toc(Register Rtoc) {
  // Simply use distance from start of const section (should be patched in the end).
  long disp = toc_distance();

  RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp);
  relocate(rspec);
  z_larl(Rtoc, RelAddr::pcrel_off32(disp)); // Offset is in halfwords.
}

// PCrelative TOC access.
// Load from anywhere pcrelative (with relocation of load instr)
void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) {
  address          pc             = this->pc();
  ptrdiff_t        total_distance = dataLocation - pc;
  RelocationHolder rspec          = internal_word_Relocation::spec(dataLocation);

  assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
  assert(total_distance != 0, "sanity");

  // Some extra safety net.
927 if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { 928 guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance); 929 } 930 931 (this)->relocate(rspec, relocInfo::pcrel_addr_format); 932 z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance)); 933 } 934 935 936 // PCrelative TOC access. 937 // Load from anywhere pcrelative (with relocation of load instr) 938 // loaded addr has to be relocated when added to constant pool. 939 void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) { 940 address pc = this->pc(); 941 ptrdiff_t total_distance = addrLocation - pc; 942 RelocationHolder rspec = internal_word_Relocation::spec(addrLocation); 943 944 assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory"); 945 946 // Some extra safety net. 947 if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { 948 guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance); 949 } 950 951 (this)->relocate(rspec, relocInfo::pcrel_addr_format); 952 z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance)); 953 } 954 955 // Generic operation: load a value from memory and test. 956 // CondCode indicates the sign (<0, ==0, >0) of the loaded value. 957 void MacroAssembler::load_and_test_byte(Register dst, const Address &a) { 958 z_lb(dst, a); 959 z_ltr(dst, dst); 960 } 961 962 void MacroAssembler::load_and_test_short(Register dst, const Address &a) { 963 int64_t disp = a.disp20(); 964 if (Displacement::is_shortDisp(disp)) { 965 z_lh(dst, a); 966 } else if (Displacement::is_longDisp(disp)) { 967 z_lhy(dst, a); 968 } else { 969 guarantee(false, "displacement out of range"); 970 } 971 z_ltr(dst, dst); 972 } 973 974 void MacroAssembler::load_and_test_int(Register dst, const Address &a) { 975 z_lt(dst, a); 976 } 977 978 void MacroAssembler::load_and_test_int2long(Register dst, const Address &a) { 979 z_ltgf(dst, a); 980 } 981 982 void MacroAssembler::load_and_test_long(Register dst, const Address &a) { 983 z_ltg(dst, a); 984 } 985 986 // Test a bit in memory. 987 void MacroAssembler::testbit(const Address &a, unsigned int bit) { 988 assert(a.index() == noreg, "no index reg allowed in testbit"); 989 if (bit <= 7) { 990 z_tm(a.disp() + 3, a.base(), 1 << bit); 991 } else if (bit <= 15) { 992 z_tm(a.disp() + 2, a.base(), 1 << (bit - 8)); 993 } else if (bit <= 23) { 994 z_tm(a.disp() + 1, a.base(), 1 << (bit - 16)); 995 } else if (bit <= 31) { 996 z_tm(a.disp() + 0, a.base(), 1 << (bit - 24)); 997 } else { 998 ShouldNotReachHere(); 999 } 1000 } 1001 1002 // Test a bit in a register. Result is reflected in CC. 1003 void MacroAssembler::testbit(Register r, unsigned int bitPos) { 1004 if (bitPos < 16) { 1005 z_tmll(r, 1U<<bitPos); 1006 } else if (bitPos < 32) { 1007 z_tmlh(r, 1U<<(bitPos-16)); 1008 } else if (bitPos < 48) { 1009 z_tmhl(r, 1U<<(bitPos-32)); 1010 } else if (bitPos < 64) { 1011 z_tmhh(r, 1U<<(bitPos-48)); 1012 } else { 1013 ShouldNotReachHere(); 1014 } 1015 } 1016 1017 void MacroAssembler::prefetch_read(Address a) { 1018 z_pfd(1, a.disp20(), a.indexOrR0(), a.base()); 1019 } 1020 void MacroAssembler::prefetch_update(Address a) { 1021 z_pfd(2, a.disp20(), a.indexOrR0(), a.base()); 1022 } 1023 1024 // Clear a register, i.e. load const zero into reg. 1025 // Return len (in bytes) of generated instruction(s). 1026 // whole_reg: Clear 64 bits if true, 32 bits otherwise. 
// set_cc:    Use instruction that sets the condition code, if true.
int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) {
  unsigned int start_off = offset();
  if (whole_reg) {
    set_cc ? z_xgr(r, r) : z_laz(r, 0, Z_R0);
  } else {  // Only 32bit register.
    set_cc ? z_xr(r, r) : z_lhi(r, 0);
  }
  return offset() - start_off;
}

#ifdef ASSERT
int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) {
  switch (pattern_len) {
    case 1:
      pattern = (pattern & 0x000000ff)  | ((pattern & 0x000000ff)<<8);
    case 2:
      pattern = (pattern & 0x0000ffff)  | ((pattern & 0x0000ffff)<<16);
    case 4:
      pattern = (pattern & 0xffffffffL) | ((pattern & 0xffffffffL)<<32);
    case 8:
      return load_const_optimized_rtn_len(r, pattern, true);
      break;
    default:
      guarantee(false, "preset_reg: bad len");
  }
  return 0;
}
#endif

// addr: Address descriptor of memory to clear. Index register will not be used!
// size: Number of bytes to clear.
// condition code will not be preserved.
// !!! DO NOT USE THEM FOR ATOMIC MEMORY CLEARING !!!
// !!! Use store_const() instead !!!
void MacroAssembler::clear_mem(const Address& addr, unsigned int size) {
  guarantee((addr.disp() + size) <= 4096, "MacroAssembler::clear_mem: size too large");

  switch (size) {
    case 0:
      return;
    case 1:
      z_mvi(addr, 0);
      return;
    case 2:
      z_mvhhi(addr, 0);
      return;
    case 4:
      z_mvhi(addr, 0);
      return;
    case 8:
      z_mvghi(addr, 0);
      return;
    default: ; // Fallthru to xc.
  }

  // Caution: the emitter with Address operands does implicitly decrement the length
  if (size <= 256) {
    z_xc(addr, size, addr);
  } else {
    unsigned int offset = addr.disp();
    unsigned int incr   = 256;
    for (unsigned int i = 0; i <= size-incr; i += incr) {
      z_xc(offset, incr - 1, addr.base(), offset, addr.base());
      offset += incr;
    }
    unsigned int rest = size - (offset - addr.disp());
    if (rest > 0) {
      z_xc(offset, rest-1, addr.base(), offset, addr.base());
    }
  }
}

void MacroAssembler::align(int modulus) {
  align(modulus, offset());
}

void MacroAssembler::align(int modulus, int target) {
  assert(((modulus % 2 == 0) && (target % 2 == 0)), "needs to be even");
  int delta = target - offset();
  while ((offset() + delta) % modulus != 0) z_nop();
}

// Special version for non-relocatable code if required alignment
// is larger than CodeEntryAlignment.
void MacroAssembler::align_address(int modulus) {
  while ((uintptr_t)pc() % modulus != 0) z_nop();
}

Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         Register temp_reg,
                                         int64_t extra_slot_offset) {
  // On Z, we can have index and disp in an Address. So don't call argument_offset,
  // which issues an unnecessary add instruction.
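  // Illustrative: with a constant arg_slot and extra_slot_offset == 0, the result is simply
  // Address(Z_esp, arg_slot * stackElementSize); with a register arg_slot, the slot number is
  // shifted into temp_reg and used as the index register (see below).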
1121 int stackElementSize = Interpreter::stackElementSize; 1122 int64_t offset = extra_slot_offset * stackElementSize; 1123 const Register argbase = Z_esp; 1124 if (arg_slot.is_constant()) { 1125 offset += arg_slot.as_constant() * stackElementSize; 1126 return Address(argbase, offset); 1127 } 1128 // else 1129 assert(temp_reg != noreg, "must specify"); 1130 assert(temp_reg != Z_ARG1, "base and index are conflicting"); 1131 z_sllg(temp_reg, arg_slot.as_register(), exact_log2(stackElementSize)); // tempreg = arg_slot << 3 1132 return Address(argbase, temp_reg, offset); 1133 } 1134 1135 1136 //=================================================================== 1137 //=== START C O N S T A N T S I N C O D E S T R E A M === 1138 //=================================================================== 1139 //=== P A T CH A B L E C O N S T A N T S === 1140 //=================================================================== 1141 1142 1143 //--------------------------------------------------- 1144 // Load (patchable) constant into register 1145 //--------------------------------------------------- 1146 1147 1148 // Load absolute address (and try to optimize). 1149 // Note: This method is usable only for position-fixed code, 1150 // referring to a position-fixed target location. 1151 // If not so, relocations and patching must be used. 1152 void MacroAssembler::load_absolute_address(Register d, address addr) { 1153 assert(addr != nullptr, "should not happen"); 1154 BLOCK_COMMENT("load_absolute_address:"); 1155 if (addr == nullptr) { 1156 z_larl(d, pc()); // Dummy emit for size calc. 1157 return; 1158 } 1159 1160 if (RelAddr::is_in_range_of_RelAddr32(addr, pc())) { 1161 z_larl(d, addr); 1162 return; 1163 } 1164 1165 load_const_optimized(d, (long)addr); 1166 } 1167 1168 // Load a 64bit constant. 1169 // Patchable code sequence, but not atomically patchable. 1170 // Make sure to keep code size constant -> no value-dependent optimizations. 1171 // Do not kill condition code. 1172 void MacroAssembler::load_const(Register t, long x) { 1173 // Note: Right shift is only cleanly defined for unsigned types 1174 // or for signed types with nonnegative values. 1175 Assembler::z_iihf(t, (long)((unsigned long)x >> 32)); 1176 Assembler::z_iilf(t, (long)((unsigned long)x & 0xffffffffUL)); 1177 } 1178 1179 // Load a 32bit constant into a 64bit register, sign-extend or zero-extend. 1180 // Patchable code sequence, but not atomically patchable. 1181 // Make sure to keep code size constant -> no value-dependent optimizations. 1182 // Do not kill condition code. 1183 void MacroAssembler::load_const_32to64(Register t, int64_t x, bool sign_extend) { 1184 if (sign_extend) { Assembler::z_lgfi(t, x); } 1185 else { Assembler::z_llilf(t, x); } 1186 } 1187 1188 // Load narrow oop constant, no decompression. 1189 void MacroAssembler::load_narrow_oop(Register t, narrowOop a) { 1190 assert(UseCompressedOops, "must be on to call this method"); 1191 load_const_32to64(t, CompressedOops::narrow_oop_value(a), false /*sign_extend*/); 1192 } 1193 1194 // Load narrow klass constant, compression required. 1195 void MacroAssembler::load_narrow_klass(Register t, Klass* k) { 1196 assert(UseCompressedClassPointers, "must be on to call this method"); 1197 narrowKlass encoded_k = CompressedKlassPointers::encode(k); 1198 load_const_32to64(t, encoded_k, false /*sign_extend*/); 1199 } 1200 1201 //------------------------------------------------------ 1202 // Compare (patchable) constant with register. 
1203 //------------------------------------------------------ 1204 1205 // Compare narrow oop in reg with narrow oop constant, no decompression. 1206 void MacroAssembler::compare_immediate_narrow_oop(Register oop1, narrowOop oop2) { 1207 assert(UseCompressedOops, "must be on to call this method"); 1208 1209 Assembler::z_clfi(oop1, CompressedOops::narrow_oop_value(oop2)); 1210 } 1211 1212 // Compare narrow oop in reg with narrow oop constant, no decompression. 1213 void MacroAssembler::compare_immediate_narrow_klass(Register klass1, Klass* klass2) { 1214 assert(UseCompressedClassPointers, "must be on to call this method"); 1215 narrowKlass encoded_k = CompressedKlassPointers::encode(klass2); 1216 1217 Assembler::z_clfi(klass1, encoded_k); 1218 } 1219 1220 //---------------------------------------------------------- 1221 // Check which kind of load_constant we have here. 1222 //---------------------------------------------------------- 1223 1224 // Detection of CPU version dependent load_const sequence. 1225 // The detection is valid only for code sequences generated by load_const, 1226 // not load_const_optimized. 1227 bool MacroAssembler::is_load_const(address a) { 1228 unsigned long inst1, inst2; 1229 unsigned int len1, len2; 1230 1231 len1 = get_instruction(a, &inst1); 1232 len2 = get_instruction(a + len1, &inst2); 1233 1234 return is_z_iihf(inst1) && is_z_iilf(inst2); 1235 } 1236 1237 // Detection of CPU version dependent load_const_32to64 sequence. 1238 // Mostly used for narrow oops and narrow Klass pointers. 1239 // The detection is valid only for code sequences generated by load_const_32to64. 1240 bool MacroAssembler::is_load_const_32to64(address pos) { 1241 unsigned long inst1, inst2; 1242 unsigned int len1; 1243 1244 len1 = get_instruction(pos, &inst1); 1245 return is_z_llilf(inst1); 1246 } 1247 1248 // Detection of compare_immediate_narrow sequence. 1249 // The detection is valid only for code sequences generated by compare_immediate_narrow_oop. 1250 bool MacroAssembler::is_compare_immediate32(address pos) { 1251 return is_equal(pos, CLFI_ZOPC, RIL_MASK); 1252 } 1253 1254 // Detection of compare_immediate_narrow sequence. 1255 // The detection is valid only for code sequences generated by compare_immediate_narrow_oop. 1256 bool MacroAssembler::is_compare_immediate_narrow_oop(address pos) { 1257 return is_compare_immediate32(pos); 1258 } 1259 1260 // Detection of compare_immediate_narrow sequence. 1261 // The detection is valid only for code sequences generated by compare_immediate_narrow_klass. 1262 bool MacroAssembler::is_compare_immediate_narrow_klass(address pos) { 1263 return is_compare_immediate32(pos); 1264 } 1265 1266 //----------------------------------- 1267 // patch the load_constant 1268 //----------------------------------- 1269 1270 // CPU-version dependent patching of load_const. 1271 void MacroAssembler::patch_const(address a, long x) { 1272 assert(is_load_const(a), "not a load of a constant"); 1273 // Note: Right shift is only cleanly defined for unsigned types 1274 // or for signed types with nonnegative values. 1275 set_imm32((address)a, (long)((unsigned long)x >> 32)); 1276 set_imm32((address)(a + 6), (long)((unsigned long)x & 0xffffffffUL)); 1277 } 1278 1279 // Patching the value of CPU version dependent load_const_32to64 sequence. 1280 // The passed ptr MUST be in compressed format! 
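// The return value 6 below is the byte length of the RIL-format instruction (LLILF/LGFI)
// emitted by load_const_32to64; its 32-bit immediate field is what gets patched here.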
1281 int MacroAssembler::patch_load_const_32to64(address pos, int64_t np) { 1282 assert(is_load_const_32to64(pos), "not a load of a narrow ptr (oop or klass)"); 1283 1284 set_imm32(pos, np); 1285 return 6; 1286 } 1287 1288 // Patching the value of CPU version dependent compare_immediate_narrow sequence. 1289 // The passed ptr MUST be in compressed format! 1290 int MacroAssembler::patch_compare_immediate_32(address pos, int64_t np) { 1291 assert(is_compare_immediate32(pos), "not a compressed ptr compare"); 1292 1293 set_imm32(pos, np); 1294 return 6; 1295 } 1296 1297 // Patching the immediate value of CPU version dependent load_narrow_oop sequence. 1298 // The passed ptr must NOT be in compressed format! 1299 int MacroAssembler::patch_load_narrow_oop(address pos, oop o) { 1300 assert(UseCompressedOops, "Can only patch compressed oops"); 1301 return patch_load_const_32to64(pos, CompressedOops::narrow_oop_value(o)); 1302 } 1303 1304 // Patching the immediate value of CPU version dependent load_narrow_klass sequence. 1305 // The passed ptr must NOT be in compressed format! 1306 int MacroAssembler::patch_load_narrow_klass(address pos, Klass* k) { 1307 assert(UseCompressedClassPointers, "Can only patch compressed klass pointers"); 1308 1309 narrowKlass nk = CompressedKlassPointers::encode(k); 1310 return patch_load_const_32to64(pos, nk); 1311 } 1312 1313 // Patching the immediate value of CPU version dependent compare_immediate_narrow_oop sequence. 1314 // The passed ptr must NOT be in compressed format! 1315 int MacroAssembler::patch_compare_immediate_narrow_oop(address pos, oop o) { 1316 assert(UseCompressedOops, "Can only patch compressed oops"); 1317 return patch_compare_immediate_32(pos, CompressedOops::narrow_oop_value(o)); 1318 } 1319 1320 // Patching the immediate value of CPU version dependent compare_immediate_narrow_klass sequence. 1321 // The passed ptr must NOT be in compressed format! 1322 int MacroAssembler::patch_compare_immediate_narrow_klass(address pos, Klass* k) { 1323 assert(UseCompressedClassPointers, "Can only patch compressed klass pointers"); 1324 1325 narrowKlass nk = CompressedKlassPointers::encode(k); 1326 return patch_compare_immediate_32(pos, nk); 1327 } 1328 1329 //------------------------------------------------------------------------ 1330 // Extract the constant from a load_constant instruction stream. 1331 //------------------------------------------------------------------------ 1332 1333 // Get constant from a load_const sequence. 1334 long MacroAssembler::get_const(address a) { 1335 assert(is_load_const(a), "not a load of a constant"); 1336 unsigned long x; 1337 x = (((unsigned long) (get_imm32(a,0) & 0xffffffff)) << 32); 1338 x |= (((unsigned long) (get_imm32(a,1) & 0xffffffff))); 1339 return (long) x; 1340 } 1341 1342 //-------------------------------------- 1343 // Store a constant in memory. 1344 //-------------------------------------- 1345 1346 // General emitter to move a constant to memory. 1347 // The store is atomic. 1348 // o Address must be given in RS format (no index register) 1349 // o Displacement should be 12bit unsigned for efficiency. 20bit signed also supported. 1350 // o Constant can be 1, 2, 4, or 8 bytes, signed or unsigned. 1351 // o Memory slot can be 1, 2, 4, or 8 bytes, signed or unsigned. 1352 // o Memory slot must be at least as wide as constant, will assert otherwise. 1353 // o Signed constants will sign-extend, unsigned constants will zero-extend to slot width. 
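// Example (illustrative): imm == -1 with lc == 2, lm == 8, and a short displacement emits a
// single MVGHI; the 2-byte constant is sign-extended to fill the 8-byte slot with ones.
// Constants that do not fit the MVxHI immediates are loaded into the scratch register
// (load_const_optimized) and then stored with the matching store instruction.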
1354 int MacroAssembler::store_const(const Address &dest, long imm, 1355 unsigned int lm, unsigned int lc, 1356 Register scratch) { 1357 int64_t disp = dest.disp(); 1358 Register base = dest.base(); 1359 assert(!dest.has_index(), "not supported"); 1360 assert((lm==1)||(lm==2)||(lm==4)||(lm==8), "memory length not supported"); 1361 assert((lc==1)||(lc==2)||(lc==4)||(lc==8), "constant length not supported"); 1362 assert(lm>=lc, "memory slot too small"); 1363 assert(lc==8 || Immediate::is_simm(imm, lc*8), "const out of range"); 1364 assert(Displacement::is_validDisp(disp), "displacement out of range"); 1365 1366 bool is_shortDisp = Displacement::is_shortDisp(disp); 1367 int store_offset = -1; 1368 1369 // For target len == 1 it's easy. 1370 if (lm == 1) { 1371 store_offset = offset(); 1372 if (is_shortDisp) { 1373 z_mvi(disp, base, imm); 1374 return store_offset; 1375 } else { 1376 z_mviy(disp, base, imm); 1377 return store_offset; 1378 } 1379 } 1380 1381 // All the "good stuff" takes an unsigned displacement. 1382 if (is_shortDisp) { 1383 // NOTE: Cannot use clear_mem for imm==0, because it is not atomic. 1384 1385 store_offset = offset(); 1386 switch (lm) { 1387 case 2: // Lc == 1 handled correctly here, even for unsigned. Instruction does no widening. 1388 z_mvhhi(disp, base, imm); 1389 return store_offset; 1390 case 4: 1391 if (Immediate::is_simm16(imm)) { 1392 z_mvhi(disp, base, imm); 1393 return store_offset; 1394 } 1395 break; 1396 case 8: 1397 if (Immediate::is_simm16(imm)) { 1398 z_mvghi(disp, base, imm); 1399 return store_offset; 1400 } 1401 break; 1402 default: 1403 ShouldNotReachHere(); 1404 break; 1405 } 1406 } 1407 1408 // Can't optimize, so load value and store it. 1409 guarantee(scratch != noreg, " need a scratch register here !"); 1410 if (imm != 0) { 1411 load_const_optimized(scratch, imm); // Preserves CC anyway. 1412 } else { 1413 // Leave CC alone!! 1414 (void) clear_reg(scratch, true, false); // Indicate unused result. 1415 } 1416 1417 store_offset = offset(); 1418 if (is_shortDisp) { 1419 switch (lm) { 1420 case 2: 1421 z_sth(scratch, disp, Z_R0, base); 1422 return store_offset; 1423 case 4: 1424 z_st(scratch, disp, Z_R0, base); 1425 return store_offset; 1426 case 8: 1427 z_stg(scratch, disp, Z_R0, base); 1428 return store_offset; 1429 default: 1430 ShouldNotReachHere(); 1431 break; 1432 } 1433 } else { 1434 switch (lm) { 1435 case 2: 1436 z_sthy(scratch, disp, Z_R0, base); 1437 return store_offset; 1438 case 4: 1439 z_sty(scratch, disp, Z_R0, base); 1440 return store_offset; 1441 case 8: 1442 z_stg(scratch, disp, Z_R0, base); 1443 return store_offset; 1444 default: 1445 ShouldNotReachHere(); 1446 break; 1447 } 1448 } 1449 return -1; // should not reach here 1450 } 1451 1452 //=================================================================== 1453 //=== N O T P A T CH A B L E C O N S T A N T S === 1454 //=================================================================== 1455 1456 // Load constant x into register t with a fast instruction sequence 1457 // depending on the bits in x. Preserves CC under all circumstances. 1458 int MacroAssembler::load_const_optimized_rtn_len(Register t, long x, bool emit) { 1459 if (x == 0) { 1460 int len; 1461 if (emit) { 1462 len = clear_reg(t, true, false); 1463 } else { 1464 len = 4; 1465 } 1466 return len; 1467 } 1468 1469 if (Immediate::is_simm16(x)) { 1470 if (emit) { z_lghi(t, x); } 1471 return 4; 1472 } 1473 1474 // 64 bit value: | part1 | part2 | part3 | part4 | 1475 // At least one part is not zero! 
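  // Worked examples (illustrative):
  //   x = 0x0000000000120000  -> only part3 nonzero            -> LLILH,        4 bytes
  //   x = 0xffffffff80000000  -> sign-extendable lower word    -> LGFI,         6 bytes
  //   x = 0x0001000000000001  -> one upper, one lower part set -> LLIHH + IILL, 8 bytes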
1476 // Note: Right shift is only cleanly defined for unsigned types 1477 // or for signed types with nonnegative values. 1478 int part1 = (int)((unsigned long)x >> 48) & 0x0000ffff; 1479 int part2 = (int)((unsigned long)x >> 32) & 0x0000ffff; 1480 int part3 = (int)((unsigned long)x >> 16) & 0x0000ffff; 1481 int part4 = (int)x & 0x0000ffff; 1482 int part12 = (int)((unsigned long)x >> 32); 1483 int part34 = (int)x; 1484 1485 // Lower word only (unsigned). 1486 if (part12 == 0) { 1487 if (part3 == 0) { 1488 if (emit) z_llill(t, part4); 1489 return 4; 1490 } 1491 if (part4 == 0) { 1492 if (emit) z_llilh(t, part3); 1493 return 4; 1494 } 1495 if (emit) z_llilf(t, part34); 1496 return 6; 1497 } 1498 1499 // Upper word only. 1500 if (part34 == 0) { 1501 if (part1 == 0) { 1502 if (emit) z_llihl(t, part2); 1503 return 4; 1504 } 1505 if (part2 == 0) { 1506 if (emit) z_llihh(t, part1); 1507 return 4; 1508 } 1509 if (emit) z_llihf(t, part12); 1510 return 6; 1511 } 1512 1513 // Lower word only (signed). 1514 if ((part1 == 0x0000ffff) && (part2 == 0x0000ffff) && ((part3 & 0x00008000) != 0)) { 1515 if (emit) z_lgfi(t, part34); 1516 return 6; 1517 } 1518 1519 int len = 0; 1520 1521 if ((part1 == 0) || (part2 == 0)) { 1522 if (part1 == 0) { 1523 if (emit) z_llihl(t, part2); 1524 len += 4; 1525 } else { 1526 if (emit) z_llihh(t, part1); 1527 len += 4; 1528 } 1529 } else { 1530 if (emit) z_llihf(t, part12); 1531 len += 6; 1532 } 1533 1534 if ((part3 == 0) || (part4 == 0)) { 1535 if (part3 == 0) { 1536 if (emit) z_iill(t, part4); 1537 len += 4; 1538 } else { 1539 if (emit) z_iilh(t, part3); 1540 len += 4; 1541 } 1542 } else { 1543 if (emit) z_iilf(t, part34); 1544 len += 6; 1545 } 1546 return len; 1547 } 1548 1549 //===================================================================== 1550 //=== H I G H E R L E V E L B R A N C H E M I T T E R S === 1551 //===================================================================== 1552 1553 // Note: In the worst case, one of the scratch registers is destroyed!!! 1554 void MacroAssembler::compare32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1555 // Right operand is constant. 1556 if (x2.is_constant()) { 1557 jlong value = x2.as_constant(); 1558 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/true); 1559 return; 1560 } 1561 1562 // Right operand is in register. 1563 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/true); 1564 } 1565 1566 // Note: In the worst case, one of the scratch registers is destroyed!!! 1567 void MacroAssembler::compareU32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1568 // Right operand is constant. 1569 if (x2.is_constant()) { 1570 jlong value = x2.as_constant(); 1571 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/false); 1572 return; 1573 } 1574 1575 // Right operand is in register. 1576 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/false); 1577 } 1578 1579 // Note: In the worst case, one of the scratch registers is destroyed!!! 1580 void MacroAssembler::compare64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1581 // Right operand is constant. 1582 if (x2.is_constant()) { 1583 jlong value = x2.as_constant(); 1584 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/true); 1585 return; 1586 } 1587 1588 // Right operand is in register. 
1589 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/true); 1590 } 1591 1592 void MacroAssembler::compareU64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1593 // Right operand is constant. 1594 if (x2.is_constant()) { 1595 jlong value = x2.as_constant(); 1596 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/false); 1597 return; 1598 } 1599 1600 // Right operand is in register. 1601 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/false); 1602 } 1603 1604 // Generate an optimal branch to the branch target. 1605 // Optimal means that a relative branch (brc or brcl) is used if the 1606 // branch distance is short enough. Loading the target address into a 1607 // register and branching via reg is used as fallback only. 1608 // 1609 // Used registers: 1610 // Z_R1 - work reg. Holds branch target address. 1611 // Used in fallback case only. 1612 // 1613 // This version of branch_optimized is good for cases where the target address is known 1614 // and constant, i.e. is never changed (no relocation, no patching). 1615 void MacroAssembler::branch_optimized(Assembler::branch_condition cond, address branch_addr) { 1616 address branch_origin = pc(); 1617 1618 if (RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) { 1619 z_brc(cond, branch_addr); 1620 } else if (RelAddr::is_in_range_of_RelAddr32(branch_addr, branch_origin)) { 1621 z_brcl(cond, branch_addr); 1622 } else { 1623 load_const_optimized(Z_R1, branch_addr); // CC must not get killed by load_const_optimized. 1624 z_bcr(cond, Z_R1); 1625 } 1626 } 1627 1628 // This version of branch_optimized is good for cases where the target address 1629 // is potentially not yet known at the time the code is emitted. 1630 // 1631 // One very common case is a branch to an unbound label which is handled here. 1632 // The caller might know (or hope) that the branch distance is short enough 1633 // to be encoded in a 16bit relative address. In this case he will pass a 1634 // NearLabel branch_target. 1635 // Care must be taken with unbound labels. Each call to target(label) creates 1636 // an entry in the patch queue for that label to patch all references of the label 1637 // once it gets bound. Those recorded patch locations must be patchable. Otherwise, 1638 // an assertion fires at patch time. 1639 void MacroAssembler::branch_optimized(Assembler::branch_condition cond, Label& branch_target) { 1640 if (branch_target.is_bound()) { 1641 address branch_addr = target(branch_target); 1642 branch_optimized(cond, branch_addr); 1643 } else if (branch_target.is_near()) { 1644 z_brc(cond, branch_target); // Caller assures that the target will be in range for z_brc. 1645 } else { 1646 z_brcl(cond, branch_target); // Let's hope target is in range. Otherwise, we will abort at patch time. 1647 } 1648 } 1649 1650 // Generate an optimal compare and branch to the branch target. 1651 // Optimal means that a relative branch (clgrj, brc or brcl) is used if the 1652 // branch distance is short enough. Loading the target address into a 1653 // register and branching via reg is used as fallback only. 
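// The fused compare-and-branch forms (CRJ, CLRJ, CGRJ, CLGRJ) are only available
// when VM_Version::has_CompareBranch() reports the facility; otherwise a separate
// compare followed by branch_optimized() is emitted.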
1654 // 1655 // Input: 1656 // r1 - left compare operand 1657 // r2 - right compare operand 1658 void MacroAssembler::compare_and_branch_optimized(Register r1, 1659 Register r2, 1660 Assembler::branch_condition cond, 1661 address branch_addr, 1662 bool len64, 1663 bool has_sign) { 1664 unsigned int casenum = (len64?2:0)+(has_sign?0:1); 1665 1666 address branch_origin = pc(); 1667 if (VM_Version::has_CompareBranch() && RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) { 1668 switch (casenum) { 1669 case 0: z_crj( r1, r2, cond, branch_addr); break; 1670 case 1: z_clrj (r1, r2, cond, branch_addr); break; 1671 case 2: z_cgrj(r1, r2, cond, branch_addr); break; 1672 case 3: z_clgrj(r1, r2, cond, branch_addr); break; 1673 default: ShouldNotReachHere(); break; 1674 } 1675 } else { 1676 switch (casenum) { 1677 case 0: z_cr( r1, r2); break; 1678 case 1: z_clr(r1, r2); break; 1679 case 2: z_cgr(r1, r2); break; 1680 case 3: z_clgr(r1, r2); break; 1681 default: ShouldNotReachHere(); break; 1682 } 1683 branch_optimized(cond, branch_addr); 1684 } 1685 } 1686 1687 // Generate an optimal compare and branch to the branch target. 1688 // Optimal means that a relative branch (clgij, brc or brcl) is used if the 1689 // branch distance is short enough. Loading the target address into a 1690 // register and branching via reg is used as fallback only. 1691 // 1692 // Input: 1693 // r1 - left compare operand (in register) 1694 // x2 - right compare operand (immediate) 1695 void MacroAssembler::compare_and_branch_optimized(Register r1, 1696 jlong x2, 1697 Assembler::branch_condition cond, 1698 Label& branch_target, 1699 bool len64, 1700 bool has_sign) { 1701 address branch_origin = pc(); 1702 bool x2_imm8 = (has_sign && Immediate::is_simm8(x2)) || (!has_sign && Immediate::is_uimm8(x2)); 1703 bool is_RelAddr16 = branch_target.is_near() || 1704 (branch_target.is_bound() && 1705 RelAddr::is_in_range_of_RelAddr16(target(branch_target), branch_origin)); 1706 unsigned int casenum = (len64?2:0)+(has_sign?0:1); 1707 1708 if (VM_Version::has_CompareBranch() && is_RelAddr16 && x2_imm8) { 1709 switch (casenum) { 1710 case 0: z_cij( r1, x2, cond, branch_target); break; 1711 case 1: z_clij(r1, x2, cond, branch_target); break; 1712 case 2: z_cgij(r1, x2, cond, branch_target); break; 1713 case 3: z_clgij(r1, x2, cond, branch_target); break; 1714 default: ShouldNotReachHere(); break; 1715 } 1716 return; 1717 } 1718 1719 if (x2 == 0) { 1720 switch (casenum) { 1721 case 0: z_ltr(r1, r1); break; 1722 case 1: z_ltr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication! 1723 case 2: z_ltgr(r1, r1); break; 1724 case 3: z_ltgr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication! 
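      // (LTR/LTGR sets the condition code from a signed comparison with zero, so
      //  after the unsigned cases above only bcondEqual/bcondNotEqual is reliable.)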
1725 default: ShouldNotReachHere(); break; 1726 } 1727 } else { 1728 if ((has_sign && Immediate::is_simm16(x2)) || (!has_sign && Immediate::is_uimm(x2, 15))) { 1729 switch (casenum) { 1730 case 0: z_chi(r1, x2); break; 1731 case 1: z_chi(r1, x2); break; // positive immediate < 2**15 1732 case 2: z_cghi(r1, x2); break; 1733 case 3: z_cghi(r1, x2); break; // positive immediate < 2**15 1734 default: break; 1735 } 1736 } else if ( (has_sign && Immediate::is_simm32(x2)) || (!has_sign && Immediate::is_uimm32(x2)) ) { 1737 switch (casenum) { 1738 case 0: z_cfi( r1, x2); break; 1739 case 1: z_clfi(r1, x2); break; 1740 case 2: z_cgfi(r1, x2); break; 1741 case 3: z_clgfi(r1, x2); break; 1742 default: ShouldNotReachHere(); break; 1743 } 1744 } else { 1745 // No instruction with immediate operand possible, so load into register. 1746 Register scratch = (r1 != Z_R0) ? Z_R0 : Z_R1; 1747 load_const_optimized(scratch, x2); 1748 switch (casenum) { 1749 case 0: z_cr( r1, scratch); break; 1750 case 1: z_clr(r1, scratch); break; 1751 case 2: z_cgr(r1, scratch); break; 1752 case 3: z_clgr(r1, scratch); break; 1753 default: ShouldNotReachHere(); break; 1754 } 1755 } 1756 } 1757 branch_optimized(cond, branch_target); 1758 } 1759 1760 // Generate an optimal compare and branch to the branch target. 1761 // Optimal means that a relative branch (clgrj, brc or brcl) is used if the 1762 // branch distance is short enough. Loading the target address into a 1763 // register and branching via reg is used as fallback only. 1764 // 1765 // Input: 1766 // r1 - left compare operand 1767 // r2 - right compare operand 1768 void MacroAssembler::compare_and_branch_optimized(Register r1, 1769 Register r2, 1770 Assembler::branch_condition cond, 1771 Label& branch_target, 1772 bool len64, 1773 bool has_sign) { 1774 unsigned int casenum = (len64 ? 2 : 0) + (has_sign ? 
0 : 1); 1775 1776 if (branch_target.is_bound()) { 1777 address branch_addr = target(branch_target); 1778 compare_and_branch_optimized(r1, r2, cond, branch_addr, len64, has_sign); 1779 } else { 1780 if (VM_Version::has_CompareBranch() && branch_target.is_near()) { 1781 switch (casenum) { 1782 case 0: z_crj( r1, r2, cond, branch_target); break; 1783 case 1: z_clrj( r1, r2, cond, branch_target); break; 1784 case 2: z_cgrj( r1, r2, cond, branch_target); break; 1785 case 3: z_clgrj(r1, r2, cond, branch_target); break; 1786 default: ShouldNotReachHere(); break; 1787 } 1788 } else { 1789 switch (casenum) { 1790 case 0: z_cr( r1, r2); break; 1791 case 1: z_clr(r1, r2); break; 1792 case 2: z_cgr(r1, r2); break; 1793 case 3: z_clgr(r1, r2); break; 1794 default: ShouldNotReachHere(); break; 1795 } 1796 branch_optimized(cond, branch_target); 1797 } 1798 } 1799 } 1800 1801 //=========================================================================== 1802 //=== END H I G H E R L E V E L B R A N C H E M I T T E R S === 1803 //=========================================================================== 1804 1805 AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) { 1806 assert(oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 1807 int index = oop_recorder()->allocate_metadata_index(obj); 1808 RelocationHolder rspec = metadata_Relocation::spec(index); 1809 return AddressLiteral((address)obj, rspec); 1810 } 1811 1812 AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) { 1813 assert(oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 1814 int index = oop_recorder()->find_index(obj); 1815 RelocationHolder rspec = metadata_Relocation::spec(index); 1816 return AddressLiteral((address)obj, rspec); 1817 } 1818 1819 AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) { 1820 assert(oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 1821 int oop_index = oop_recorder()->allocate_oop_index(obj); 1822 return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); 1823 } 1824 1825 AddressLiteral MacroAssembler::constant_oop_address(jobject obj) { 1826 assert(oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 1827 int oop_index = oop_recorder()->find_index(obj); 1828 return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); 1829 } 1830 1831 // NOTE: destroys r 1832 void MacroAssembler::c2bool(Register r, Register t) { 1833 z_lcr(t, r); // t = -r 1834 z_or(r, t); // r = -r OR r 1835 z_srl(r, 31); // Yields 0 if r was 0, 1 otherwise. 1836 } 1837 1838 // Patch instruction `inst' at offset `inst_pos' to refer to `dest_pos' 1839 // and return the resulting instruction. 1840 // Dest_pos and inst_pos are 32 bit only. These parms can only designate 1841 // relative positions. 1842 // Use correct argument types. Do not pre-calculate distance. 
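// Note: the 16-bit and 32-bit pc-relative fields patched below hold signed
// halfword counts relative to the instruction address (all targets are 2-byte
// aligned). A short field therefore spans roughly +/- 64 KB; e.g. a branch to
// dest_pos == inst_pos + 100 is encoded as 50.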
1843 unsigned long MacroAssembler::patched_branch(address dest_pos, unsigned long inst, address inst_pos) { 1844 int c = 0; 1845 unsigned long patched_inst = 0; 1846 if (is_call_pcrelative_short(inst) || 1847 is_branch_pcrelative_short(inst) || 1848 is_branchoncount_pcrelative_short(inst) || 1849 is_branchonindex32_pcrelative_short(inst)) { 1850 c = 1; 1851 int m = fmask(15, 0); // simm16(-1, 16, 32); 1852 int v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 32); 1853 patched_inst = (inst & ~m) | v; 1854 } else if (is_compareandbranch_pcrelative_short(inst)) { 1855 c = 2; 1856 long m = fmask(31, 16); // simm16(-1, 16, 48); 1857 long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48); 1858 patched_inst = (inst & ~m) | v; 1859 } else if (is_branchonindex64_pcrelative_short(inst)) { 1860 c = 3; 1861 long m = fmask(31, 16); // simm16(-1, 16, 48); 1862 long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48); 1863 patched_inst = (inst & ~m) | v; 1864 } else if (is_call_pcrelative_long(inst) || is_branch_pcrelative_long(inst)) { 1865 c = 4; 1866 long m = fmask(31, 0); // simm32(-1, 16, 48); 1867 long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48); 1868 patched_inst = (inst & ~m) | v; 1869 } else if (is_pcrelative_long(inst)) { // These are the non-branch pc-relative instructions. 1870 c = 5; 1871 long m = fmask(31, 0); // simm32(-1, 16, 48); 1872 long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48); 1873 patched_inst = (inst & ~m) | v; 1874 } else { 1875 print_dbg_msg(tty, inst, "not a relative branch", 0); 1876 dump_code_range(tty, inst_pos, 32, "not a pcrelative branch"); 1877 ShouldNotReachHere(); 1878 } 1879 1880 long new_off = get_pcrel_offset(patched_inst); 1881 if (new_off != (dest_pos-inst_pos)) { 1882 tty->print_cr("case %d: dest_pos = %p, inst_pos = %p, disp = %ld(%12.12lx)", c, dest_pos, inst_pos, new_off, new_off); 1883 print_dbg_msg(tty, inst, "<- original instruction: branch patching error", 0); 1884 print_dbg_msg(tty, patched_inst, "<- patched instruction: branch patching error", 0); 1885 #ifdef LUCY_DBG 1886 VM_Version::z_SIGSEGV(); 1887 #endif 1888 ShouldNotReachHere(); 1889 } 1890 return patched_inst; 1891 } 1892 1893 // Only called when binding labels (share/vm/asm/assembler.cpp) 1894 // Pass arguments as intended. Do not pre-calculate distance. 1895 void MacroAssembler::pd_patch_instruction(address branch, address target, const char* file, int line) { 1896 unsigned long stub_inst; 1897 int inst_len = get_instruction(branch, &stub_inst); 1898 1899 set_instruction(branch, patched_branch(target, stub_inst, branch), inst_len); 1900 } 1901 1902 1903 // Extract relative address (aka offset). 1904 // inv_simm16 works for 4-byte instructions only. 1905 // compare and branch instructions are 6-byte and have a 16bit offset "in the middle". 
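// Note: get_instruction() returns the instruction right-justified in a 64-bit
// container. For a 4-byte branch (e.g. BRC) the offset is the low halfword of
// the container; for a 6-byte compare-and-branch (e.g. CRJ) it sits one
// halfword higher, which is what the two cases below distinguish.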
1906 long MacroAssembler::get_pcrel_offset(unsigned long inst) { 1907 1908 if (MacroAssembler::is_pcrelative_short(inst)) { 1909 if (((inst&0xFFFFffff00000000UL) == 0) && ((inst&0x00000000FFFF0000UL) != 0)) { 1910 return RelAddr::inv_pcrel_off16(inv_simm16(inst)); 1911 } else { 1912 return RelAddr::inv_pcrel_off16(inv_simm16_48(inst)); 1913 } 1914 } 1915 1916 if (MacroAssembler::is_pcrelative_long(inst)) { 1917 return RelAddr::inv_pcrel_off32(inv_simm32(inst)); 1918 } 1919 1920 print_dbg_msg(tty, inst, "not a pcrelative instruction", 6); 1921 #ifdef LUCY_DBG 1922 VM_Version::z_SIGSEGV(); 1923 #else 1924 ShouldNotReachHere(); 1925 #endif 1926 return -1; 1927 } 1928 1929 long MacroAssembler::get_pcrel_offset(address pc) { 1930 unsigned long inst; 1931 unsigned int len = get_instruction(pc, &inst); 1932 1933 #ifdef ASSERT 1934 long offset; 1935 if (MacroAssembler::is_pcrelative_short(inst) || MacroAssembler::is_pcrelative_long(inst)) { 1936 offset = get_pcrel_offset(inst); 1937 } else { 1938 offset = -1; 1939 } 1940 1941 if (offset == -1) { 1942 dump_code_range(tty, pc, 32, "not a pcrelative instruction"); 1943 #ifdef LUCY_DBG 1944 VM_Version::z_SIGSEGV(); 1945 #else 1946 ShouldNotReachHere(); 1947 #endif 1948 } 1949 return offset; 1950 #else 1951 return get_pcrel_offset(inst); 1952 #endif // ASSERT 1953 } 1954 1955 // Get target address from pc-relative instructions. 1956 address MacroAssembler::get_target_addr_pcrel(address pc) { 1957 assert(is_pcrelative_long(pc), "not a pcrelative instruction"); 1958 return pc + get_pcrel_offset(pc); 1959 } 1960 1961 // Patch pc relative load address. 1962 void MacroAssembler::patch_target_addr_pcrel(address pc, address con) { 1963 unsigned long inst; 1964 // Offset is +/- 2**32 -> use long. 1965 ptrdiff_t distance = con - pc; 1966 1967 get_instruction(pc, &inst); 1968 1969 if (is_pcrelative_short(inst)) { 1970 *(short *)(pc+2) = RelAddr::pcrel_off16(con, pc); // Instructions are at least 2-byte aligned, no test required. 1971 1972 // Some extra safety net. 1973 if (!RelAddr::is_in_range_of_RelAddr16(distance)) { 1974 print_dbg_msg(tty, inst, "distance out of range (16bit)", 4); 1975 dump_code_range(tty, pc, 32, "distance out of range (16bit)"); 1976 guarantee(RelAddr::is_in_range_of_RelAddr16(distance), "too far away (more than +/- 2**16"); 1977 } 1978 return; 1979 } 1980 1981 if (is_pcrelative_long(inst)) { 1982 *(int *)(pc+2) = RelAddr::pcrel_off32(con, pc); 1983 1984 // Some Extra safety net. 1985 if (!RelAddr::is_in_range_of_RelAddr32(distance)) { 1986 print_dbg_msg(tty, inst, "distance out of range (32bit)", 6); 1987 dump_code_range(tty, pc, 32, "distance out of range (32bit)"); 1988 guarantee(RelAddr::is_in_range_of_RelAddr32(distance), "too far away (more than +/- 2**32"); 1989 } 1990 return; 1991 } 1992 1993 guarantee(false, "not a pcrelative instruction to patch!"); 1994 } 1995 1996 // "Current PC" here means the address just behind the basr instruction. 1997 address MacroAssembler::get_PC(Register result) { 1998 z_basr(result, Z_R0); // Don't branch, just save next instruction address in result. 1999 return pc(); 2000 } 2001 2002 // Get current PC + offset. 2003 // Offset given in bytes, must be even! 2004 // "Current PC" here means the address of the larl instruction plus the given offset. 2005 address MacroAssembler::get_PC(Register result, int64_t offset) { 2006 address here = pc(); 2007 z_larl(result, offset/2); // Save target instruction address in result. 
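  // LARL encodes its operand as a signed halfword count relative to the LARL
  // instruction itself, hence the division by 2 (the offset must be even).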
2008 return here + offset; 2009 } 2010 2011 void MacroAssembler::instr_size(Register size, Register pc) { 2012 // Extract 2 most significant bits of current instruction. 2013 z_llgc(size, Address(pc)); 2014 z_srl(size, 6); 2015 // Compute (x+3)&6 which translates 0->2, 1->4, 2->4, 3->6. 2016 z_ahi(size, 3); 2017 z_nill(size, 6); 2018 } 2019 2020 // Resize_frame with SP(new) = SP(old) - [offset]. 2021 void MacroAssembler::resize_frame_sub(Register offset, Register fp, bool load_fp) 2022 { 2023 assert_different_registers(offset, fp, Z_SP); 2024 if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); } 2025 2026 z_sgr(Z_SP, offset); 2027 z_stg(fp, _z_abi(callers_sp), Z_SP); 2028 } 2029 2030 // Resize_frame with SP(new) = [newSP] + offset. 2031 // This emitter is useful if we already have calculated a pointer 2032 // into the to-be-allocated stack space, e.g. with special alignment properties, 2033 // but need some additional space, e.g. for spilling. 2034 // newSP is the pre-calculated pointer. It must not be modified. 2035 // fp holds, or is filled with, the frame pointer. 2036 // offset is the additional increment which is added to addr to form the new SP. 2037 // Note: specify a negative value to reserve more space! 2038 // load_fp == true only indicates that fp is not pre-filled with the frame pointer. 2039 // It does not guarantee that fp contains the frame pointer at the end. 2040 void MacroAssembler::resize_frame_abs_with_offset(Register newSP, Register fp, int offset, bool load_fp) { 2041 assert_different_registers(newSP, fp, Z_SP); 2042 2043 if (load_fp) { 2044 z_lg(fp, _z_abi(callers_sp), Z_SP); 2045 } 2046 2047 add2reg(Z_SP, offset, newSP); 2048 z_stg(fp, _z_abi(callers_sp), Z_SP); 2049 } 2050 2051 // Resize_frame with SP(new) = [newSP]. 2052 // load_fp == true only indicates that fp is not pre-filled with the frame pointer. 2053 // It does not guarantee that fp contains the frame pointer at the end. 2054 void MacroAssembler::resize_frame_absolute(Register newSP, Register fp, bool load_fp) { 2055 assert_different_registers(newSP, fp, Z_SP); 2056 2057 if (load_fp) { 2058 z_lg(fp, _z_abi(callers_sp), Z_SP); // need to use load/store. 2059 } 2060 2061 z_lgr(Z_SP, newSP); 2062 if (newSP != Z_R0) { // make sure we generate correct code, no matter what register newSP uses. 2063 z_stg(fp, _z_abi(callers_sp), newSP); 2064 } else { 2065 z_stg(fp, _z_abi(callers_sp), Z_SP); 2066 } 2067 } 2068 2069 // Resize_frame with SP(new) = SP(old) + offset. 2070 void MacroAssembler::resize_frame(RegisterOrConstant offset, Register fp, bool load_fp) { 2071 assert_different_registers(fp, Z_SP); 2072 2073 if (load_fp) { 2074 z_lg(fp, _z_abi(callers_sp), Z_SP); 2075 } 2076 add64(Z_SP, offset); 2077 z_stg(fp, _z_abi(callers_sp), Z_SP); 2078 } 2079 2080 void MacroAssembler::push_frame(Register bytes, Register old_sp, bool copy_sp, bool bytes_with_inverted_sign) { 2081 #ifdef ASSERT 2082 assert_different_registers(bytes, old_sp, Z_SP); 2083 if (!copy_sp) { 2084 z_cgr(old_sp, Z_SP); 2085 asm_assert(bcondEqual, "[old_sp]!=[Z_SP]", 0x211); 2086 } 2087 #endif 2088 if (copy_sp) { z_lgr(old_sp, Z_SP); } 2089 if (bytes_with_inverted_sign) { 2090 z_agr(Z_SP, bytes); 2091 } else { 2092 z_sgr(Z_SP, bytes); // Z_sgfr sufficient, but probably not faster. 
2093 } 2094 z_stg(old_sp, _z_abi(callers_sp), Z_SP); 2095 } 2096 2097 unsigned int MacroAssembler::push_frame(unsigned int bytes, Register scratch) { 2098 long offset = Assembler::align(bytes, frame::alignment_in_bytes); 2099 assert(offset > 0, "should push a frame with positive size, size = %ld.", offset); 2100 assert(Displacement::is_validDisp(-offset), "frame size out of range, size = %ld", offset); 2101 2102 // We must not write outside the current stack bounds (given by Z_SP). 2103 // Thus, we have to first update Z_SP and then store the previous SP as stack linkage. 2104 // We rely on Z_R0 by default to be available as scratch. 2105 z_lgr(scratch, Z_SP); 2106 add2reg(Z_SP, -offset); 2107 z_stg(scratch, _z_abi(callers_sp), Z_SP); 2108 #ifdef ASSERT 2109 // Just make sure nobody uses the value in the default scratch register. 2110 // When another register is used, the caller might rely on it containing the frame pointer. 2111 if (scratch == Z_R0) { 2112 z_iihf(scratch, 0xbaadbabe); 2113 z_iilf(scratch, 0xdeadbeef); 2114 } 2115 #endif 2116 return offset; 2117 } 2118 2119 // Push a frame of size `bytes' plus abi160 on top. 2120 unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) { 2121 BLOCK_COMMENT("push_frame_abi160 {"); 2122 unsigned int res = push_frame(bytes + frame::z_abi_160_size); 2123 BLOCK_COMMENT("} push_frame_abi160"); 2124 return res; 2125 } 2126 2127 // Pop current C frame. 2128 void MacroAssembler::pop_frame() { 2129 BLOCK_COMMENT("pop_frame:"); 2130 Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP); 2131 } 2132 2133 // Pop current C frame and restore return PC register (Z_R14). 2134 void MacroAssembler::pop_frame_restore_retPC(int frame_size_in_bytes) { 2135 BLOCK_COMMENT("pop_frame_restore_retPC:"); 2136 int retPC_offset = _z_common_abi(return_pc) + frame_size_in_bytes; 2137 // If possible, pop frame by add instead of load (a penny saved is a penny got :-). 2138 if (Displacement::is_validDisp(retPC_offset)) { 2139 z_lg(Z_R14, retPC_offset, Z_SP); 2140 add2reg(Z_SP, frame_size_in_bytes); 2141 } else { 2142 add2reg(Z_SP, frame_size_in_bytes); 2143 restore_return_pc(); 2144 } 2145 } 2146 2147 void MacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) { 2148 if (allow_relocation) { 2149 call_c(entry_point); 2150 } else { 2151 call_c_static(entry_point); 2152 } 2153 } 2154 2155 void MacroAssembler::call_VM_leaf_base(address entry_point) { 2156 bool allow_relocation = true; 2157 call_VM_leaf_base(entry_point, allow_relocation); 2158 } 2159 2160 int MacroAssembler::ic_check_size() { 2161 return 30 + (ImplicitNullChecks ? 0 : 6); 2162 } 2163 2164 int MacroAssembler::ic_check(int end_alignment) { 2165 Register R2_receiver = Z_ARG1; 2166 Register R0_scratch = Z_R0_scratch; 2167 Register R1_scratch = Z_R1_scratch; 2168 Register R9_data = Z_inline_cache; 2169 Label success, failure; 2170 2171 // The UEP of a code blob ensures that the VEP is padded. However, the padding of the UEP is placed 2172 // before the inline cache check, so we don't have to execute any nop instructions when dispatching 2173 // through the UEP, yet we can ensure that the VEP is aligned appropriately. 
That's why we align 2174 // before the inline cache check here, and not after 2175 align(end_alignment, offset() + ic_check_size()); 2176 2177 int uep_offset = offset(); 2178 if (!ImplicitNullChecks) { 2179 z_cgij(R2_receiver, 0, Assembler::bcondEqual, failure); 2180 } 2181 2182 if (UseCompressedClassPointers) { 2183 z_llgf(R1_scratch, Address(R2_receiver, oopDesc::klass_offset_in_bytes())); 2184 } else { 2185 z_lg(R1_scratch, Address(R2_receiver, oopDesc::klass_offset_in_bytes())); 2186 } 2187 z_cg(R1_scratch, Address(R9_data, in_bytes(CompiledICData::speculated_klass_offset()))); 2188 z_bre(success); 2189 2190 bind(failure); 2191 load_const(R1_scratch, AddressLiteral(SharedRuntime::get_ic_miss_stub())); 2192 z_br(R1_scratch); 2193 bind(success); 2194 2195 assert((offset() % end_alignment) == 0, "Misaligned verified entry point, offset() = %d, end_alignment = %d", offset(), end_alignment); 2196 return uep_offset; 2197 } 2198 2199 void MacroAssembler::call_VM_base(Register oop_result, 2200 Register last_java_sp, 2201 address entry_point, 2202 bool allow_relocation, 2203 bool check_exceptions) { // Defaults to true. 2204 // Allow_relocation indicates, if true, that the generated code shall 2205 // be fit for code relocation or referenced data relocation. In other 2206 // words: all addresses must be considered variable. PC-relative addressing 2207 // is not possible then. 2208 // On the other hand, if (allow_relocation == false), addresses and offsets 2209 // may be considered stable, enabling us to take advantage of some PC-relative 2210 // addressing tweaks. These might improve performance and reduce code size. 2211 2212 // Determine last_java_sp register. 2213 if (!last_java_sp->is_valid()) { 2214 last_java_sp = Z_SP; // Load Z_SP as SP. 2215 } 2216 2217 set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, Z_R1, allow_relocation); 2218 2219 // ARG1 must hold thread address. 2220 z_lgr(Z_ARG1, Z_thread); 2221 2222 address return_pc = nullptr; 2223 if (allow_relocation) { 2224 return_pc = call_c(entry_point); 2225 } else { 2226 return_pc = call_c_static(entry_point); 2227 } 2228 2229 reset_last_Java_frame(allow_relocation); 2230 2231 // C++ interp handles this in the interpreter. 2232 check_and_handle_popframe(Z_thread); 2233 check_and_handle_earlyret(Z_thread); 2234 2235 // Check for pending exceptions. 2236 if (check_exceptions) { 2237 // Check for pending exceptions (java_thread is set upon return). 2238 load_and_test_long(Z_R0_scratch, Address(Z_thread, Thread::pending_exception_offset())); 2239 2240 // This used to conditionally jump to forward_exception however it is 2241 // possible if we relocate that the branch will not reach. So we must jump 2242 // around so we can always reach. 2243 2244 Label ok; 2245 z_bre(ok); // Bcondequal is the same as bcondZero. 2246 call_stub(StubRoutines::forward_exception_entry()); 2247 bind(ok); 2248 } 2249 2250 // Get oop result if there is one and reset the value in the thread. 2251 if (oop_result->is_valid()) { 2252 get_vm_result(oop_result); 2253 } 2254 2255 _last_calls_return_pc = return_pc; // Wipe out other (error handling) calls. 2256 } 2257 2258 void MacroAssembler::call_VM_base(Register oop_result, 2259 Register last_java_sp, 2260 address entry_point, 2261 bool check_exceptions) { // Defaults to true. 2262 bool allow_relocation = true; 2263 call_VM_base(oop_result, last_java_sp, entry_point, allow_relocation, check_exceptions); 2264 } 2265 2266 // VM calls without explicit last_java_sp. 
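// Note: call_VM_base() loads the current thread into Z_ARG1, so the Java-visible
// arguments are shifted by one register: arg_1 -> Z_ARG2, arg_2 -> Z_ARG3,
// arg_3 -> Z_ARG4. The assert_different_registers() checks in the variants below
// guard against an argument being overwritten before it has been copied.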
2267 2268 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) { 2269 // Call takes possible detour via InterpreterMacroAssembler. 2270 call_VM_base(oop_result, noreg, entry_point, true, check_exceptions); 2271 } 2272 2273 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) { 2274 // Z_ARG1 is reserved for the thread. 2275 lgr_if_needed(Z_ARG2, arg_1); 2276 call_VM(oop_result, entry_point, check_exceptions); 2277 } 2278 2279 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { 2280 // Z_ARG1 is reserved for the thread. 2281 assert_different_registers(arg_2, Z_ARG2); 2282 lgr_if_needed(Z_ARG2, arg_1); 2283 lgr_if_needed(Z_ARG3, arg_2); 2284 call_VM(oop_result, entry_point, check_exceptions); 2285 } 2286 2287 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, 2288 Register arg_3, bool check_exceptions) { 2289 // Z_ARG1 is reserved for the thread. 2290 assert_different_registers(arg_3, Z_ARG2, Z_ARG3); 2291 assert_different_registers(arg_2, Z_ARG2); 2292 lgr_if_needed(Z_ARG2, arg_1); 2293 lgr_if_needed(Z_ARG3, arg_2); 2294 lgr_if_needed(Z_ARG4, arg_3); 2295 call_VM(oop_result, entry_point, check_exceptions); 2296 } 2297 2298 // VM static calls without explicit last_java_sp. 2299 2300 void MacroAssembler::call_VM_static(Register oop_result, address entry_point, bool check_exceptions) { 2301 // Call takes possible detour via InterpreterMacroAssembler. 2302 call_VM_base(oop_result, noreg, entry_point, false, check_exceptions); 2303 } 2304 2305 void MacroAssembler::call_VM_static(Register oop_result, address entry_point, Register arg_1, Register arg_2, 2306 Register arg_3, bool check_exceptions) { 2307 // Z_ARG1 is reserved for the thread. 2308 assert_different_registers(arg_3, Z_ARG2, Z_ARG3); 2309 assert_different_registers(arg_2, Z_ARG2); 2310 lgr_if_needed(Z_ARG2, arg_1); 2311 lgr_if_needed(Z_ARG3, arg_2); 2312 lgr_if_needed(Z_ARG4, arg_3); 2313 call_VM_static(oop_result, entry_point, check_exceptions); 2314 } 2315 2316 // VM calls with explicit last_java_sp. 2317 2318 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions) { 2319 // Call takes possible detour via InterpreterMacroAssembler. 2320 call_VM_base(oop_result, last_java_sp, entry_point, true, check_exceptions); 2321 } 2322 2323 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) { 2324 // Z_ARG1 is reserved for the thread. 2325 lgr_if_needed(Z_ARG2, arg_1); 2326 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2327 } 2328 2329 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, 2330 Register arg_2, bool check_exceptions) { 2331 // Z_ARG1 is reserved for the thread. 2332 assert_different_registers(arg_2, Z_ARG2); 2333 lgr_if_needed(Z_ARG2, arg_1); 2334 lgr_if_needed(Z_ARG3, arg_2); 2335 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2336 } 2337 2338 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, 2339 Register arg_2, Register arg_3, bool check_exceptions) { 2340 // Z_ARG1 is reserved for the thread. 
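  // The copies below are performed in ascending register order, so each source
  // must not coincide with an argument register that has already been written:
  // arg_2 must not be Z_ARG2, and arg_3 must be neither Z_ARG2 nor Z_ARG3.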
2341 assert_different_registers(arg_3, Z_ARG2, Z_ARG3); 2342 assert_different_registers(arg_2, Z_ARG2); 2343 lgr_if_needed(Z_ARG2, arg_1); 2344 lgr_if_needed(Z_ARG3, arg_2); 2345 lgr_if_needed(Z_ARG4, arg_3); 2346 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2347 } 2348 2349 // VM leaf calls. 2350 2351 void MacroAssembler::call_VM_leaf(address entry_point) { 2352 // Call takes possible detour via InterpreterMacroAssembler. 2353 call_VM_leaf_base(entry_point, true); 2354 } 2355 2356 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) { 2357 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2358 call_VM_leaf(entry_point); 2359 } 2360 2361 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) { 2362 assert_different_registers(arg_2, Z_ARG1); 2363 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2364 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2365 call_VM_leaf(entry_point); 2366 } 2367 2368 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) { 2369 assert_different_registers(arg_3, Z_ARG1, Z_ARG2); 2370 assert_different_registers(arg_2, Z_ARG1); 2371 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2372 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2373 if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3); 2374 call_VM_leaf(entry_point); 2375 } 2376 2377 // Static VM leaf calls. 2378 // Really static VM leaf calls are never patched. 2379 2380 void MacroAssembler::call_VM_leaf_static(address entry_point) { 2381 // Call takes possible detour via InterpreterMacroAssembler. 2382 call_VM_leaf_base(entry_point, false); 2383 } 2384 2385 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1) { 2386 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2387 call_VM_leaf_static(entry_point); 2388 } 2389 2390 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2) { 2391 assert_different_registers(arg_2, Z_ARG1); 2392 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2393 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2394 call_VM_leaf_static(entry_point); 2395 } 2396 2397 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2, Register arg_3) { 2398 assert_different_registers(arg_3, Z_ARG1, Z_ARG2); 2399 assert_different_registers(arg_2, Z_ARG1); 2400 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2401 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2402 if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3); 2403 call_VM_leaf_static(entry_point); 2404 } 2405 2406 // Don't use detour via call_c(reg). 2407 address MacroAssembler::call_c(address function_entry) { 2408 load_const(Z_R1, function_entry); 2409 return call(Z_R1); 2410 } 2411 2412 // Variant for really static (non-relocatable) calls which are never patched. 2413 address MacroAssembler::call_c_static(address function_entry) { 2414 load_absolute_address(Z_R1, function_entry); 2415 #if 0 // def ASSERT 2416 // Verify that call site did not move. 2417 load_const_optimized(Z_R0, function_entry); 2418 z_cgr(Z_R1, Z_R0); 2419 z_brc(bcondEqual, 3); 2420 z_illtrap(0xba); 2421 #endif 2422 return call(Z_R1); 2423 } 2424 2425 address MacroAssembler::call_c_opt(address function_entry) { 2426 bool success = call_far_patchable(function_entry, -2 /* emit relocation + constant */); 2427 _last_calls_return_pc = success ? 
pc() : nullptr; 2428 return _last_calls_return_pc; 2429 } 2430 2431 // Identify a call_far_patchable instruction: LARL + LG + BASR 2432 // 2433 // nop ; optionally, if required for alignment 2434 // lgrl rx,A(TOC entry) ; PC-relative access into constant pool 2435 // basr Z_R14,rx ; end of this instruction must be aligned to a word boundary 2436 // 2437 // Code pattern will eventually get patched into variant2 (see below for detection code). 2438 // 2439 bool MacroAssembler::is_call_far_patchable_variant0_at(address instruction_addr) { 2440 address iaddr = instruction_addr; 2441 2442 // Check for the actual load instruction. 2443 if (!is_load_const_from_toc(iaddr)) { return false; } 2444 iaddr += load_const_from_toc_size(); 2445 2446 // Check for the call (BASR) instruction, finally. 2447 assert(iaddr-instruction_addr+call_byregister_size() == call_far_patchable_size(), "size mismatch"); 2448 return is_call_byregister(iaddr); 2449 } 2450 2451 // Identify a call_far_patchable instruction: BRASL 2452 // 2453 // Code pattern to suits atomic patching: 2454 // nop ; Optionally, if required for alignment. 2455 // nop ... ; Multiple filler nops to compensate for size difference (variant0 is longer). 2456 // nop ; For code pattern detection: Prepend each BRASL with a nop. 2457 // brasl Z_R14,<reladdr> ; End of code must be 4-byte aligned ! 2458 bool MacroAssembler::is_call_far_patchable_variant2_at(address instruction_addr) { 2459 const address call_addr = (address)((intptr_t)instruction_addr + call_far_patchable_size() - call_far_pcrelative_size()); 2460 2461 // Check for correct number of leading nops. 2462 address iaddr; 2463 for (iaddr = instruction_addr; iaddr < call_addr; iaddr += nop_size()) { 2464 if (!is_z_nop(iaddr)) { return false; } 2465 } 2466 assert(iaddr == call_addr, "sanity"); 2467 2468 // --> Check for call instruction. 2469 if (is_call_far_pcrelative(call_addr)) { 2470 assert(call_addr-instruction_addr+call_far_pcrelative_size() == call_far_patchable_size(), "size mismatch"); 2471 return true; 2472 } 2473 2474 return false; 2475 } 2476 2477 // Emit a NOT mt-safely patchable 64 bit absolute call. 2478 // If toc_offset == -2, then the destination of the call (= target) is emitted 2479 // to the constant pool and a runtime_call relocation is added 2480 // to the code buffer. 2481 // If toc_offset != -2, target must already be in the constant pool at 2482 // _ctableStart+toc_offset (a caller can retrieve toc_offset 2483 // from the runtime_call relocation). 2484 // Special handling of emitting to scratch buffer when there is no constant pool. 2485 // Slightly changed code pattern. We emit an additional nop if we would 2486 // not end emitting at a word aligned address. This is to ensure 2487 // an atomically patchable displacement in brasl instructions. 2488 // 2489 // A call_far_patchable comes in different flavors: 2490 // - LARL(CP) / LG(CP) / BR (address in constant pool, access via CP register) 2491 // - LGRL(CP) / BR (address in constant pool, pc-relative access) 2492 // - BRASL (relative address of call target coded in instruction) 2493 // All flavors occupy the same amount of space. Length differences are compensated 2494 // by leading nops, such that the instruction sequence always ends at the same 2495 // byte offset. This is required to keep the return offset constant. 2496 // Furthermore, the return address (the end of the instruction sequence) is forced 2497 // to be on a 4-byte boundary. 
This is required for atomic patching, should we ever 2498 // need to patch the call target of the BRASL flavor. 2499 // RETURN value: false, if no constant pool entry could be allocated, true otherwise. 2500 bool MacroAssembler::call_far_patchable(address target, int64_t tocOffset) { 2501 // Get current pc and ensure word alignment for end of instr sequence. 2502 const address start_pc = pc(); 2503 const intptr_t start_off = offset(); 2504 assert(!call_far_patchable_requires_alignment_nop(start_pc), "call_far_patchable requires aligned address"); 2505 const ptrdiff_t dist = (ptrdiff_t)(target - (start_pc + 2)); // Prepend each BRASL with a nop. 2506 const bool emit_target_to_pool = (tocOffset == -2) && !code_section()->scratch_emit(); 2507 const bool emit_relative_call = !emit_target_to_pool && 2508 RelAddr::is_in_range_of_RelAddr32(dist) && 2509 ReoptimizeCallSequences && 2510 !code_section()->scratch_emit(); 2511 2512 if (emit_relative_call) { 2513 // Add padding to get the same size as below. 2514 const unsigned int padding = call_far_patchable_size() - call_far_pcrelative_size(); 2515 unsigned int current_padding; 2516 for (current_padding = 0; current_padding < padding; current_padding += nop_size()) { z_nop(); } 2517 assert(current_padding == padding, "sanity"); 2518 2519 // relative call: len = 2(nop) + 6 (brasl) 2520 // CodeBlob resize cannot occur in this case because 2521 // this call is emitted into pre-existing space. 2522 z_nop(); // Prepend each BRASL with a nop. 2523 z_brasl(Z_R14, target); 2524 } else { 2525 // absolute call: Get address from TOC. 2526 // len = (load TOC){6|0} + (load from TOC){6} + (basr){2} = {14|8} 2527 if (emit_target_to_pool) { 2528 // When emitting the call for the first time, we do not need to use 2529 // the pc-relative version. It will be patched anyway, when the code 2530 // buffer is copied. 2531 // Relocation is not needed when !ReoptimizeCallSequences. 2532 relocInfo::relocType rt = ReoptimizeCallSequences ? relocInfo::runtime_call_w_cp_type : relocInfo::none; 2533 AddressLiteral dest(target, rt); 2534 // Store_oop_in_toc() adds dest to the constant table. As side effect, this kills 2535 // inst_mark(). Reset if possible. 2536 bool reset_mark = (inst_mark() == pc()); 2537 tocOffset = store_oop_in_toc(dest); 2538 if (reset_mark) { set_inst_mark(); } 2539 if (tocOffset == -1) { 2540 return false; // Couldn't create constant pool entry. 2541 } 2542 } 2543 assert(offset() == start_off, "emit no code before this point!"); 2544 2545 address tocPos = pc() + tocOffset; 2546 if (emit_target_to_pool) { 2547 tocPos = code()->consts()->start() + tocOffset; 2548 } 2549 load_long_pcrelative(Z_R14, tocPos); 2550 z_basr(Z_R14, Z_R14); 2551 } 2552 2553 #ifdef ASSERT 2554 // Assert that we can identify the emitted call. 2555 assert(is_call_far_patchable_at(addr_at(start_off)), "can't identify emitted call"); 2556 assert(offset() == start_off+call_far_patchable_size(), "wrong size"); 2557 2558 if (emit_target_to_pool) { 2559 assert(get_dest_of_call_far_patchable_at(addr_at(start_off), code()->consts()->start()) == target, 2560 "wrong encoding of dest address"); 2561 } 2562 #endif 2563 return true; // success 2564 } 2565 2566 // Identify a call_far_patchable instruction. 2567 // For more detailed information see header comment of call_far_patchable. 
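// The BRASL form (variant2) is by far the more frequent one, so it is tested
// first (see also get_dest_of_call_far_patchable_at()).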
2568 bool MacroAssembler::is_call_far_patchable_at(address instruction_addr) { 2569 return is_call_far_patchable_variant2_at(instruction_addr) || // short version: BRASL 2570 is_call_far_patchable_variant0_at(instruction_addr); // long version LARL + LG + BASR 2571 } 2572 2573 // Does the call_far_patchable instruction use a pc-relative encoding 2574 // of the call destination? 2575 bool MacroAssembler::is_call_far_patchable_pcrelative_at(address instruction_addr) { 2576 // Variant 2 is pc-relative. 2577 return is_call_far_patchable_variant2_at(instruction_addr); 2578 } 2579 2580 bool MacroAssembler::is_call_far_pcrelative(address instruction_addr) { 2581 // Prepend each BRASL with a nop. 2582 return is_z_nop(instruction_addr) && is_z_brasl(instruction_addr + nop_size()); // Match at position after one nop required. 2583 } 2584 2585 // Set destination address of a call_far_patchable instruction. 2586 void MacroAssembler::set_dest_of_call_far_patchable_at(address instruction_addr, address dest, int64_t tocOffset) { 2587 ResourceMark rm; 2588 2589 // Now that CP entry is verified, patch call to a pc-relative call (if circumstances permit). 2590 int code_size = MacroAssembler::call_far_patchable_size(); 2591 CodeBuffer buf(instruction_addr, code_size); 2592 MacroAssembler masm(&buf); 2593 masm.call_far_patchable(dest, tocOffset); 2594 ICache::invalidate_range(instruction_addr, code_size); // Empty on z. 2595 } 2596 2597 // Get dest address of a call_far_patchable instruction. 2598 address MacroAssembler::get_dest_of_call_far_patchable_at(address instruction_addr, address ctable) { 2599 // Dynamic TOC: absolute address in constant pool. 2600 // Check variant2 first, it is more frequent. 2601 2602 // Relative address encoded in call instruction. 2603 if (is_call_far_patchable_variant2_at(instruction_addr)) { 2604 return MacroAssembler::get_target_addr_pcrel(instruction_addr + nop_size()); // Prepend each BRASL with a nop. 2605 2606 // Absolute address in constant pool. 2607 } else if (is_call_far_patchable_variant0_at(instruction_addr)) { 2608 address iaddr = instruction_addr; 2609 2610 long tocOffset = get_load_const_from_toc_offset(iaddr); 2611 address tocLoc = iaddr + tocOffset; 2612 return *(address *)(tocLoc); 2613 } else { 2614 fprintf(stderr, "MacroAssembler::get_dest_of_call_far_patchable_at has a problem at %p:\n", instruction_addr); 2615 fprintf(stderr, "not a call_far_patchable: %16.16lx %16.16lx, len = %d\n", 2616 *(unsigned long*)instruction_addr, 2617 *(unsigned long*)(instruction_addr+8), 2618 call_far_patchable_size()); 2619 Disassembler::decode(instruction_addr, instruction_addr+call_far_patchable_size()); 2620 ShouldNotReachHere(); 2621 return nullptr; 2622 } 2623 } 2624 2625 void MacroAssembler::align_call_far_patchable(address pc) { 2626 if (call_far_patchable_requires_alignment_nop(pc)) { z_nop(); } 2627 } 2628 2629 void MacroAssembler::check_and_handle_earlyret(Register java_thread) { 2630 } 2631 2632 void MacroAssembler::check_and_handle_popframe(Register java_thread) { 2633 } 2634 2635 // Read from the polling page. 2636 // Use TM or TMY instruction, depending on read offset. 2637 // offset = 0: Use TM, safepoint polling. 2638 // offset < 0: Use TMY, profiling safepoint polling. 
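// TM accepts only an unsigned 12-bit displacement, whereas TMY takes a signed
// 20-bit displacement. Negative (profiling) offsets therefore must use TMY.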
2639 void MacroAssembler::load_from_polling_page(Register polling_page_address, int64_t offset) { 2640 if (Immediate::is_uimm12(offset)) { 2641 z_tm(offset, polling_page_address, mask_safepoint); 2642 } else { 2643 z_tmy(offset, polling_page_address, mask_profiling); 2644 } 2645 } 2646 2647 // Check whether z_instruction is a read access to the polling page 2648 // which was emitted by load_from_polling_page(..). 2649 bool MacroAssembler::is_load_from_polling_page(address instr_loc) { 2650 unsigned long z_instruction; 2651 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2652 2653 if (ilen == 2) { return false; } // It's none of the allowed instructions. 2654 2655 if (ilen == 4) { 2656 if (!is_z_tm(z_instruction)) { return false; } // It's len=4, but not a z_tm. fail. 2657 2658 int ms = inv_mask(z_instruction,8,32); // mask 2659 int ra = inv_reg(z_instruction,16,32); // base register 2660 int ds = inv_uimm12(z_instruction); // displacement 2661 2662 if (!(ds == 0 && ra != 0 && ms == mask_safepoint)) { 2663 return false; // It's not a z_tm(0, ra, mask_safepoint). Fail. 2664 } 2665 2666 } else { /* if (ilen == 6) */ 2667 2668 assert(!is_z_lg(z_instruction), "old form (LG) polling page access. Please fix and use TM(Y)."); 2669 2670 if (!is_z_tmy(z_instruction)) { return false; } // It's len=6, but not a z_tmy. fail. 2671 2672 int ms = inv_mask(z_instruction,8,48); // mask 2673 int ra = inv_reg(z_instruction,16,48); // base register 2674 int ds = inv_simm20(z_instruction); // displacement 2675 } 2676 2677 return true; 2678 } 2679 2680 // Extract poll address from instruction and ucontext. 2681 address MacroAssembler::get_poll_address(address instr_loc, void* ucontext) { 2682 assert(ucontext != nullptr, "must have ucontext"); 2683 ucontext_t* uc = (ucontext_t*) ucontext; 2684 unsigned long z_instruction; 2685 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2686 2687 if (ilen == 4 && is_z_tm(z_instruction)) { 2688 int ra = inv_reg(z_instruction, 16, 32); // base register 2689 int ds = inv_uimm12(z_instruction); // displacement 2690 address addr = (address)uc->uc_mcontext.gregs[ra]; 2691 return addr + ds; 2692 } else if (ilen == 6 && is_z_tmy(z_instruction)) { 2693 int ra = inv_reg(z_instruction, 16, 48); // base register 2694 int ds = inv_simm20(z_instruction); // displacement 2695 address addr = (address)uc->uc_mcontext.gregs[ra]; 2696 return addr + ds; 2697 } 2698 2699 ShouldNotReachHere(); 2700 return nullptr; 2701 } 2702 2703 // Extract poll register from instruction. 2704 uint MacroAssembler::get_poll_register(address instr_loc) { 2705 unsigned long z_instruction; 2706 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2707 2708 if (ilen == 4 && is_z_tm(z_instruction)) { 2709 return (uint)inv_reg(z_instruction, 16, 32); // base register 2710 } else if (ilen == 6 && is_z_tmy(z_instruction)) { 2711 return (uint)inv_reg(z_instruction, 16, 48); // base register 2712 } 2713 2714 ShouldNotReachHere(); 2715 return 0; 2716 } 2717 2718 void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) { 2719 const Address poll_byte_addr(Z_thread, in_bytes(JavaThread::polling_word_offset()) + 7 /* Big Endian */); 2720 // Armed page has poll_bit set. 2721 z_tm(poll_byte_addr, SafepointMechanism::poll_bit()); 2722 z_brnaz(slow_path); 2723 } 2724 2725 // Don't rely on register locking, always use Z_R1 as scratch register instead. 2726 void MacroAssembler::bang_stack_with_offset(int offset) { 2727 // Stack grows down, caller passes positive offset. 
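  // The probe address is SP - offset. If -offset is not encodable as a displacement,
  // the address is computed explicitly in Z_R1 (Z_SP itself must stay untouched).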
2728 assert(offset > 0, "must bang with positive offset"); 2729 if (Displacement::is_validDisp(-offset)) { 2730 z_tmy(-offset, Z_SP, mask_stackbang); 2731 } else { 2732 add2reg(Z_R1, -offset, Z_SP); // Do not destroy Z_SP!!! 2733 z_tm(0, Z_R1, mask_stackbang); // Just banging. 2734 } 2735 } 2736 2737 void MacroAssembler::reserved_stack_check(Register return_pc) { 2738 // Test if reserved zone needs to be enabled. 2739 Label no_reserved_zone_enabling; 2740 assert(return_pc == Z_R14, "Return pc must be in R14 before z_br() to StackOverflow stub."); 2741 BLOCK_COMMENT("reserved_stack_check {"); 2742 2743 z_clg(Z_SP, Address(Z_thread, JavaThread::reserved_stack_activation_offset())); 2744 z_brl(no_reserved_zone_enabling); 2745 2746 // Enable reserved zone again, throw stack overflow exception. 2747 save_return_pc(); 2748 push_frame_abi160(0); 2749 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), Z_thread); 2750 pop_frame(); 2751 restore_return_pc(); 2752 2753 load_const_optimized(Z_R1, StubRoutines::throw_delayed_StackOverflowError_entry()); 2754 // Don't use call() or z_basr(), they will invalidate Z_R14 which contains the return pc. 2755 z_br(Z_R1); 2756 2757 should_not_reach_here(); 2758 2759 bind(no_reserved_zone_enabling); 2760 BLOCK_COMMENT("} reserved_stack_check"); 2761 } 2762 2763 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 2764 void MacroAssembler::tlab_allocate(Register obj, 2765 Register var_size_in_bytes, 2766 int con_size_in_bytes, 2767 Register t1, 2768 Label& slow_case) { 2769 assert_different_registers(obj, var_size_in_bytes, t1); 2770 Register end = t1; 2771 Register thread = Z_thread; 2772 2773 z_lg(obj, Address(thread, JavaThread::tlab_top_offset())); 2774 if (var_size_in_bytes == noreg) { 2775 z_lay(end, Address(obj, con_size_in_bytes)); 2776 } else { 2777 z_lay(end, Address(obj, var_size_in_bytes)); 2778 } 2779 z_cg(end, Address(thread, JavaThread::tlab_end_offset())); 2780 branch_optimized(bcondHigh, slow_case); 2781 2782 // Update the tlab top pointer. 2783 z_stg(end, Address(thread, JavaThread::tlab_top_offset())); 2784 2785 // Recover var_size_in_bytes if necessary. 2786 if (var_size_in_bytes == end) { 2787 z_sgr(var_size_in_bytes, obj); 2788 } 2789 } 2790 2791 // Emitter for interface method lookup. 2792 // input: recv_klass, intf_klass, itable_index 2793 // output: method_result 2794 // kills: itable_index, temp1_reg, Z_R0, Z_R1 2795 // TODO: Temp2_reg is unused. we may use this emitter also in the itable stubs. 2796 // If the register is still not needed then, remove it. 2797 void MacroAssembler::lookup_interface_method(Register recv_klass, 2798 Register intf_klass, 2799 RegisterOrConstant itable_index, 2800 Register method_result, 2801 Register temp1_reg, 2802 Label& no_such_interface, 2803 bool return_method) { 2804 2805 const Register vtable_len = temp1_reg; // Used to compute itable_entry_addr. 2806 const Register itable_entry_addr = Z_R1_scratch; 2807 const Register itable_interface = Z_R0_scratch; 2808 2809 BLOCK_COMMENT("lookup_interface_method {"); 2810 2811 // Load start of itable entries into itable_entry_addr. 2812 z_llgf(vtable_len, Address(recv_klass, Klass::vtable_length_offset())); 2813 z_sllg(vtable_len, vtable_len, exact_log2(vtableEntry::size_in_bytes())); 2814 2815 // Loop over all itable entries until desired interfaceOop(Rinterface) found. 
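  // The itable is laid out directly after the vtable, so the first itableOffsetEntry
  // lives at recv_klass + vtable_start_offset + vtable_length_in_bytes (vtable_len
  // was scaled to bytes above); the displacement selects its interface field.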
2816 add2reg_with_index(itable_entry_addr, 2817 in_bytes(Klass::vtable_start_offset() + itableOffsetEntry::interface_offset()), 2818 recv_klass, vtable_len); 2819 2820 const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize; 2821 Label search; 2822 2823 bind(search); 2824 2825 // Handle IncompatibleClassChangeError. 2826 // If the entry is null then we've reached the end of the table 2827 // without finding the expected interface, so throw an exception. 2828 load_and_test_long(itable_interface, Address(itable_entry_addr)); 2829 z_bre(no_such_interface); 2830 2831 add2reg(itable_entry_addr, itable_offset_search_inc); 2832 z_cgr(itable_interface, intf_klass); 2833 z_brne(search); 2834 2835 // Entry found and itable_entry_addr points to it, get offset of vtable for interface. 2836 if (return_method) { 2837 const int vtable_offset_offset = in_bytes(itableOffsetEntry::offset_offset() - 2838 itableOffsetEntry::interface_offset()) - 2839 itable_offset_search_inc; 2840 2841 // Compute itableMethodEntry and get method and entry point 2842 // we use addressing with index and displacement, since the formula 2843 // for computing the entry's offset has a fixed and a dynamic part, 2844 // the latter depending on the matched interface entry and on the case, 2845 // that the itable index has been passed as a register, not a constant value. 2846 int method_offset = in_bytes(itableMethodEntry::method_offset()); 2847 // Fixed part (displacement), common operand. 2848 Register itable_offset = method_result; // Dynamic part (index register). 2849 2850 if (itable_index.is_register()) { 2851 // Compute the method's offset in that register, for the formula, see the 2852 // else-clause below. 2853 z_sllg(itable_offset, itable_index.as_register(), exact_log2(itableMethodEntry::size() * wordSize)); 2854 z_agf(itable_offset, vtable_offset_offset, itable_entry_addr); 2855 } else { 2856 // Displacement increases. 2857 method_offset += itableMethodEntry::size() * wordSize * itable_index.as_constant(); 2858 2859 // Load index from itable. 2860 z_llgf(itable_offset, vtable_offset_offset, itable_entry_addr); 2861 } 2862 2863 // Finally load the method's oop. 2864 z_lg(method_result, method_offset, itable_offset, recv_klass); 2865 } 2866 BLOCK_COMMENT("} lookup_interface_method"); 2867 } 2868 2869 // Lookup for virtual method invocation. 2870 void MacroAssembler::lookup_virtual_method(Register recv_klass, 2871 RegisterOrConstant vtable_index, 2872 Register method_result) { 2873 assert_different_registers(recv_klass, vtable_index.register_or_noreg()); 2874 assert(vtableEntry::size() * wordSize == wordSize, 2875 "else adjust the scaling in the code below"); 2876 2877 BLOCK_COMMENT("lookup_virtual_method {"); 2878 2879 const int base = in_bytes(Klass::vtable_start_offset()); 2880 2881 if (vtable_index.is_constant()) { 2882 // Load with base + disp. 2883 Address vtable_entry_addr(recv_klass, 2884 vtable_index.as_constant() * wordSize + 2885 base + 2886 in_bytes(vtableEntry::method_offset())); 2887 2888 z_lg(method_result, vtable_entry_addr); 2889 } else { 2890 // Shift index properly and load with base + index + disp. 2891 Register vindex = vtable_index.as_register(); 2892 Address vtable_entry_addr(recv_klass, vindex, 2893 base + in_bytes(vtableEntry::method_offset())); 2894 2895 z_sllg(vindex, vindex, exact_log2(wordSize)); 2896 z_lg(method_result, vtable_entry_addr); 2897 } 2898 BLOCK_COMMENT("} lookup_virtual_method"); 2899 } 2900 2901 // Factor out code to call ic_miss_handler. 
2902 // Generate code to call the inline cache miss handler. 2903 // 2904 // In most cases, this code will be generated out-of-line. 2905 // The method parameters are intended to provide some variability. 2906 // ICM - Label which has to be bound to the start of useful code (past any traps). 2907 // trapMarker - Marking byte for the generated illtrap instructions (if any). 2908 // Any value except 0x00 is supported. 2909 // = 0x00 - do not generate illtrap instructions. 2910 // use nops to fill unused space. 2911 // requiredSize - required size of the generated code. If the actually 2912 // generated code is smaller, use padding instructions to fill up. 2913 // = 0 - no size requirement, no padding. 2914 // scratch - scratch register to hold branch target address. 2915 // 2916 // The method returns the code offset of the bound label. 2917 unsigned int MacroAssembler::call_ic_miss_handler(Label& ICM, int trapMarker, int requiredSize, Register scratch) { 2918 intptr_t startOffset = offset(); 2919 2920 // Prevent entry at content_begin(). 2921 if (trapMarker != 0) { 2922 z_illtrap(trapMarker); 2923 } 2924 2925 // Load address of inline cache miss code into scratch register 2926 // and branch to cache miss handler. 2927 BLOCK_COMMENT("IC miss handler {"); 2928 BIND(ICM); 2929 unsigned int labelOffset = offset(); 2930 AddressLiteral icmiss(SharedRuntime::get_ic_miss_stub()); 2931 2932 load_const_optimized(scratch, icmiss); 2933 z_br(scratch); 2934 2935 // Fill unused space. 2936 if (requiredSize > 0) { 2937 while ((offset() - startOffset) < requiredSize) { 2938 if (trapMarker == 0) { 2939 z_nop(); 2940 } else { 2941 z_illtrap(trapMarker); 2942 } 2943 } 2944 } 2945 BLOCK_COMMENT("} IC miss handler"); 2946 return labelOffset; 2947 } 2948 2949 void MacroAssembler::nmethod_UEP(Label& ic_miss) { 2950 Register ic_reg = Z_inline_cache; 2951 int klass_offset = oopDesc::klass_offset_in_bytes(); 2952 if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) { 2953 if (VM_Version::has_CompareBranch()) { 2954 z_cgij(Z_ARG1, 0, Assembler::bcondEqual, ic_miss); 2955 } else { 2956 z_ltgr(Z_ARG1, Z_ARG1); 2957 z_bre(ic_miss); 2958 } 2959 } 2960 // Compare cached class against klass from receiver. 2961 compare_klass_ptr(ic_reg, klass_offset, Z_ARG1, false); 2962 z_brne(ic_miss); 2963 } 2964 2965 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 2966 Register super_klass, 2967 Register temp1_reg, 2968 Label* L_success, 2969 Label* L_failure, 2970 Label* L_slow_path, 2971 RegisterOrConstant super_check_offset) { 2972 2973 const int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 2974 const int sco_offset = in_bytes(Klass::super_check_offset_offset()); 2975 2976 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); 2977 bool need_slow_path = (must_load_sco || 2978 super_check_offset.constant_or_zero() == sc_offset); 2979 2980 // Input registers must not overlap. 
2981 assert_different_registers(sub_klass, super_klass, temp1_reg); 2982 if (super_check_offset.is_register()) { 2983 assert_different_registers(sub_klass, super_klass, 2984 super_check_offset.as_register()); 2985 } else if (must_load_sco) { 2986 assert(temp1_reg != noreg, "supply either a temp or a register offset"); 2987 } 2988 2989 const Register Rsuper_check_offset = temp1_reg; 2990 2991 NearLabel L_fallthrough; 2992 int label_nulls = 0; 2993 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 2994 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 2995 if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; } 2996 assert(label_nulls <= 1 || 2997 (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path), 2998 "at most one null in the batch, usually"); 2999 3000 BLOCK_COMMENT("check_klass_subtype_fast_path {"); 3001 // If the pointers are equal, we are done (e.g., String[] elements). 3002 // This self-check enables sharing of secondary supertype arrays among 3003 // non-primary types such as array-of-interface. Otherwise, each such 3004 // type would need its own customized SSA. 3005 // We move this check to the front of the fast path because many 3006 // type checks are in fact trivially successful in this manner, 3007 // so we get a nicely predicted branch right at the start of the check. 3008 compare64_and_branch(sub_klass, super_klass, bcondEqual, *L_success); 3009 3010 // Check the supertype display, which is uint. 3011 if (must_load_sco) { 3012 z_llgf(Rsuper_check_offset, sco_offset, super_klass); 3013 super_check_offset = RegisterOrConstant(Rsuper_check_offset); 3014 } 3015 Address super_check_addr(sub_klass, super_check_offset, 0); 3016 z_cg(super_klass, super_check_addr); // compare w/ displayed supertype 3017 3018 // This check has worked decisively for primary supers. 3019 // Secondary supers are sought in the super_cache ('super_cache_addr'). 3020 // (Secondary supers are interfaces and very deeply nested subtypes.) 3021 // This works in the same check above because of a tricky aliasing 3022 // between the super_cache and the primary super display elements. 3023 // (The 'super_check_addr' can address either, as the case requires.) 3024 // Note that the cache is updated below if it does not help us find 3025 // what we need immediately. 3026 // So if it was a primary super, we can just fail immediately. 3027 // Otherwise, it's the slow path for us (no success at this point). 3028 3029 // Hacked jmp, which may only be used just before L_fallthrough. 3030 #define final_jmp(label) \ 3031 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 3032 else { branch_optimized(Assembler::bcondAlways, label); } /*omit semicolon*/ 3033 3034 if (super_check_offset.is_register()) { 3035 branch_optimized(Assembler::bcondEqual, *L_success); 3036 z_cfi(super_check_offset.as_register(), sc_offset); 3037 if (L_failure == &L_fallthrough) { 3038 branch_optimized(Assembler::bcondEqual, *L_slow_path); 3039 } else { 3040 branch_optimized(Assembler::bcondNotEqual, *L_failure); 3041 final_jmp(*L_slow_path); 3042 } 3043 } else if (super_check_offset.as_constant() == sc_offset) { 3044 // Need a slow path; fast failure is impossible. 3045 if (L_slow_path == &L_fallthrough) { 3046 branch_optimized(Assembler::bcondEqual, *L_success); 3047 } else { 3048 branch_optimized(Assembler::bcondNotEqual, *L_slow_path); 3049 final_jmp(*L_success); 3050 } 3051 } else { 3052 // No slow path; it's a fast decision. 
3053 if (L_failure == &L_fallthrough) { 3054 branch_optimized(Assembler::bcondEqual, *L_success); 3055 } else { 3056 branch_optimized(Assembler::bcondNotEqual, *L_failure); 3057 final_jmp(*L_success); 3058 } 3059 } 3060 3061 bind(L_fallthrough); 3062 #undef local_brc 3063 #undef final_jmp 3064 BLOCK_COMMENT("} check_klass_subtype_fast_path"); 3065 // fallthru (to slow path) 3066 } 3067 3068 void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass, 3069 Register Rsuperklass, 3070 Register Rarray_ptr, // tmp 3071 Register Rlength, // tmp 3072 Label* L_success, 3073 Label* L_failure) { 3074 // Input registers must not overlap. 3075 // Also check for R1 which is explicitly used here. 3076 assert_different_registers(Z_R1, Rsubklass, Rsuperklass, Rarray_ptr, Rlength); 3077 NearLabel L_fallthrough; 3078 int label_nulls = 0; 3079 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 3080 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 3081 assert(label_nulls <= 1, "at most one null in the batch"); 3082 3083 const int ss_offset = in_bytes(Klass::secondary_supers_offset()); 3084 const int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3085 3086 const int length_offset = Array<Klass*>::length_offset_in_bytes(); 3087 const int base_offset = Array<Klass*>::base_offset_in_bytes(); 3088 3089 // Hacked jmp, which may only be used just before L_fallthrough. 3090 #define final_jmp(label) \ 3091 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 3092 else branch_optimized(Assembler::bcondAlways, label) /*omit semicolon*/ 3093 3094 NearLabel loop_iterate, loop_count, match; 3095 3096 BLOCK_COMMENT("check_klass_subtype_slow_path {"); 3097 z_lg(Rarray_ptr, ss_offset, Rsubklass); 3098 3099 load_and_test_int(Rlength, Address(Rarray_ptr, length_offset)); 3100 branch_optimized(Assembler::bcondZero, *L_failure); 3101 3102 // Oops in table are NO MORE compressed. 3103 z_cg(Rsuperklass, base_offset, Rarray_ptr); // Check array element for match. 3104 z_bre(match); // Shortcut for array length = 1. 3105 3106 // No match yet, so we must walk the array's elements. 3107 z_lngfr(Rlength, Rlength); 3108 z_sllg(Rlength, Rlength, LogBytesPerWord); // -#bytes of cache array 3109 z_llill(Z_R1, BytesPerWord); // Set increment/end index. 3110 add2reg(Rlength, 2 * BytesPerWord); // start index = -(n-2)*BytesPerWord 3111 z_slgr(Rarray_ptr, Rlength); // start addr: += (n-2)*BytesPerWord 3112 z_bru(loop_count); 3113 3114 BIND(loop_iterate); 3115 z_cg(Rsuperklass, base_offset, Rlength, Rarray_ptr); // Check array element for match. 3116 z_bre(match); 3117 BIND(loop_count); 3118 z_brxlg(Rlength, Z_R1, loop_iterate); 3119 3120 // Rsuperklass not found among secondary super classes -> failure. 3121 branch_optimized(Assembler::bcondAlways, *L_failure); 3122 3123 // Got a hit. Return success (zero result). Set cache. 3124 // Cache load doesn't happen here. For speed it is directly emitted by the compiler. 3125 3126 BIND(match); 3127 3128 z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache. 3129 3130 final_jmp(*L_success); 3131 3132 // Exit to the surrounding code. 3133 BIND(L_fallthrough); 3134 #undef local_brc 3135 #undef final_jmp 3136 BLOCK_COMMENT("} check_klass_subtype_slow_path"); 3137 } 3138 3139 // Emitter for combining fast and slow path. 
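// Illustrative use of the combined emitter below (a sketch, not lifted from a
// particular caller; register and label names are made up): it branches to the
// given label on success and falls through on failure, because its internal
// 'failure' label is bound at the very end:
//   NearLabel L_ok;
//   check_klass_subtype(Rsub_klass, Rsuper_klass, Rtmp1, Rtmp2, L_ok);
//   // fall-through: Rsub_klass is not a subtype of Rsuper_klass
//   ... handle the failing case ...
//   bind(L_ok);   // subtype check succeeded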
3140 void MacroAssembler::check_klass_subtype(Register sub_klass, 3141 Register super_klass, 3142 Register temp1_reg, 3143 Register temp2_reg, 3144 Label& L_success) { 3145 NearLabel failure; 3146 BLOCK_COMMENT(err_msg("check_klass_subtype(%s subclass of %s) {", sub_klass->name(), super_klass->name())); 3147 check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg, 3148 &L_success, &failure, nullptr); 3149 check_klass_subtype_slow_path(sub_klass, super_klass, 3150 temp1_reg, temp2_reg, &L_success, nullptr); 3151 BIND(failure); 3152 BLOCK_COMMENT("} check_klass_subtype"); 3153 } 3154 3155 void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) { 3156 assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required"); 3157 3158 Label L_fallthrough; 3159 if (L_fast_path == nullptr) { 3160 L_fast_path = &L_fallthrough; 3161 } else if (L_slow_path == nullptr) { 3162 L_slow_path = &L_fallthrough; 3163 } 3164 3165 // Fast path check: class is fully initialized 3166 z_cli(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized); 3167 z_bre(*L_fast_path); 3168 3169 // Fast path check: current thread is initializer thread 3170 z_cg(thread, Address(klass, InstanceKlass::init_thread_offset())); 3171 if (L_slow_path == &L_fallthrough) { 3172 z_bre(*L_fast_path); 3173 } else if (L_fast_path == &L_fallthrough) { 3174 z_brne(*L_slow_path); 3175 } else { 3176 Unimplemented(); 3177 } 3178 3179 bind(L_fallthrough); 3180 } 3181 3182 // Increment a counter at counter_address when the eq condition code is 3183 // set. Kills registers tmp1_reg and tmp2_reg and preserves the condition code. 3184 void MacroAssembler::increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg) { 3185 Label l; 3186 z_brne(l); 3187 load_const(tmp1_reg, counter_address); 3188 add2mem_32(Address(tmp1_reg), 1, tmp2_reg); 3189 z_cr(tmp1_reg, tmp1_reg); // Set cc to eq. 3190 bind(l); 3191 } 3192 3193 void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2) { 3194 Register displacedHeader = temp1; 3195 Register currentHeader = temp1; 3196 Register temp = temp2; 3197 NearLabel done, object_has_monitor; 3198 3199 const int hdr_offset = oopDesc::mark_offset_in_bytes(); 3200 3201 assert_different_registers(temp1, temp2, oop, box); 3202 3203 BLOCK_COMMENT("compiler_fast_lock_object {"); 3204 3205 // Load markWord from oop into mark. 3206 z_lg(displacedHeader, hdr_offset, oop); 3207 3208 if (DiagnoseSyncOnValueBasedClasses != 0) { 3209 load_klass(temp, oop); 3210 testbit(Address(temp, Klass::access_flags_offset()), exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS)); 3211 z_btrue(done); 3212 } 3213 3214 // Handle existing monitor. 3215 // The object has an existing monitor iff (mark & monitor_value) != 0. 3216 guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word"); 3217 z_tmll(displacedHeader, markWord::monitor_value); 3218 z_brnaz(object_has_monitor); 3219 3220 if (LockingMode == LM_MONITOR) { 3221 // Set NE to indicate 'failure' -> take slow-path 3222 // From loading the markWord, we know that oop != nullptr 3223 z_ltgr(oop, oop); 3224 z_bru(done); 3225 } else if (LockingMode == LM_LEGACY) { 3226 // Set mark to markWord | markWord::unlocked_value. 3227 z_oill(displacedHeader, markWord::unlocked_value); 3228 3229 // Load Compare Value application register. 3230 3231 // Initialize the box (must happen before we update the object mark). 
3232 z_stg(displacedHeader, BasicLock::displaced_header_offset_in_bytes(), box);
3233
3234 // Compare object markWord with mark and, if equal, exchange box with object markWord.
3235 // If the compare-and-swap succeeds, then we found an unlocked object and have now locked it.
3236 z_csg(displacedHeader, box, hdr_offset, oop);
3237 assert(currentHeader == displacedHeader, "must be same register"); // currentHeader and displacedHeader are the same register (see above).
3238 z_bre(done);
3239
3240 // We did not see an unlocked object.
3241 // currentHeader contains what is currently stored in the oop's markWord.
3242 // We might have a recursive case. Verify by checking if the owner is self.
3243 // To do so, compare the value in the markWord (currentHeader) with the stack pointer.
3244 z_sgr(currentHeader, Z_SP);
3245 load_const_optimized(temp, (~(os::vm_page_size() - 1) | markWord::lock_mask_in_place));
3246
3247 z_ngr(currentHeader, temp);
3248
3249 // result zero: owner is self -> recursive lock. Indicate that by storing 0 in the box.
3250 // result not-zero: attempt failed. We don't hold the lock -> go for slow case.
3251
3252 z_stg(currentHeader/*==0 or not 0*/, BasicLock::displaced_header_offset_in_bytes(), box);
3253
3254 z_bru(done);
3255 } else {
3256 assert(LockingMode == LM_LIGHTWEIGHT, "must be");
3257 lightweight_lock(oop, displacedHeader, temp, done);
3258 z_bru(done);
3259 }
3260
3261 bind(object_has_monitor);
3262
3263 if (!UseObjectMonitorTable) {
3264 Register zero = temp;
3265 Register monitor_tagged = displacedHeader; // Tagged with markWord::monitor_value.
3266 // The object's monitor m is unlocked iff m->owner is null,
3267 // otherwise m->owner may contain a thread or a stack address.
3268
3269 // Try to CAS m->owner from null to current thread.
3270 // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
3271 // Otherwise, register zero is filled with the current owner.
3272 z_lghi(zero, 0);
3273 z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged);
3274 if (LockingMode != LM_LIGHTWEIGHT) {
3275 // Store a non-null value into the box.
3276 z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box);
3277 }
3278
3279 z_bre(done); // acquired the lock for the first time.
3280
3281 BLOCK_COMMENT("fast_path_recursive_lock {");
3282 // Check if we are already the owner (recursive lock).
3283 z_cgr(Z_thread, zero); // owner is stored in zero by "z_csg" above
3284 z_brne(done); // not a recursive lock
3285
3286 // Current thread already owns the lock. Just increment recursion count.
3287 z_agsi(Address(monitor_tagged, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 1ll);
3288 z_cgr(zero, zero); // set the CC to EQUAL
3289 BLOCK_COMMENT("} fast_path_recursive_lock");
3290 } else {
3291 // OMCache lookup not supported yet. Take the slowpath.
3292 // Set flag to NE
3293 z_ltgr(oop, oop);
3294 z_bru(done);
3295 }
3296 bind(done);
3297
3298 BLOCK_COMMENT("} compiler_fast_lock_object");
3299 // If locking was successful, CR should indicate 'EQ'.
3300 // The compiler or the native wrapper generates a branch to the runtime call
3301 // _complete_monitor_locking_Java.
3302 } 3303 3304 void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2) { 3305 Register displacedHeader = temp1; 3306 Register currentHeader = temp2; 3307 Register temp = temp1; 3308 3309 const int hdr_offset = oopDesc::mark_offset_in_bytes(); 3310 3311 assert_different_registers(temp1, temp2, oop, box); 3312 3313 Label done, object_has_monitor, not_recursive; 3314 3315 BLOCK_COMMENT("compiler_fast_unlock_object {"); 3316 3317 if (LockingMode == LM_LEGACY) { 3318 // Find the lock address and load the displaced header from the stack. 3319 // if the displaced header is zero, we have a recursive unlock. 3320 load_and_test_long(displacedHeader, Address(box, BasicLock::displaced_header_offset_in_bytes())); 3321 z_bre(done); 3322 } 3323 3324 // Handle existing monitor. 3325 // The object has an existing monitor iff (mark & monitor_value) != 0. 3326 z_lg(currentHeader, hdr_offset, oop); 3327 guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word"); 3328 3329 z_tmll(currentHeader, markWord::monitor_value); 3330 z_brnaz(object_has_monitor); 3331 3332 if (LockingMode == LM_MONITOR) { 3333 // Set NE to indicate 'failure' -> take slow-path 3334 z_ltgr(oop, oop); 3335 z_bru(done); 3336 } else if (LockingMode == LM_LEGACY) { 3337 // Check if it is still a lightweight lock, this is true if we see 3338 // the stack address of the basicLock in the markWord of the object 3339 // copy box to currentHeader such that csg does not kill it. 3340 z_lgr(currentHeader, box); 3341 z_csg(currentHeader, displacedHeader, hdr_offset, oop); 3342 z_bru(done); // csg sets CR as desired. 3343 } else { 3344 assert(LockingMode == LM_LIGHTWEIGHT, "must be"); 3345 3346 lightweight_unlock(oop, currentHeader, displacedHeader, done); 3347 z_bru(done); 3348 } 3349 3350 // In case of LM_LIGHTWEIGHT, we may reach here with (temp & ObjectMonitor::ANONYMOUS_OWNER) != 0. 3351 // This is handled like owner thread mismatches: We take the slow path. 3352 3353 // Handle existing monitor. 3354 bind(object_has_monitor); 3355 3356 if (!UseObjectMonitorTable) { 3357 z_cg(Z_thread, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 3358 z_brne(done); 3359 3360 BLOCK_COMMENT("fast_path_recursive_unlock {"); 3361 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); 3362 z_bre(not_recursive); // if 0 then jump, it's not recursive locking 3363 3364 // Recursive inflated unlock 3365 z_agsi(Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), -1ll); 3366 z_cgr(currentHeader, currentHeader); // set the CC to EQUAL 3367 BLOCK_COMMENT("} fast_path_recursive_unlock"); 3368 z_bru(done); 3369 3370 bind(not_recursive); 3371 3372 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); 3373 z_brne(done); 3374 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); 3375 z_brne(done); 3376 z_release(); 3377 z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader); 3378 } else { 3379 // OMCache lookup not supported yet. Take the slowpath. 
3380 // Set flag to NE
3381 z_ltgr(oop, oop);
3382 z_bru(done);
3383 }
3384
3385 bind(done);
3386
3387 BLOCK_COMMENT("} compiler_fast_unlock_object");
3388 // flag == EQ indicates success
3389 // flag == NE indicates failure
3390 }
3391
3392 void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
3393 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3394 bs->resolve_jobject(this, value, tmp1, tmp2);
3395 }
3396
3397 // Last_Java_sp must comply with the rules in frame_s390.hpp.
3398 void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation) {
3399 BLOCK_COMMENT("set_last_Java_frame {");
3400
3401 // Always set last_Java_pc and flags first because once last_Java_sp
3402 // is visible, has_last_Java_frame is true and users will look at the
3403 // rest of the fields. (Note: flags should always be zero before we
3404 // get here, so they don't need to be set.)
3405
3406 // Verify that last_Java_pc was zeroed on return to Java.
3407 if (allow_relocation) {
3408 asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()),
3409 Z_thread,
3410 "last_Java_pc not zeroed before leaving Java",
3411 0x200);
3412 } else {
3413 asm_assert_mem8_is_zero_static(in_bytes(JavaThread::last_Java_pc_offset()),
3414 Z_thread,
3415 "last_Java_pc not zeroed before leaving Java",
3416 0x200);
3417 }
3418
3419 // When returning from calling out from Java mode, the frame anchor's
3420 // last_Java_pc will always be set to null. It is set here so that,
3421 // if we are doing a call to native (not VM), we capture the
3422 // known pc and don't have to rely on the native call having a
3423 // standard frame linkage where we can find the pc.
3424 if (last_Java_pc!=noreg) {
3425 z_stg(last_Java_pc, Address(Z_thread, JavaThread::last_Java_pc_offset()));
3426 }
3427
3428 // This membar release is not required on z/Architecture, since the sequence of stores
3429 // is maintained. Nevertheless, we leave it in to document the required ordering.
3430 // The implementation of z_release() should be empty.
3431 // z_release();
3432
3433 z_stg(last_Java_sp, Address(Z_thread, JavaThread::last_Java_sp_offset()));
3434 BLOCK_COMMENT("} set_last_Java_frame");
3435 }
3436
3437 void MacroAssembler::reset_last_Java_frame(bool allow_relocation) {
3438 BLOCK_COMMENT("reset_last_Java_frame {");
3439
3440 if (allow_relocation) {
3441 asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()),
3442 Z_thread,
3443 "SP was not set, still zero",
3444 0x202);
3445 } else {
3446 asm_assert_mem8_isnot_zero_static(in_bytes(JavaThread::last_Java_sp_offset()),
3447 Z_thread,
3448 "SP was not set, still zero",
3449 0x202);
3450 }
3451
3452 // _last_Java_sp = 0
3453 // Clearing storage must be atomic here, so don't use clear_mem()!
3454 store_const(Address(Z_thread, JavaThread::last_Java_sp_offset()), 0);
3455
3456 // _last_Java_pc = 0
3457 store_const(Address(Z_thread, JavaThread::last_Java_pc_offset()), 0);
3458
3459 BLOCK_COMMENT("} reset_last_Java_frame");
3460 return;
3461 }
3462
3463 void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, bool allow_relocation) {
3464 assert_different_registers(sp, tmp1);
3465
3466 // We cannot trust that code generated by the C++ compiler saves R14
3467 // to z_abi_160.return_pc, because sometimes it spills R14 using stmg at
3468 // z_abi_160.gpr14 (e.g. InterpreterRuntime::_new()).
3469 // Therefore we load the PC into tmp1 and let set_last_Java_frame() save 3470 // it into the frame anchor. 3471 get_PC(tmp1); 3472 set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1, allow_relocation); 3473 } 3474 3475 void MacroAssembler::set_thread_state(JavaThreadState new_state) { 3476 z_release(); 3477 3478 assert(Immediate::is_uimm16(_thread_max_state), "enum value out of range for instruction"); 3479 assert(sizeof(JavaThreadState) == sizeof(int), "enum value must have base type int"); 3480 store_const(Address(Z_thread, JavaThread::thread_state_offset()), new_state, Z_R0, false); 3481 } 3482 3483 void MacroAssembler::get_vm_result(Register oop_result) { 3484 z_lg(oop_result, Address(Z_thread, JavaThread::vm_result_offset())); 3485 clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(void*)); 3486 3487 verify_oop(oop_result, FILE_AND_LINE); 3488 } 3489 3490 void MacroAssembler::get_vm_result_2(Register result) { 3491 z_lg(result, Address(Z_thread, JavaThread::vm_result_2_offset())); 3492 clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(void*)); 3493 } 3494 3495 // We require that C code which does not return a value in vm_result will 3496 // leave it undisturbed. 3497 void MacroAssembler::set_vm_result(Register oop_result) { 3498 z_stg(oop_result, Address(Z_thread, JavaThread::vm_result_offset())); 3499 } 3500 3501 // Explicit null checks (used for method handle code). 3502 void MacroAssembler::null_check(Register reg, Register tmp, int64_t offset) { 3503 if (!ImplicitNullChecks) { 3504 NearLabel ok; 3505 3506 compare64_and_branch(reg, (intptr_t) 0, Assembler::bcondNotEqual, ok); 3507 3508 // We just put the address into reg if it was 0 (tmp==Z_R0 is allowed so we can't use it for the address). 3509 address exception_entry = Interpreter::throw_NullPointerException_entry(); 3510 load_absolute_address(reg, exception_entry); 3511 z_br(reg); 3512 3513 bind(ok); 3514 } else { 3515 if (needs_explicit_null_check((intptr_t)offset)) { 3516 // Provoke OS null exception if reg is null by 3517 // accessing M[reg] w/o changing any registers. 3518 z_lg(tmp, 0, reg); 3519 } 3520 // else 3521 // Nothing to do, (later) access of M[reg + offset] 3522 // will provoke OS null exception if reg is null. 3523 } 3524 } 3525 3526 //------------------------------------- 3527 // Compressed Klass Pointers 3528 //------------------------------------- 3529 3530 // Klass oop manipulations if compressed. 3531 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 3532 Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided. (dst == src) also possible. 3533 address base = CompressedKlassPointers::base(); 3534 int shift = CompressedKlassPointers::shift(); 3535 bool need_zero_extend = base != 0; 3536 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3537 3538 BLOCK_COMMENT("cKlass encoder {"); 3539 3540 #ifdef ASSERT 3541 Label ok; 3542 z_tmll(current, CompressedKlassPointers::klass_alignment_in_bytes() - 1); // Check alignment. 3543 z_brc(Assembler::bcondAllZero, ok); 3544 // The plain disassembler does not recognize illtrap. It instead displays 3545 // a 32-bit value. Issuing two illtraps assures the disassembler finds 3546 // the proper beginning of the next instruction. 3547 z_illtrap(0xee); 3548 z_illtrap(0xee); 3549 bind(ok); 3550 #endif 3551 3552 // Scale down the incoming klass pointer first. 3553 // We then can be sure we calculate an offset that fits into 32 bit. 
3554 // More generally speaking: all subsequent calculations are purely 32-bit.
3555 if (shift != 0) {
3556 z_srlg(dst, current, shift);
3557 current = dst;
3558 }
3559
3560 if (base != nullptr) {
3561 // Use scaled-down base address parts to match scaled-down klass pointer.
3562 unsigned int base_h = ((unsigned long)base)>>(32+shift);
3563 unsigned int base_l = (unsigned int)(((unsigned long)base)>>shift);
3564
3565 // General considerations:
3566 // - when calculating (current_h - base_h), all bits must cancel (become 0).
3567 // Otherwise, we would end up with a compressed klass pointer which doesn't
3568 // fit into 32-bit.
3569 // - Only bit#33 of the difference could potentially be non-zero. For that
3570 // to happen, (current_l < base_l) must hold. In this case, the subtraction
3571 // will create a borrow out of bit#32, nicely killing bit#33.
3572 // - With the above, we only need to consider current_l and base_l to
3573 // calculate the result.
3574 // - Both values are treated as unsigned. The unsigned subtraction is
3575 // replaced by adding (unsigned) the 2's complement of the subtrahend.
3576
3577 if (base_l == 0) {
3578 // - In theory, the calculation to be performed here (current_h - base_h) MUST
3579 // cancel all high-word bits. Otherwise, we would end up with an offset
3580 // (i.e. compressed klass pointer) that does not fit into 32 bit.
3581 // - current_l remains unchanged.
3582 // - Therefore, we can replace all calculation with just a
3583 // zero-extending load 32 to 64 bit.
3584 // - Even that can be replaced with a conditional load if dst != current.
3585 // (This is a local view. The shift step may have requested zero-extension.)
3586 } else {
3587 if ((base_h == 0) && is_uimm(base_l, 31)) {
3588 // If we happen to find that (base_h == 0), and that base_l is within the range
3589 // which can be represented by a signed int, then we can use 64bit signed add with
3590 // (-base_l) as 32bit signed immediate operand. The add will take care of the
3591 // upper 32 bits of the result, saving us the need for an extra zero extension.
3592 // For base_l to be in the required range, it must not have the most significant
3593 // bit (aka sign bit) set.
3594 lgr_if_needed(dst, current); // no zero/sign extension in this case!
3595 z_agfi(dst, -(int)base_l); // base_l must be passed as signed.
3596 need_zero_extend = false;
3597 current = dst;
3598 } else {
3599 // To begin with, we may need to copy and/or zero-extend the register operand.
3600 // We have to calculate (current_l - base_l). Because there is no unsigned
3601 // subtract instruction with immediate operand, we add the 2's complement of base_l.
3602 if (need_zero_extend) {
3603 z_llgfr(dst, current);
3604 need_zero_extend = false;
3605 } else {
3606 llgfr_if_needed(dst, current);
3607 }
3608 current = dst;
3609 z_alfi(dst, -base_l);
3610 }
3611 }
3612 }
3613
3614 if (need_zero_extend) {
3615 // We must zero-extend the calculated result. It may have some leftover bits in
3616 // the hi-word because we only did optimized calculations.
3617 z_llgfr(dst, current);
3618 } else {
3619 llgfr_if_needed(dst, current); // zero-extension while copying comes at no extra cost.
3620 }
3621
3622 BLOCK_COMMENT("} cKlass encoder");
3623 }
3624
3625 // This function calculates the size of the code generated by
3626 // decode_klass_not_null(Register dst, Register src)
3627 // when Universe::heap() isn't null. Hence, if the instructions
3628 // it generates change, then this method needs to be updated.
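// Illustrative size calculation (derived from the cases handled below, not an
// additional guarantee): with a non-zero shift and a base whose low 32 bits are
// zero (and high-word instructions available), the emitted sequence is
//   sllg (6 bytes) + aih (6 bytes) = 12 bytes,
// plus the fixed assertion overhead in ASSERT builds.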
3629 int MacroAssembler::instr_size_for_decode_klass_not_null() { 3630 address base = CompressedKlassPointers::base(); 3631 int shift_size = CompressedKlassPointers::shift() == 0 ? 0 : 6; /* sllg */ 3632 int addbase_size = 0; 3633 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3634 3635 if (base != nullptr) { 3636 unsigned int base_h = ((unsigned long)base)>>32; 3637 unsigned int base_l = (unsigned int)((unsigned long)base); 3638 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3639 addbase_size += 6; /* aih */ 3640 } else if ((base_h == 0) && (base_l != 0)) { 3641 addbase_size += 6; /* algfi */ 3642 } else { 3643 addbase_size += load_const_size(); 3644 addbase_size += 4; /* algr */ 3645 } 3646 } 3647 #ifdef ASSERT 3648 addbase_size += 10; 3649 addbase_size += 2; // Extra sigill. 3650 #endif 3651 return addbase_size + shift_size; 3652 } 3653 3654 // !!! If the instructions that get generated here change 3655 // then function instr_size_for_decode_klass_not_null() 3656 // needs to get updated. 3657 // This variant of decode_klass_not_null() must generate predictable code! 3658 // The code must only depend on globally known parameters. 3659 void MacroAssembler::decode_klass_not_null(Register dst) { 3660 address base = CompressedKlassPointers::base(); 3661 int shift = CompressedKlassPointers::shift(); 3662 int beg_off = offset(); 3663 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3664 3665 BLOCK_COMMENT("cKlass decoder (const size) {"); 3666 3667 if (shift != 0) { // Shift required? 3668 z_sllg(dst, dst, shift); 3669 } 3670 if (base != nullptr) { 3671 unsigned int base_h = ((unsigned long)base)>>32; 3672 unsigned int base_l = (unsigned int)((unsigned long)base); 3673 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3674 z_aih(dst, base_h); // Base has no set bits in lower half. 3675 } else if ((base_h == 0) && (base_l != 0)) { 3676 z_algfi(dst, base_l); // Base has no set bits in upper half. 3677 } else { 3678 load_const(Z_R0, base); // Base has set bits everywhere. 3679 z_algr(dst, Z_R0); 3680 } 3681 } 3682 3683 #ifdef ASSERT 3684 Label ok; 3685 z_tmll(dst, CompressedKlassPointers::klass_alignment_in_bytes() - 1); // Check alignment. 3686 z_brc(Assembler::bcondAllZero, ok); 3687 // The plain disassembler does not recognize illtrap. It instead displays 3688 // a 32-bit value. Issuing two illtraps assures the disassembler finds 3689 // the proper beginning of the next instruction. 3690 z_illtrap(0xd1); 3691 z_illtrap(0xd1); 3692 bind(ok); 3693 #endif 3694 assert(offset() == beg_off + instr_size_for_decode_klass_not_null(), "Code gen mismatch."); 3695 3696 BLOCK_COMMENT("} cKlass decoder (const size)"); 3697 } 3698 3699 // This variant of decode_klass_not_null() is for cases where 3700 // 1) the size of the generated instructions may vary 3701 // 2) the result is (potentially) stored in a register different from the source. 3702 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { 3703 address base = CompressedKlassPointers::base(); 3704 int shift = CompressedKlassPointers::shift(); 3705 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3706 3707 BLOCK_COMMENT("cKlass decoder {"); 3708 3709 if (src == noreg) src = dst; 3710 3711 if (shift != 0) { // Shift or at least move required? 
3712 z_sllg(dst, src, shift);
3713 } else {
3714 lgr_if_needed(dst, src);
3715 }
3716
3717 if (base != nullptr) {
3718 unsigned int base_h = ((unsigned long)base)>>32;
3719 unsigned int base_l = (unsigned int)((unsigned long)base);
3720 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
3721 z_aih(dst, base_h); // Base has no set bits in lower half.
3722 } else if ((base_h == 0) && (base_l != 0)) {
3723 z_algfi(dst, base_l); // Base has no set bits in upper half.
3724 } else {
3725 load_const_optimized(Z_R0, base); // Base has set bits everywhere.
3726 z_algr(dst, Z_R0);
3727 }
3728 }
3729
3730 #ifdef ASSERT
3731 Label ok;
3732 z_tmll(dst, CompressedKlassPointers::klass_alignment_in_bytes() - 1); // Check alignment.
3733 z_brc(Assembler::bcondAllZero, ok);
3734 // The plain disassembler does not recognize illtrap. It instead displays
3735 // a 32-bit value. Issuing two illtraps assures the disassembler finds
3736 // the proper beginning of the next instruction.
3737 z_illtrap(0xd2);
3738 z_illtrap(0xd2);
3739 bind(ok);
3740 #endif
3741 BLOCK_COMMENT("} cKlass decoder");
3742 }
3743
3744 void MacroAssembler::load_klass(Register klass, Address mem) {
3745 if (UseCompressedClassPointers) {
3746 z_llgf(klass, mem);
3747 // Attention: no null check here!
3748 decode_klass_not_null(klass);
3749 } else {
3750 z_lg(klass, mem);
3751 }
3752 }
3753
3754 void MacroAssembler::load_klass(Register klass, Register src_oop) {
3755 if (UseCompressedClassPointers) {
3756 z_llgf(klass, oopDesc::klass_offset_in_bytes(), src_oop);
3757 // Attention: no null check here!
3758 decode_klass_not_null(klass);
3759 } else {
3760 z_lg(klass, oopDesc::klass_offset_in_bytes(), src_oop);
3761 }
3762 }
3763
3764 void MacroAssembler::store_klass(Register klass, Register dst_oop, Register ck) {
3765 if (UseCompressedClassPointers) {
3766 assert_different_registers(dst_oop, klass, Z_R0);
3767 if (ck == noreg) ck = klass;
3768 encode_klass_not_null(ck, klass);
3769 z_st(ck, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
3770 } else {
3771 z_stg(klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
3772 }
3773 }
3774
3775 void MacroAssembler::store_klass_gap(Register s, Register d) {
3776 if (UseCompressedClassPointers) {
3777 assert(s != d, "not enough registers");
3778 // Support s = noreg.
3779 if (s != noreg) {
3780 z_st(s, Address(d, oopDesc::klass_gap_offset_in_bytes()));
3781 } else {
3782 z_mvhi(Address(d, oopDesc::klass_gap_offset_in_bytes()), 0);
3783 }
3784 }
3785 }
3786
3787 // Compare klass ptr in memory against klass ptr in register.
3788 //
3789 // Rop1 - klass in register, always uncompressed.
3790 // disp - Offset of klass in memory, compressed/uncompressed, depending on runtime flag.
3791 // Rbase - Base address of cKlass in memory.
3792 // maybenull - True if Rop1 possibly is a null.
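// Example use (see nmethod_UEP above): the inline-cache check compares the cached
// klass in Z_inline_cache against the receiver's klass field, encoding the register
// operand instead of loading and decoding the compressed value in memory:
//   compare_klass_ptr(Z_inline_cache, oopDesc::klass_offset_in_bytes(), Z_ARG1, false);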
3793 void MacroAssembler::compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybenull) { 3794 3795 BLOCK_COMMENT("compare klass ptr {"); 3796 3797 if (UseCompressedClassPointers) { 3798 const int shift = CompressedKlassPointers::shift(); 3799 address base = CompressedKlassPointers::base(); 3800 3801 if (CompressedKlassPointers::tiny_classpointer_mode()) { 3802 assert(shift >= 3, "cKlass encoder detected bad shift"); 3803 } else { 3804 assert((shift == 0) || (shift == 3), "cKlass encoder detected bad shift"); 3805 } 3806 assert_different_registers(Rop1, Z_R0); 3807 assert_different_registers(Rop1, Rbase, Z_R1); 3808 3809 // First encode register oop and then compare with cOop in memory. 3810 // This sequence saves an unnecessary cOop load and decode. 3811 if (base == nullptr) { 3812 if (shift == 0) { 3813 z_cl(Rop1, disp, Rbase); // Unscaled 3814 } else { 3815 z_srlg(Z_R0, Rop1, shift); // ZeroBased 3816 z_cl(Z_R0, disp, Rbase); 3817 } 3818 } else { // HeapBased 3819 #ifdef ASSERT 3820 bool used_R0 = true; 3821 bool used_R1 = true; 3822 #endif 3823 Register current = Rop1; 3824 Label done; 3825 3826 if (maybenull) { // null pointer must be preserved! 3827 z_ltgr(Z_R0, current); 3828 z_bre(done); 3829 current = Z_R0; 3830 } 3831 3832 unsigned int base_h = ((unsigned long)base)>>32; 3833 unsigned int base_l = (unsigned int)((unsigned long)base); 3834 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3835 lgr_if_needed(Z_R0, current); 3836 z_aih(Z_R0, -((int)base_h)); // Base has no set bits in lower half. 3837 } else if ((base_h == 0) && (base_l != 0)) { 3838 lgr_if_needed(Z_R0, current); 3839 z_agfi(Z_R0, -(int)base_l); 3840 } else { 3841 int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base)); 3842 add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); // Subtract base by adding complement. 3843 } 3844 3845 if (shift != 0) { 3846 z_srlg(Z_R0, Z_R0, shift); 3847 } 3848 bind(done); 3849 z_cl(Z_R0, disp, Rbase); 3850 #ifdef ASSERT 3851 if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2); 3852 if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2); 3853 #endif 3854 } 3855 } else { 3856 z_clg(Rop1, disp, Z_R0, Rbase); 3857 } 3858 BLOCK_COMMENT("} compare klass ptr"); 3859 } 3860 3861 //--------------------------- 3862 // Compressed oops 3863 //--------------------------- 3864 3865 void MacroAssembler::encode_heap_oop(Register oop) { 3866 oop_encoder(oop, oop, true /*maybe null*/); 3867 } 3868 3869 void MacroAssembler::encode_heap_oop_not_null(Register oop) { 3870 oop_encoder(oop, oop, false /*not null*/); 3871 } 3872 3873 // Called with something derived from the oop base. e.g. oop_base>>3. 3874 int MacroAssembler::get_oop_base_pow2_offset(uint64_t oop_base) { 3875 unsigned int oop_base_ll = ((unsigned int)(oop_base >> 0)) & 0xffff; 3876 unsigned int oop_base_lh = ((unsigned int)(oop_base >> 16)) & 0xffff; 3877 unsigned int oop_base_hl = ((unsigned int)(oop_base >> 32)) & 0xffff; 3878 unsigned int oop_base_hh = ((unsigned int)(oop_base >> 48)) & 0xffff; 3879 unsigned int n_notzero_parts = (oop_base_ll == 0 ? 0:1) 3880 + (oop_base_lh == 0 ? 0:1) 3881 + (oop_base_hl == 0 ? 0:1) 3882 + (oop_base_hh == 0 ? 0:1); 3883 3884 assert(oop_base != 0, "This is for HeapBased cOops only"); 3885 3886 if (n_notzero_parts != 1) { // Check if oop_base is just a few pages shy of a power of 2. 3887 uint64_t pow2_offset = 0x10000 - oop_base_ll; 3888 if (pow2_offset < 0x8000) { // This might not be necessary. 
3889 uint64_t oop_base2 = oop_base + pow2_offset; 3890 3891 oop_base_ll = ((unsigned int)(oop_base2 >> 0)) & 0xffff; 3892 oop_base_lh = ((unsigned int)(oop_base2 >> 16)) & 0xffff; 3893 oop_base_hl = ((unsigned int)(oop_base2 >> 32)) & 0xffff; 3894 oop_base_hh = ((unsigned int)(oop_base2 >> 48)) & 0xffff; 3895 n_notzero_parts = (oop_base_ll == 0 ? 0:1) + 3896 (oop_base_lh == 0 ? 0:1) + 3897 (oop_base_hl == 0 ? 0:1) + 3898 (oop_base_hh == 0 ? 0:1); 3899 if (n_notzero_parts == 1) { 3900 assert(-(int64_t)pow2_offset != (int64_t)-1, "We use -1 to signal uninitialized base register"); 3901 return -pow2_offset; 3902 } 3903 } 3904 } 3905 return 0; 3906 } 3907 3908 // If base address is offset from a straight power of two by just a few pages, 3909 // return this offset to the caller for a possible later composite add. 3910 // TODO/FIX: will only work correctly for 4k pages. 3911 int MacroAssembler::get_oop_base(Register Rbase, uint64_t oop_base) { 3912 int pow2_offset = get_oop_base_pow2_offset(oop_base); 3913 3914 load_const_optimized(Rbase, oop_base - pow2_offset); // Best job possible. 3915 3916 return pow2_offset; 3917 } 3918 3919 int MacroAssembler::get_oop_base_complement(Register Rbase, uint64_t oop_base) { 3920 int offset = get_oop_base(Rbase, oop_base); 3921 z_lcgr(Rbase, Rbase); 3922 return -offset; 3923 } 3924 3925 // Compare compressed oop in memory against oop in register. 3926 // Rop1 - Oop in register. 3927 // disp - Offset of cOop in memory. 3928 // Rbase - Base address of cOop in memory. 3929 // maybenull - True if Rop1 possibly is a null. 3930 // maybenulltarget - Branch target for Rop1 == nullptr, if flow control shall NOT continue with compare instruction. 3931 void MacroAssembler::compare_heap_oop(Register Rop1, Address mem, bool maybenull) { 3932 Register Rbase = mem.baseOrR0(); 3933 Register Rindex = mem.indexOrR0(); 3934 int64_t disp = mem.disp(); 3935 3936 const int shift = CompressedOops::shift(); 3937 address base = CompressedOops::base(); 3938 3939 assert(UseCompressedOops, "must be on to call this method"); 3940 assert(Universe::heap() != nullptr, "java heap must be initialized to call this method"); 3941 assert((shift == 0) || (shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift"); 3942 assert_different_registers(Rop1, Z_R0); 3943 assert_different_registers(Rop1, Rbase, Z_R1); 3944 assert_different_registers(Rop1, Rindex, Z_R1); 3945 3946 BLOCK_COMMENT("compare heap oop {"); 3947 3948 // First encode register oop and then compare with cOop in memory. 3949 // This sequence saves an unnecessary cOop load and decode. 3950 if (base == nullptr) { 3951 if (shift == 0) { 3952 z_cl(Rop1, disp, Rindex, Rbase); // Unscaled 3953 } else { 3954 z_srlg(Z_R0, Rop1, shift); // ZeroBased 3955 z_cl(Z_R0, disp, Rindex, Rbase); 3956 } 3957 } else { // HeapBased 3958 #ifdef ASSERT 3959 bool used_R0 = true; 3960 bool used_R1 = true; 3961 #endif 3962 Label done; 3963 int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base)); 3964 3965 if (maybenull) { // null pointer must be preserved! 
3966 z_ltgr(Z_R0, Rop1); 3967 z_bre(done); 3968 } 3969 3970 add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); 3971 z_srlg(Z_R0, Z_R0, shift); 3972 3973 bind(done); 3974 z_cl(Z_R0, disp, Rindex, Rbase); 3975 #ifdef ASSERT 3976 if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2); 3977 if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2); 3978 #endif 3979 } 3980 BLOCK_COMMENT("} compare heap oop"); 3981 } 3982 3983 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, 3984 const Address& addr, Register val, 3985 Register tmp1, Register tmp2, Register tmp3) { 3986 assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL | 3987 ON_UNKNOWN_OOP_REF)) == 0, "unsupported decorator"); 3988 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 3989 decorators = AccessInternal::decorator_fixup(decorators, type); 3990 bool as_raw = (decorators & AS_RAW) != 0; 3991 if (as_raw) { 3992 bs->BarrierSetAssembler::store_at(this, decorators, type, 3993 addr, val, 3994 tmp1, tmp2, tmp3); 3995 } else { 3996 bs->store_at(this, decorators, type, 3997 addr, val, 3998 tmp1, tmp2, tmp3); 3999 } 4000 } 4001 4002 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, 4003 const Address& addr, Register dst, 4004 Register tmp1, Register tmp2, Label *is_null) { 4005 assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL | 4006 ON_PHANTOM_OOP_REF | ON_WEAK_OOP_REF)) == 0, "unsupported decorator"); 4007 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 4008 decorators = AccessInternal::decorator_fixup(decorators, type); 4009 bool as_raw = (decorators & AS_RAW) != 0; 4010 if (as_raw) { 4011 bs->BarrierSetAssembler::load_at(this, decorators, type, 4012 addr, dst, 4013 tmp1, tmp2, is_null); 4014 } else { 4015 bs->load_at(this, decorators, type, 4016 addr, dst, 4017 tmp1, tmp2, is_null); 4018 } 4019 } 4020 4021 void MacroAssembler::load_heap_oop(Register dest, const Address &a, 4022 Register tmp1, Register tmp2, 4023 DecoratorSet decorators, Label *is_null) { 4024 access_load_at(T_OBJECT, IN_HEAP | decorators, a, dest, tmp1, tmp2, is_null); 4025 } 4026 4027 void MacroAssembler::store_heap_oop(Register Roop, const Address &a, 4028 Register tmp1, Register tmp2, Register tmp3, 4029 DecoratorSet decorators) { 4030 access_store_at(T_OBJECT, IN_HEAP | decorators, a, Roop, tmp1, tmp2, tmp3); 4031 } 4032 4033 //------------------------------------------------- 4034 // Encode compressed oop. Generally usable encoder. 4035 //------------------------------------------------- 4036 // Rsrc - contains regular oop on entry. It remains unchanged. 4037 // Rdst - contains compressed oop on exit. 4038 // Rdst and Rsrc may indicate same register, in which case Rsrc does not remain unchanged. 4039 // 4040 // Rdst must not indicate scratch register Z_R1 (Z_R1_scratch) for functionality. 4041 // Rdst should not indicate scratch register Z_R0 (Z_R0_scratch) for performance. 4042 // 4043 // only32bitValid is set, if later code only uses the lower 32 bits. In this 4044 // case we must not fix the upper 32 bits. 
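// Typical uses (see encode_heap_oop / encode_heap_oop_not_null above), with the
// remaining parameters left at their defaults:
//   oop_encoder(Roop, Roop, true  /*maybe null*/);   // general case, null is preserved
//   oop_encoder(Roop, Roop, false /*not null*/);     // caller guarantees Roop != null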
4045 void MacroAssembler::oop_encoder(Register Rdst, Register Rsrc, bool maybenull, 4046 Register Rbase, int pow2_offset, bool only32bitValid) { 4047 4048 const address oop_base = CompressedOops::base(); 4049 const int oop_shift = CompressedOops::shift(); 4050 const bool disjoint = CompressedOops::base_disjoint(); 4051 4052 assert(UseCompressedOops, "must be on to call this method"); 4053 assert(Universe::heap() != nullptr, "java heap must be initialized to call this encoder"); 4054 assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift"); 4055 4056 if (disjoint || (oop_base == nullptr)) { 4057 BLOCK_COMMENT("cOop encoder zeroBase {"); 4058 if (oop_shift == 0) { 4059 if (oop_base != nullptr && !only32bitValid) { 4060 z_llgfr(Rdst, Rsrc); // Clear upper bits in case the register will be decoded again. 4061 } else { 4062 lgr_if_needed(Rdst, Rsrc); 4063 } 4064 } else { 4065 z_srlg(Rdst, Rsrc, oop_shift); 4066 if (oop_base != nullptr && !only32bitValid) { 4067 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 4068 } 4069 } 4070 BLOCK_COMMENT("} cOop encoder zeroBase"); 4071 return; 4072 } 4073 4074 bool used_R0 = false; 4075 bool used_R1 = false; 4076 4077 BLOCK_COMMENT("cOop encoder general {"); 4078 assert_different_registers(Rdst, Z_R1); 4079 assert_different_registers(Rsrc, Rbase); 4080 if (maybenull) { 4081 Label done; 4082 // We reorder shifting and subtracting, so that we can compare 4083 // and shift in parallel: 4084 // 4085 // cycle 0: potential LoadN, base = <const> 4086 // cycle 1: base = !base dst = src >> 3, cmp cr = (src != 0) 4087 // cycle 2: if (cr) br, dst = dst + base + offset 4088 4089 // Get oop_base components. 4090 if (pow2_offset == -1) { 4091 if (Rdst == Rbase) { 4092 if (Rdst == Z_R1 || Rsrc == Z_R1) { 4093 Rbase = Z_R0; 4094 used_R0 = true; 4095 } else { 4096 Rdst = Z_R1; 4097 used_R1 = true; 4098 } 4099 } 4100 if (Rbase == Z_R1) { 4101 used_R1 = true; 4102 } 4103 pow2_offset = get_oop_base_complement(Rbase, ((uint64_t)(intptr_t)oop_base) >> oop_shift); 4104 } 4105 assert_different_registers(Rdst, Rbase); 4106 4107 // Check for null oop (must be left alone) and shift. 4108 if (oop_shift != 0) { // Shift out alignment bits 4109 if (((intptr_t)oop_base&0xc000000000000000L) == 0L) { // We are sure: no single address will have the leftmost bit set. 4110 z_srag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4111 } else { 4112 z_srlg(Rdst, Rsrc, oop_shift); 4113 z_ltgr(Rsrc, Rsrc); // This is the recommended way of testing for zero. 4114 // This probably is faster, as it does not write a register. No! 4115 // z_cghi(Rsrc, 0); 4116 } 4117 } else { 4118 z_ltgr(Rdst, Rsrc); // Move null to result register. 4119 } 4120 z_bre(done); 4121 4122 // Subtract oop_base components. 4123 if ((Rdst == Z_R0) || (Rbase == Z_R0)) { 4124 z_algr(Rdst, Rbase); 4125 if (pow2_offset != 0) { add2reg(Rdst, pow2_offset); } 4126 } else { 4127 add2reg_with_index(Rdst, pow2_offset, Rbase, Rdst); 4128 } 4129 if (!only32bitValid) { 4130 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 4131 } 4132 bind(done); 4133 4134 } else { // not null 4135 // Get oop_base components. 4136 if (pow2_offset == -1) { 4137 pow2_offset = get_oop_base_complement(Rbase, (uint64_t)(intptr_t)oop_base); 4138 } 4139 4140 // Subtract oop_base components and shift. 4141 if (Rdst == Z_R0 || Rsrc == Z_R0 || Rbase == Z_R0) { 4142 // Don't use lay instruction. 
4143 if (Rdst == Rsrc) { 4144 z_algr(Rdst, Rbase); 4145 } else { 4146 lgr_if_needed(Rdst, Rbase); 4147 z_algr(Rdst, Rsrc); 4148 } 4149 if (pow2_offset != 0) add2reg(Rdst, pow2_offset); 4150 } else { 4151 add2reg_with_index(Rdst, pow2_offset, Rbase, Rsrc); 4152 } 4153 if (oop_shift != 0) { // Shift out alignment bits. 4154 z_srlg(Rdst, Rdst, oop_shift); 4155 } 4156 if (!only32bitValid) { 4157 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 4158 } 4159 } 4160 #ifdef ASSERT 4161 if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb01bUL, 2); } 4162 if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb02bUL, 2); } 4163 #endif 4164 BLOCK_COMMENT("} cOop encoder general"); 4165 } 4166 4167 //------------------------------------------------- 4168 // decode compressed oop. Generally usable decoder. 4169 //------------------------------------------------- 4170 // Rsrc - contains compressed oop on entry. 4171 // Rdst - contains regular oop on exit. 4172 // Rdst and Rsrc may indicate same register. 4173 // Rdst must not be the same register as Rbase, if Rbase was preloaded (before call). 4174 // Rdst can be the same register as Rbase. Then, either Z_R0 or Z_R1 must be available as scratch. 4175 // Rbase - register to use for the base 4176 // pow2_offset - offset of base to nice value. If -1, base must be loaded. 4177 // For performance, it is good to 4178 // - avoid Z_R0 for any of the argument registers. 4179 // - keep Rdst and Rsrc distinct from Rbase. Rdst == Rsrc is ok for performance. 4180 // - avoid Z_R1 for Rdst if Rdst == Rbase. 4181 void MacroAssembler::oop_decoder(Register Rdst, Register Rsrc, bool maybenull, Register Rbase, int pow2_offset) { 4182 4183 const address oop_base = CompressedOops::base(); 4184 const int oop_shift = CompressedOops::shift(); 4185 const bool disjoint = CompressedOops::base_disjoint(); 4186 4187 assert(UseCompressedOops, "must be on to call this method"); 4188 assert(Universe::heap() != nullptr, "java heap must be initialized to call this decoder"); 4189 assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), 4190 "cOop encoder detected bad shift"); 4191 4192 // cOops are always loaded zero-extended from memory. No explicit zero-extension necessary. 4193 4194 if (oop_base != nullptr) { 4195 unsigned int oop_base_hl = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xffff; 4196 unsigned int oop_base_hh = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 48)) & 0xffff; 4197 unsigned int oop_base_hf = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xFFFFffff; 4198 if (disjoint && (oop_base_hl == 0 || oop_base_hh == 0)) { 4199 BLOCK_COMMENT("cOop decoder disjointBase {"); 4200 // We do not need to load the base. Instead, we can install the upper bits 4201 // with an OR instead of an ADD. 4202 Label done; 4203 4204 // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set. 4205 if (maybenull) { // null pointer must be preserved! 4206 z_slag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4207 z_bre(done); 4208 } else { 4209 z_sllg(Rdst, Rsrc, oop_shift); // Logical shift leaves condition code alone. 
4210 } 4211 if ((oop_base_hl != 0) && (oop_base_hh != 0)) { 4212 z_oihf(Rdst, oop_base_hf); 4213 } else if (oop_base_hl != 0) { 4214 z_oihl(Rdst, oop_base_hl); 4215 } else { 4216 assert(oop_base_hh != 0, "not heapbased mode"); 4217 z_oihh(Rdst, oop_base_hh); 4218 } 4219 bind(done); 4220 BLOCK_COMMENT("} cOop decoder disjointBase"); 4221 } else { 4222 BLOCK_COMMENT("cOop decoder general {"); 4223 // There are three decode steps: 4224 // scale oop offset (shift left) 4225 // get base (in reg) and pow2_offset (constant) 4226 // add base, pow2_offset, and oop offset 4227 // The following register overlap situations may exist: 4228 // Rdst == Rsrc, Rbase any other 4229 // not a problem. Scaling in-place leaves Rbase undisturbed. 4230 // Loading Rbase does not impact the scaled offset. 4231 // Rdst == Rbase, Rsrc any other 4232 // scaling would destroy a possibly preloaded Rbase. Loading Rbase 4233 // would destroy the scaled offset. 4234 // Remedy: use Rdst_tmp if Rbase has been preloaded. 4235 // use Rbase_tmp if base has to be loaded. 4236 // Rsrc == Rbase, Rdst any other 4237 // Only possible without preloaded Rbase. 4238 // Loading Rbase does not destroy compressed oop because it was scaled into Rdst before. 4239 // Rsrc == Rbase, Rdst == Rbase 4240 // Only possible without preloaded Rbase. 4241 // Loading Rbase would destroy compressed oop. Scaling in-place is ok. 4242 // Remedy: use Rbase_tmp. 4243 // 4244 Label done; 4245 Register Rdst_tmp = Rdst; 4246 Register Rbase_tmp = Rbase; 4247 bool used_R0 = false; 4248 bool used_R1 = false; 4249 bool base_preloaded = pow2_offset >= 0; 4250 guarantee(!(base_preloaded && (Rsrc == Rbase)), "Register clash, check caller"); 4251 assert(oop_shift != 0, "room for optimization"); 4252 4253 // Check if we need to use scratch registers. 4254 if (Rdst == Rbase) { 4255 assert(!(((Rdst == Z_R0) && (Rsrc == Z_R1)) || ((Rdst == Z_R1) && (Rsrc == Z_R0))), "need a scratch reg"); 4256 if (Rdst != Rsrc) { 4257 if (base_preloaded) { Rdst_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; } 4258 else { Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; } 4259 } else { 4260 Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; 4261 } 4262 } 4263 if (base_preloaded) lgr_if_needed(Rbase_tmp, Rbase); 4264 4265 // Scale oop and check for null. 4266 // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set. 4267 if (maybenull) { // null pointer must be preserved! 4268 z_slag(Rdst_tmp, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4269 z_bre(done); 4270 } else { 4271 z_sllg(Rdst_tmp, Rsrc, oop_shift); // Logical shift leaves condition code alone. 4272 } 4273 4274 // Get oop_base components. 4275 if (!base_preloaded) { 4276 pow2_offset = get_oop_base(Rbase_tmp, (uint64_t)(intptr_t)oop_base); 4277 } 4278 4279 // Add up all components. 
4280 if ((Rbase_tmp == Z_R0) || (Rdst_tmp == Z_R0)) { 4281 z_algr(Rdst_tmp, Rbase_tmp); 4282 if (pow2_offset != 0) { add2reg(Rdst_tmp, pow2_offset); } 4283 } else { 4284 add2reg_with_index(Rdst_tmp, pow2_offset, Rbase_tmp, Rdst_tmp); 4285 } 4286 4287 bind(done); 4288 lgr_if_needed(Rdst, Rdst_tmp); 4289 #ifdef ASSERT 4290 if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb03bUL, 2); } 4291 if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb04bUL, 2); } 4292 #endif 4293 BLOCK_COMMENT("} cOop decoder general"); 4294 } 4295 } else { 4296 BLOCK_COMMENT("cOop decoder zeroBase {"); 4297 if (oop_shift == 0) { 4298 lgr_if_needed(Rdst, Rsrc); 4299 } else { 4300 z_sllg(Rdst, Rsrc, oop_shift); 4301 } 4302 BLOCK_COMMENT("} cOop decoder zeroBase"); 4303 } 4304 } 4305 4306 // ((OopHandle)result).resolve(); 4307 void MacroAssembler::resolve_oop_handle(Register result) { 4308 // OopHandle::resolve is an indirection. 4309 z_lg(result, 0, result); 4310 } 4311 4312 void MacroAssembler::load_mirror_from_const_method(Register mirror, Register const_method) { 4313 mem2reg_opt(mirror, Address(const_method, ConstMethod::constants_offset())); 4314 mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset())); 4315 mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset())); 4316 resolve_oop_handle(mirror); 4317 } 4318 4319 void MacroAssembler::load_method_holder(Register holder, Register method) { 4320 mem2reg_opt(holder, Address(method, Method::const_offset())); 4321 mem2reg_opt(holder, Address(holder, ConstMethod::constants_offset())); 4322 mem2reg_opt(holder, Address(holder, ConstantPool::pool_holder_offset())); 4323 } 4324 4325 //--------------------------------------------------------------- 4326 //--- Operations on arrays. 4327 //--------------------------------------------------------------- 4328 4329 // Compiler ensures base is doubleword aligned and cnt is #doublewords. 4330 // Emitter does not KILL cnt and base arguments, since they need to be copied to 4331 // work registers anyway. 4332 // Actually, only r0, r1, and r5 are killed. 4333 unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register odd_tmp_reg) { 4334 4335 int block_start = offset(); 4336 Register dst_len = Z_R1; // Holds dst len for MVCLE. 4337 Register dst_addr = Z_R0; // Holds dst addr for MVCLE. 4338 4339 Label doXC, doMVCLE, done; 4340 4341 BLOCK_COMMENT("Clear_Array {"); 4342 4343 // Check for zero len and convert to long. 4344 z_ltgfr(odd_tmp_reg, cnt_arg); 4345 z_bre(done); // Nothing to do if len == 0. 4346 4347 // Prefetch data to be cleared. 4348 if (VM_Version::has_Prefetch()) { 4349 z_pfd(0x02, 0, Z_R0, base_pointer_arg); 4350 z_pfd(0x02, 256, Z_R0, base_pointer_arg); 4351 } 4352 4353 z_sllg(dst_len, odd_tmp_reg, 3); // #bytes to clear. 4354 z_cghi(odd_tmp_reg, 32); // Check for len <= 256 bytes (<=32 DW). 4355 z_brnh(doXC); // If so, use executed XC to clear. 4356 4357 // MVCLE: initialize long arrays (general case). 4358 bind(doMVCLE); 4359 z_lgr(dst_addr, base_pointer_arg); 4360 // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0. 4361 // The even register of the register pair is not killed. 4362 clear_reg(odd_tmp_reg, true, false); 4363 MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding()-1), 0); 4364 z_bru(done); 4365 4366 // XC: initialize short arrays. 4367 Label XC_template; // Instr template, never exec directly! 
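// Note: the XC below is never reached by sequential execution. It only serves as a
// target for EX/EXRL which, per z/Architecture EXECUTE semantics, ORs the low byte
// of dst_len into the instruction's length field. That way a single templated XC
// can clear a variable number of bytes (dst_len holds #bytes-1, i.e. up to 256 bytes).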
4368 bind(XC_template); 4369 z_xc(0,0,base_pointer_arg,0,base_pointer_arg); 4370 4371 bind(doXC); 4372 add2reg(dst_len, -1); // Get #bytes-1 for EXECUTE. 4373 if (VM_Version::has_ExecuteExtensions()) { 4374 z_exrl(dst_len, XC_template); // Execute XC with var. len. 4375 } else { 4376 z_larl(odd_tmp_reg, XC_template); 4377 z_ex(dst_len,0,Z_R0,odd_tmp_reg); // Execute XC with var. len. 4378 } 4379 // z_bru(done); // fallthru 4380 4381 bind(done); 4382 4383 BLOCK_COMMENT("} Clear_Array"); 4384 4385 int block_end = offset(); 4386 return block_end - block_start; 4387 } 4388 4389 // Compiler ensures base is doubleword aligned and cnt is count of doublewords. 4390 // Emitter does not KILL any arguments nor work registers. 4391 // Emitter generates up to 16 XC instructions, depending on the array length. 4392 unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) { 4393 int block_start = offset(); 4394 int off; 4395 int lineSize_Bytes = AllocatePrefetchStepSize; 4396 int lineSize_DW = AllocatePrefetchStepSize>>LogBytesPerWord; 4397 bool doPrefetch = VM_Version::has_Prefetch(); 4398 int XC_maxlen = 256; 4399 int numXCInstr = cnt > 0 ? (cnt*BytesPerWord-1)/XC_maxlen+1 : 0; 4400 4401 BLOCK_COMMENT("Clear_Array_Const {"); 4402 assert(cnt*BytesPerWord <= 4096, "ClearArrayConst can handle 4k only"); 4403 4404 // Do less prefetching for very short arrays. 4405 if (numXCInstr > 0) { 4406 // Prefetch only some cache lines, then begin clearing. 4407 if (doPrefetch) { 4408 if (cnt*BytesPerWord <= lineSize_Bytes/4) { // If less than 1/4 of a cache line to clear, 4409 z_pfd(0x02, 0, Z_R0, base); // prefetch just the first cache line. 4410 } else { 4411 assert(XC_maxlen == lineSize_Bytes, "ClearArrayConst needs 256B cache lines"); 4412 for (off = 0; (off < AllocatePrefetchLines) && (off <= numXCInstr); off ++) { 4413 z_pfd(0x02, off*lineSize_Bytes, Z_R0, base); 4414 } 4415 } 4416 } 4417 4418 for (off=0; off<(numXCInstr-1); off++) { 4419 z_xc(off*XC_maxlen, XC_maxlen-1, base, off*XC_maxlen, base); 4420 4421 // Prefetch some cache lines in advance. 4422 if (doPrefetch && (off <= numXCInstr-AllocatePrefetchLines)) { 4423 z_pfd(0x02, (off+AllocatePrefetchLines)*lineSize_Bytes, Z_R0, base); 4424 } 4425 } 4426 if (off*XC_maxlen < cnt*BytesPerWord) { 4427 z_xc(off*XC_maxlen, (cnt*BytesPerWord-off*XC_maxlen)-1, base, off*XC_maxlen, base); 4428 } 4429 } 4430 BLOCK_COMMENT("} Clear_Array_Const"); 4431 4432 int block_end = offset(); 4433 return block_end - block_start; 4434 } 4435 4436 // Compiler ensures base is doubleword aligned and cnt is #doublewords. 4437 // Emitter does not KILL cnt and base arguments, since they need to be copied to 4438 // work registers anyway. 4439 // Actually, only r0, r1, (which are work registers) and odd_tmp_reg are killed. 4440 // 4441 // For very large arrays, exploit MVCLE H/W support. 4442 // MVCLE instruction automatically exploits H/W-optimized page mover. 4443 // - Bytes up to next page boundary are cleared with a series of XC to self. 4444 // - All full pages are cleared with the page mover H/W assist. 4445 // - Remaining bytes are again cleared by a series of XC to self. 4446 // 4447 unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register odd_tmp_reg) { 4448 4449 int block_start = offset(); 4450 Register dst_len = Z_R1; // Holds dst len for MVCLE. 4451 Register dst_addr = Z_R0; // Holds dst addr for MVCLE. 4452 4453 BLOCK_COMMENT("Clear_Array_Const_Big {"); 4454 4455 // Get len to clear. 
4456 load_const_optimized(dst_len, (long)cnt*8L); // in Bytes = #DW*8 4457 4458 // Prepare other args to MVCLE. 4459 z_lgr(dst_addr, base_pointer_arg); 4460 // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0. 4461 // The even register of the register pair is not killed. 4462 (void) clear_reg(odd_tmp_reg, true, false); // Src len of MVCLE is zero. 4463 MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding() - 1), 0); 4464 BLOCK_COMMENT("} Clear_Array_Const_Big"); 4465 4466 int block_end = offset(); 4467 return block_end - block_start; 4468 } 4469 4470 // Allocator. 4471 unsigned int MacroAssembler::CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg, 4472 Register cnt_reg, 4473 Register tmp1_reg, Register tmp2_reg) { 4474 // Tmp1 is oddReg. 4475 // Tmp2 is evenReg. 4476 4477 int block_start = offset(); 4478 Label doMVC, doMVCLE, done, MVC_template; 4479 4480 BLOCK_COMMENT("CopyRawMemory_AlignedDisjoint {"); 4481 4482 // Check for zero len and convert to long. 4483 z_ltgfr(cnt_reg, cnt_reg); // Remember casted value for doSTG case. 4484 z_bre(done); // Nothing to do if len == 0. 4485 4486 z_sllg(Z_R1, cnt_reg, 3); // Dst len in bytes. calc early to have the result ready. 4487 4488 z_cghi(cnt_reg, 32); // Check for len <= 256 bytes (<=32 DW). 4489 z_brnh(doMVC); // If so, use executed MVC to clear. 4490 4491 bind(doMVCLE); // A lot of data (more than 256 bytes). 4492 // Prep dest reg pair. 4493 z_lgr(Z_R0, dst_reg); // dst addr 4494 // Dst len already in Z_R1. 4495 // Prep src reg pair. 4496 z_lgr(tmp2_reg, src_reg); // src addr 4497 z_lgr(tmp1_reg, Z_R1); // Src len same as dst len. 4498 4499 // Do the copy. 4500 move_long_ext(Z_R0, tmp2_reg, 0xb0); // Bypass cache. 4501 z_bru(done); // All done. 4502 4503 bind(MVC_template); // Just some data (not more than 256 bytes). 4504 z_mvc(0, 0, dst_reg, 0, src_reg); 4505 4506 bind(doMVC); 4507 4508 if (VM_Version::has_ExecuteExtensions()) { 4509 add2reg(Z_R1, -1); 4510 } else { 4511 add2reg(tmp1_reg, -1, Z_R1); 4512 z_larl(Z_R1, MVC_template); 4513 } 4514 4515 if (VM_Version::has_Prefetch()) { 4516 z_pfd(1, 0,Z_R0,src_reg); 4517 z_pfd(2, 0,Z_R0,dst_reg); 4518 // z_pfd(1,256,Z_R0,src_reg); // Assume very short copy. 4519 // z_pfd(2,256,Z_R0,dst_reg); 4520 } 4521 4522 if (VM_Version::has_ExecuteExtensions()) { 4523 z_exrl(Z_R1, MVC_template); 4524 } else { 4525 z_ex(tmp1_reg, 0, Z_R0, Z_R1); 4526 } 4527 4528 bind(done); 4529 4530 BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint"); 4531 4532 int block_end = offset(); 4533 return block_end - block_start; 4534 } 4535 4536 //------------------------------------------------- 4537 // Constants (scalar and oop) in constant pool 4538 //------------------------------------------------- 4539 4540 // Add a non-relocated constant to the CP. 4541 int MacroAssembler::store_const_in_toc(AddressLiteral& val) { 4542 long value = val.value(); 4543 address tocPos = long_constant(value); 4544 4545 if (tocPos != nullptr) { 4546 int tocOffset = (int)(tocPos - code()->consts()->start()); 4547 return tocOffset; 4548 } 4549 // Address_constant returned null, so no constant entry has been created. 4550 // In that case, we return a "fatal" offset, just in case that subsequently 4551 // generated access code is executed. 4552 return -1; 4553 } 4554 4555 // Returns the TOC offset where the address is stored. 4556 // Add a relocated constant to the CP. 
4557 int MacroAssembler::store_oop_in_toc(AddressLiteral& oop) { 4558 // Use RelocationHolder::none for the constant pool entry. 4559 // Otherwise we will end up with a failing NativeCall::verify(x), 4560 // where x is the address of the constant pool entry. 4561 address tocPos = address_constant((address)oop.value(), RelocationHolder::none); 4562 4563 if (tocPos != nullptr) { 4564 int tocOffset = (int)(tocPos - code()->consts()->start()); 4565 RelocationHolder rsp = oop.rspec(); 4566 Relocation *rel = rsp.reloc(); 4567 4568 // Store toc_offset in relocation, used by call_far_patchable. 4569 if ((relocInfo::relocType)rel->type() == relocInfo::runtime_call_w_cp_type) { 4570 ((runtime_call_w_cp_Relocation *)(rel))->set_constant_pool_offset(tocOffset); 4571 } 4572 // Relocate at the load's pc. 4573 relocate(rsp); 4574 4575 return tocOffset; 4576 } 4577 // Address_constant returned null, so no constant entry has been created 4578 // in that case, we return a "fatal" offset, just in case that subsequently 4579 // generated access code is executed. 4580 return -1; 4581 } 4582 4583 bool MacroAssembler::load_const_from_toc(Register dst, AddressLiteral& a, Register Rtoc) { 4584 int tocOffset = store_const_in_toc(a); 4585 if (tocOffset == -1) return false; 4586 address tocPos = tocOffset + code()->consts()->start(); 4587 assert((address)code()->consts()->start() != nullptr, "Please add CP address"); 4588 relocate(a.rspec()); 4589 load_long_pcrelative(dst, tocPos); 4590 return true; 4591 } 4592 4593 bool MacroAssembler::load_oop_from_toc(Register dst, AddressLiteral& a, Register Rtoc) { 4594 int tocOffset = store_oop_in_toc(a); 4595 if (tocOffset == -1) return false; 4596 address tocPos = tocOffset + code()->consts()->start(); 4597 assert((address)code()->consts()->start() != nullptr, "Please add CP address"); 4598 4599 load_addr_pcrelative(dst, tocPos); 4600 return true; 4601 } 4602 4603 // If the instruction sequence at the given pc is a load_const_from_toc 4604 // sequence, return the value currently stored at the referenced position 4605 // in the TOC. 4606 intptr_t MacroAssembler::get_const_from_toc(address pc) { 4607 4608 assert(is_load_const_from_toc(pc), "must be load_const_from_pool"); 4609 4610 long offset = get_load_const_from_toc_offset(pc); 4611 address dataLoc = nullptr; 4612 if (is_load_const_from_toc_pcrelative(pc)) { 4613 dataLoc = pc + offset; 4614 } else { 4615 CodeBlob* cb = CodeCache::find_blob(pc); 4616 assert(cb && cb->is_nmethod(), "sanity"); 4617 nmethod* nm = (nmethod*)cb; 4618 dataLoc = nm->ctable_begin() + offset; 4619 } 4620 return *(intptr_t *)dataLoc; 4621 } 4622 4623 // If the instruction sequence at the given pc is a load_const_from_toc 4624 // sequence, copy the passed-in new_data value into the referenced 4625 // position in the TOC. 4626 void MacroAssembler::set_const_in_toc(address pc, unsigned long new_data, CodeBlob *cb) { 4627 assert(is_load_const_from_toc(pc), "must be load_const_from_pool"); 4628 4629 long offset = MacroAssembler::get_load_const_from_toc_offset(pc); 4630 address dataLoc = nullptr; 4631 if (is_load_const_from_toc_pcrelative(pc)) { 4632 dataLoc = pc+offset; 4633 } else { 4634 nmethod* nm = CodeCache::find_nmethod(pc); 4635 assert((cb == nullptr) || (nm == (nmethod*)cb), "instruction address should be in CodeBlob"); 4636 dataLoc = nm->ctable_begin() + offset; 4637 } 4638 if (*(unsigned long *)dataLoc != new_data) { // Prevent cache invalidation: update only if necessary. 
4639 *(unsigned long *)dataLoc = new_data;
4640 }
4641 }
4642 
4643 // Dynamic TOC. Getter must only be called if "a" is a load_const_from_toc
4644 // site. Verify by calling is_load_const_from_toc() before!!
4645 // Offset is +/- 2**32 -> use long.
4646 long MacroAssembler::get_load_const_from_toc_offset(address a) {
4647 assert(is_load_const_from_toc_pcrelative(a), "expected pc relative load");
4648 // expected code sequence:
4649 // z_lgrl(t, simm32); len = 6
4650 unsigned long inst;
4651 unsigned int len = get_instruction(a, &inst);
4652 return get_pcrel_offset(inst);
4653 }
4654 
4655 //**********************************************************************************
4656 // inspection of generated instruction sequences for a particular pattern
4657 //**********************************************************************************
4658 
4659 bool MacroAssembler::is_load_const_from_toc_pcrelative(address a) {
4660 #ifdef ASSERT
4661 unsigned long inst;
4662 unsigned int len = get_instruction(a+2, &inst);
4663 if ((len == 6) && is_load_pcrelative_long(a) && is_call_pcrelative_long(inst)) {
4664 const int range = 128;
4665 Assembler::dump_code_range(tty, a, range, "instr(a) == z_lgrl && instr(a+2) == z_brasl");
4666 VM_Version::z_SIGSEGV();
4667 }
4668 #endif
4669 // expected code sequence:
4670 // z_lgrl(t, relAddr32); len = 6
4671 //TODO: verify accessed data is in CP, if possible.
4672 return is_load_pcrelative_long(a); // TODO: might be too general. Currently, only lgrl is used.
4673 }
4674 
4675 bool MacroAssembler::is_load_const_from_toc_call(address a) {
4676 return is_load_const_from_toc(a) && is_call_byregister(a + load_const_from_toc_size());
4677 }
4678 
4679 bool MacroAssembler::is_load_const_call(address a) {
4680 return is_load_const(a) && is_call_byregister(a + load_const_size());
4681 }
4682 
4683 //-------------------------------------------------
4684 // Emitters for some really CISC instructions
4685 //-------------------------------------------------
4686 
4687 void MacroAssembler::move_long_ext(Register dst, Register src, unsigned int pad) {
4688 assert(dst->encoding()%2==0, "must be an even/odd register pair");
4689 assert(src->encoding()%2==0, "must be an even/odd register pair");
4690 assert(pad<256, "must be a padding BYTE");
4691 
4692 Label retry;
4693 bind(retry);
4694 Assembler::z_mvcle(dst, src, pad);
4695 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
4696 }
4697 
4698 void MacroAssembler::compare_long_ext(Register left, Register right, unsigned int pad) {
4699 assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
4700 assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
4701 assert(pad<256, "must be a padding BYTE");
4702 
4703 Label retry;
4704 bind(retry);
4705 Assembler::z_clcle(left, right, pad, Z_R0);
4706 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
4707 }
4708 
4709 void MacroAssembler::compare_long_uni(Register left, Register right, unsigned int pad) {
4710 assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
4711 assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
4712 assert(pad<=0xfff, "must be a padding HALFWORD");
4713 assert(VM_Version::has_ETF2(), "instruction must be available");
4714 
4715 Label retry;
4716 bind(retry);
4717 Assembler::z_clclu(left, right, pad, Z_R0);
4718 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
4719 }
4720 
4721 void MacroAssembler::search_string(Register end, Register
start) { 4722 assert(end->encoding() != 0, "end address must not be in R0"); 4723 assert(start->encoding() != 0, "start address must not be in R0"); 4724 4725 Label retry; 4726 bind(retry); 4727 Assembler::z_srst(end, start); 4728 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4729 } 4730 4731 void MacroAssembler::search_string_uni(Register end, Register start) { 4732 assert(end->encoding() != 0, "end address must not be in R0"); 4733 assert(start->encoding() != 0, "start address must not be in R0"); 4734 assert(VM_Version::has_ETF3(), "instruction must be available"); 4735 4736 Label retry; 4737 bind(retry); 4738 Assembler::z_srstu(end, start); 4739 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4740 } 4741 4742 void MacroAssembler::kmac(Register srcBuff) { 4743 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4744 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 4745 4746 Label retry; 4747 bind(retry); 4748 Assembler::z_kmac(Z_R0, srcBuff); 4749 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4750 } 4751 4752 void MacroAssembler::kimd(Register srcBuff) { 4753 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4754 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 4755 4756 Label retry; 4757 bind(retry); 4758 Assembler::z_kimd(Z_R0, srcBuff); 4759 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4760 } 4761 4762 void MacroAssembler::klmd(Register srcBuff) { 4763 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4764 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 4765 4766 Label retry; 4767 bind(retry); 4768 Assembler::z_klmd(Z_R0, srcBuff); 4769 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4770 } 4771 4772 void MacroAssembler::km(Register dstBuff, Register srcBuff) { 4773 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 4774 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 4775 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4776 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 4777 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4778 4779 Label retry; 4780 bind(retry); 4781 Assembler::z_km(dstBuff, srcBuff); 4782 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4783 } 4784 4785 void MacroAssembler::kmc(Register dstBuff, Register srcBuff) { 4786 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 4787 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 4788 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4789 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 4790 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4791 4792 Label retry; 4793 bind(retry); 4794 Assembler::z_kmc(dstBuff, srcBuff); 4795 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4796 } 4797 4798 void MacroAssembler::kmctr(Register dstBuff, Register ctrBuff, Register srcBuff) { 4799 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 
4800 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 4801 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4802 assert(dstBuff->encoding() != 0, "dst buffer address can't be in Z_R0"); 4803 assert(ctrBuff->encoding() != 0, "ctr buffer address can't be in Z_R0"); 4804 assert(ctrBuff->encoding() % 2 == 0, "ctr buffer addr must be an even register"); 4805 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 4806 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4807 4808 Label retry; 4809 bind(retry); 4810 Assembler::z_kmctr(dstBuff, ctrBuff, srcBuff); 4811 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4812 } 4813 4814 void MacroAssembler::cksm(Register crcBuff, Register srcBuff) { 4815 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4816 4817 Label retry; 4818 bind(retry); 4819 Assembler::z_cksm(crcBuff, srcBuff); 4820 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4821 } 4822 4823 void MacroAssembler::translate_oo(Register r1, Register r2, uint m3) { 4824 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4825 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4826 4827 Label retry; 4828 bind(retry); 4829 Assembler::z_troo(r1, r2, m3); 4830 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4831 } 4832 4833 void MacroAssembler::translate_ot(Register r1, Register r2, uint m3) { 4834 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4835 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4836 4837 Label retry; 4838 bind(retry); 4839 Assembler::z_trot(r1, r2, m3); 4840 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4841 } 4842 4843 void MacroAssembler::translate_to(Register r1, Register r2, uint m3) { 4844 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4845 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4846 4847 Label retry; 4848 bind(retry); 4849 Assembler::z_trto(r1, r2, m3); 4850 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4851 } 4852 4853 void MacroAssembler::translate_tt(Register r1, Register r2, uint m3) { 4854 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4855 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4856 4857 Label retry; 4858 bind(retry); 4859 Assembler::z_trtt(r1, r2, m3); 4860 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4861 } 4862 4863 //--------------------------------------- 4864 // Helpers for Intrinsic Emitters 4865 //--------------------------------------- 4866 4867 /** 4868 * uint32_t crc; 4869 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); 4870 */ 4871 void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, Register tmp) { 4872 assert_different_registers(crc, table, tmp); 4873 assert_different_registers(val, table); 4874 if (crc == val) { // Must rotate first to use the unmodified value. 4875 rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. 4876 z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits. 4877 } else { 4878 z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits. 
4879 rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. 4880 } 4881 z_x(crc, Address(table, tmp, 0)); 4882 } 4883 4884 /** 4885 * uint32_t crc; 4886 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); 4887 */ 4888 void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) { 4889 fold_byte_crc32(crc, crc, table, tmp); 4890 } 4891 4892 /** 4893 * Emits code to update CRC-32 with a byte value according to constants in table. 4894 * 4895 * @param [in,out]crc Register containing the crc. 4896 * @param [in]val Register containing the byte to fold into the CRC. 4897 * @param [in]table Register containing the table of crc constants. 4898 * 4899 * uint32_t crc; 4900 * val = crc_table[(val ^ crc) & 0xFF]; 4901 * crc = val ^ (crc >> 8); 4902 */ 4903 void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { 4904 z_xr(val, crc); 4905 fold_byte_crc32(crc, val, table, val); 4906 } 4907 4908 4909 /** 4910 * @param crc register containing existing CRC (32-bit) 4911 * @param buf register pointing to input byte buffer (byte*) 4912 * @param len register containing number of bytes 4913 * @param table register pointing to CRC table 4914 */ 4915 void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, Register data) { 4916 assert_different_registers(crc, buf, len, table, data); 4917 4918 Label L_mainLoop, L_done; 4919 const int mainLoop_stepping = 1; 4920 4921 // Process all bytes in a single-byte loop. 4922 z_ltr(len, len); 4923 z_brnh(L_done); 4924 4925 bind(L_mainLoop); 4926 z_llgc(data, Address(buf, (intptr_t)0));// Current byte of input buffer (zero extended). Avoids garbage in upper half of register. 4927 add2reg(buf, mainLoop_stepping); // Advance buffer position. 4928 update_byte_crc32(crc, data, table); 4929 z_brct(len, L_mainLoop); // Iterate. 4930 4931 bind(L_done); 4932 } 4933 4934 /** 4935 * Emits code to update CRC-32 with a 4-byte value according to constants in table. 4936 * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c. 4937 * 4938 */ 4939 void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc, 4940 Register t0, Register t1, Register t2, Register t3) { 4941 // This is what we implement (the DOBIG4 part): 4942 // 4943 // #define DOBIG4 c ^= *++buf4; \ 4944 // c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ 4945 // crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] 4946 // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 4947 // Pre-calculate (constant) column offsets, use columns 4..7 for big-endian. 4948 const int ix0 = 4*(4*CRC32_COLUMN_SIZE); 4949 const int ix1 = 5*(4*CRC32_COLUMN_SIZE); 4950 const int ix2 = 6*(4*CRC32_COLUMN_SIZE); 4951 const int ix3 = 7*(4*CRC32_COLUMN_SIZE); 4952 4953 // XOR crc with next four bytes of buffer. 4954 lgr_if_needed(t0, crc); 4955 z_x(t0, Address(buf, bufDisp)); 4956 if (bufInc != 0) { 4957 add2reg(buf, bufInc); 4958 } 4959 4960 // Chop crc into 4 single-byte pieces, shifted left 2 bits, to form the table indices. 
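// Worked example (illustrative value): for t0 = ...AABBCCDD the four extracts below yield
// t3 = 0xDD<<2, t2 = 0xCC<<2, t1 = 0xBB<<2, t0 = 0xAA<<2, i.e. the four bytes of the word,
// each pre-scaled by 4 so they directly index the 4-byte entries of table columns 4..7.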
4961 rotate_then_insert(t3, t0, 56-2, 63-2, 2, true); // ((c >> 0) & 0xff) << 2 4962 rotate_then_insert(t2, t0, 56-2, 63-2, 2-8, true); // ((c >> 8) & 0xff) << 2 4963 rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true); // ((c >> 16) & 0xff) << 2 4964 rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true); // ((c >> 24) & 0xff) << 2 4965 4966 // XOR indexed table values to calculate updated crc. 4967 z_ly(t2, Address(table, t2, (intptr_t)ix1)); 4968 z_ly(t0, Address(table, t0, (intptr_t)ix3)); 4969 z_xy(t2, Address(table, t3, (intptr_t)ix0)); 4970 z_xy(t0, Address(table, t1, (intptr_t)ix2)); 4971 z_xr(t0, t2); // Now t0 contains the updated CRC value. 4972 lgr_if_needed(crc, t0); 4973 } 4974 4975 /** 4976 * @param crc register containing existing CRC (32-bit) 4977 * @param buf register pointing to input byte buffer (byte*) 4978 * @param len register containing number of bytes 4979 * @param table register pointing to CRC table 4980 * 4981 * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller! 4982 */ 4983 void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table, 4984 Register t0, Register t1, Register t2, Register t3, 4985 bool invertCRC) { 4986 assert_different_registers(crc, buf, len, table); 4987 4988 Label L_mainLoop, L_tail; 4989 Register data = t0; 4990 Register ctr = Z_R0; 4991 const int mainLoop_stepping = 4; 4992 const int log_stepping = exact_log2(mainLoop_stepping); 4993 4994 // Don't test for len <= 0 here. This pathological case should not occur anyway. 4995 // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles. 4996 // The situation itself is detected and handled correctly by the conditional branches 4997 // following aghi(len, -stepping) and aghi(len, +stepping). 4998 4999 if (invertCRC) { 5000 not_(crc, noreg, false); // 1s complement of crc 5001 } 5002 5003 // Check for short (<4 bytes) buffer. 5004 z_srag(ctr, len, log_stepping); 5005 z_brnh(L_tail); 5006 5007 z_lrvr(crc, crc); // Revert byte order because we are dealing with big-endian data. 5008 rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop 5009 5010 BIND(L_mainLoop); 5011 update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3); 5012 z_brct(ctr, L_mainLoop); // Iterate. 5013 5014 z_lrvr(crc, crc); // Revert byte order back to original. 5015 5016 // Process last few (<8) bytes of buffer. 
5017 BIND(L_tail); 5018 update_byteLoop_crc32(crc, buf, len, table, data); 5019 5020 if (invertCRC) { 5021 not_(crc, noreg, false); // 1s complement of crc 5022 } 5023 } 5024 5025 /** 5026 * @param crc register containing existing CRC (32-bit) 5027 * @param buf register pointing to input byte buffer (byte*) 5028 * @param len register containing number of bytes 5029 * @param table register pointing to CRC table 5030 */ 5031 void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table, 5032 Register t0, Register t1, Register t2, Register t3, 5033 bool invertCRC) { 5034 assert_different_registers(crc, buf, len, table); 5035 Register data = t0; 5036 5037 if (invertCRC) { 5038 not_(crc, noreg, false); // 1s complement of crc 5039 } 5040 5041 update_byteLoop_crc32(crc, buf, len, table, data); 5042 5043 if (invertCRC) { 5044 not_(crc, noreg, false); // 1s complement of crc 5045 } 5046 } 5047 5048 void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp, 5049 bool invertCRC) { 5050 assert_different_registers(crc, buf, len, table, tmp); 5051 5052 if (invertCRC) { 5053 not_(crc, noreg, false); // 1s complement of crc 5054 } 5055 5056 z_llgc(tmp, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register. 5057 update_byte_crc32(crc, tmp, table); 5058 5059 if (invertCRC) { 5060 not_(crc, noreg, false); // 1s complement of crc 5061 } 5062 } 5063 5064 void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table, 5065 bool invertCRC) { 5066 assert_different_registers(crc, val, table); 5067 5068 if (invertCRC) { 5069 not_(crc, noreg, false); // 1s complement of crc 5070 } 5071 5072 update_byte_crc32(crc, val, table); 5073 5074 if (invertCRC) { 5075 not_(crc, noreg, false); // 1s complement of crc 5076 } 5077 } 5078 5079 // 5080 // Code for BigInteger::multiplyToLen() intrinsic. 5081 // 5082 5083 // dest_lo += src1 + src2 5084 // dest_hi += carry1 + carry2 5085 // Z_R7 is destroyed ! 5086 void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo, 5087 Register src1, Register src2) { 5088 clear_reg(Z_R7); 5089 z_algr(dest_lo, src1); 5090 z_alcgr(dest_hi, Z_R7); 5091 z_algr(dest_lo, src2); 5092 z_alcgr(dest_hi, Z_R7); 5093 } 5094 5095 // Multiply 64 bit by 64 bit first loop. 5096 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, 5097 Register x_xstart, 5098 Register y, Register y_idx, 5099 Register z, 5100 Register carry, 5101 Register product, 5102 Register idx, Register kdx) { 5103 // jlong carry, x[], y[], z[]; 5104 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 5105 // huge_128 product = y[idx] * x[xstart] + carry; 5106 // z[kdx] = (jlong)product; 5107 // carry = (jlong)(product >>> 64); 5108 // } 5109 // z[xstart] = carry; 5110 5111 Label L_first_loop, L_first_loop_exit; 5112 Label L_one_x, L_one_y, L_multiply; 5113 5114 z_aghi(xstart, -1); 5115 z_brl(L_one_x); // Special case: length of x is 1. 5116 5117 // Load next two integers of x. 5118 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5119 mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0)); 5120 5121 5122 bind(L_first_loop); 5123 5124 z_aghi(idx, -1); 5125 z_brl(L_first_loop_exit); 5126 z_aghi(idx, -1); 5127 z_brl(L_one_y); 5128 5129 // Load next two integers of y. 
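// (The index below is scaled by 4, but the subsequent load is an 8-byte load, so y[idx] and
// y[idx+1] are picked up together as one big-endian 64-bit operand, y[idx] in the high half.)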
5130 z_sllg(Z_R1_scratch, idx, LogBytesPerInt); 5131 mem2reg_opt(y_idx, Address(y, Z_R1_scratch, 0)); 5132 5133 5134 bind(L_multiply); 5135 5136 Register multiplicand = product->successor(); 5137 Register product_low = multiplicand; 5138 5139 lgr_if_needed(multiplicand, x_xstart); 5140 z_mlgr(product, y_idx); // multiplicand * y_idx -> product::multiplicand 5141 clear_reg(Z_R7); 5142 z_algr(product_low, carry); // Add carry to result. 5143 z_alcgr(product, Z_R7); // Add carry of the last addition. 5144 add2reg(kdx, -2); 5145 5146 // Store result. 5147 z_sllg(Z_R7, kdx, LogBytesPerInt); 5148 reg2mem_opt(product_low, Address(z, Z_R7, 0)); 5149 lgr_if_needed(carry, product); 5150 z_bru(L_first_loop); 5151 5152 5153 bind(L_one_y); // Load one 32 bit portion of y as (0,value). 5154 5155 clear_reg(y_idx); 5156 mem2reg_opt(y_idx, Address(y, (intptr_t) 0), false); 5157 z_bru(L_multiply); 5158 5159 5160 bind(L_one_x); // Load one 32 bit portion of x as (0,value). 5161 5162 clear_reg(x_xstart); 5163 mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false); 5164 z_bru(L_first_loop); 5165 5166 bind(L_first_loop_exit); 5167 } 5168 5169 // Multiply 64 bit by 64 bit and add 128 bit. 5170 void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y, 5171 Register z, 5172 Register yz_idx, Register idx, 5173 Register carry, Register product, 5174 int offset) { 5175 // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry; 5176 // z[kdx] = (jlong)product; 5177 5178 Register multiplicand = product->successor(); 5179 Register product_low = multiplicand; 5180 5181 z_sllg(Z_R7, idx, LogBytesPerInt); 5182 mem2reg_opt(yz_idx, Address(y, Z_R7, offset)); 5183 5184 lgr_if_needed(multiplicand, x_xstart); 5185 z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand 5186 mem2reg_opt(yz_idx, Address(z, Z_R7, offset)); 5187 5188 add2_with_carry(product, product_low, carry, yz_idx); 5189 5190 z_sllg(Z_R7, idx, LogBytesPerInt); 5191 reg2mem_opt(product_low, Address(z, Z_R7, offset)); 5192 5193 } 5194 5195 // Multiply 128 bit by 128 bit. Unrolled inner loop. 
5196 void MacroAssembler::multiply_128_x_128_loop(Register x_xstart, 5197 Register y, Register z, 5198 Register yz_idx, Register idx, 5199 Register jdx, 5200 Register carry, Register product, 5201 Register carry2) { 5202 // jlong carry, x[], y[], z[]; 5203 // int kdx = ystart+1; 5204 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop 5205 // huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry; 5206 // z[kdx+idx+1] = (jlong)product; 5207 // jlong carry2 = (jlong)(product >>> 64); 5208 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry2; 5209 // z[kdx+idx] = (jlong)product; 5210 // carry = (jlong)(product >>> 64); 5211 // } 5212 // idx += 2; 5213 // if (idx > 0) { 5214 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry; 5215 // z[kdx+idx] = (jlong)product; 5216 // carry = (jlong)(product >>> 64); 5217 // } 5218 5219 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; 5220 5221 // scale the index 5222 lgr_if_needed(jdx, idx); 5223 and_imm(jdx, 0xfffffffffffffffcL); 5224 rshift(jdx, 2); 5225 5226 5227 bind(L_third_loop); 5228 5229 z_aghi(jdx, -1); 5230 z_brl(L_third_loop_exit); 5231 add2reg(idx, -4); 5232 5233 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8); 5234 lgr_if_needed(carry2, product); 5235 5236 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0); 5237 lgr_if_needed(carry, product); 5238 z_bru(L_third_loop); 5239 5240 5241 bind(L_third_loop_exit); // Handle any left-over operand parts. 5242 5243 and_imm(idx, 0x3); 5244 z_brz(L_post_third_loop_done); 5245 5246 Label L_check_1; 5247 5248 z_aghi(idx, -2); 5249 z_brl(L_check_1); 5250 5251 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0); 5252 lgr_if_needed(carry, product); 5253 5254 5255 bind(L_check_1); 5256 5257 add2reg(idx, 0x2); 5258 and_imm(idx, 0x1); 5259 z_aghi(idx, -1); 5260 z_brl(L_post_third_loop_done); 5261 5262 Register multiplicand = product->successor(); 5263 Register product_low = multiplicand; 5264 5265 z_sllg(Z_R7, idx, LogBytesPerInt); 5266 clear_reg(yz_idx); 5267 mem2reg_opt(yz_idx, Address(y, Z_R7, 0), false); 5268 lgr_if_needed(multiplicand, x_xstart); 5269 z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand 5270 clear_reg(yz_idx); 5271 mem2reg_opt(yz_idx, Address(z, Z_R7, 0), false); 5272 5273 add2_with_carry(product, product_low, yz_idx, carry); 5274 5275 z_sllg(Z_R7, idx, LogBytesPerInt); 5276 reg2mem_opt(product_low, Address(z, Z_R7, 0), false); 5277 rshift(product_low, 32); 5278 5279 lshift(product, 32); 5280 z_ogr(product_low, product); 5281 lgr_if_needed(carry, product_low); 5282 5283 bind(L_post_third_loop_done); 5284 } 5285 5286 void MacroAssembler::multiply_to_len(Register x, Register xlen, 5287 Register y, Register ylen, 5288 Register z, 5289 Register tmp1, Register tmp2, 5290 Register tmp3, Register tmp4, 5291 Register tmp5) { 5292 ShortBranchVerifier sbv(this); 5293 5294 assert_different_registers(x, xlen, y, ylen, z, 5295 tmp1, tmp2, tmp3, tmp4, tmp5, Z_R1_scratch, Z_R7); 5296 assert_different_registers(x, xlen, y, ylen, z, 5297 tmp1, tmp2, tmp3, tmp4, tmp5, Z_R8); 5298 5299 z_stmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP); 5300 5301 const Register idx = tmp1; 5302 const Register kdx = tmp2; 5303 const Register xstart = tmp3; 5304 5305 const Register y_idx = tmp4; 5306 const Register carry = tmp5; 5307 const Register product = Z_R0_scratch; 5308 const Register x_xstart = Z_R8; 5309 5310 // First Loop. 
5311 // 5312 // final static long LONG_MASK = 0xffffffffL; 5313 // int xstart = xlen - 1; 5314 // int ystart = ylen - 1; 5315 // long carry = 0; 5316 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx-, kdx--) { 5317 // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry; 5318 // z[kdx] = (int)product; 5319 // carry = product >>> 32; 5320 // } 5321 // z[xstart] = (int)carry; 5322 // 5323 5324 lgr_if_needed(idx, ylen); // idx = ylen 5325 z_agrk(kdx, xlen, ylen); // kdx = xlen + ylen 5326 clear_reg(carry); // carry = 0 5327 5328 Label L_done; 5329 5330 lgr_if_needed(xstart, xlen); 5331 z_aghi(xstart, -1); 5332 z_brl(L_done); 5333 5334 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 5335 5336 NearLabel L_second_loop; 5337 compare64_and_branch(kdx, RegisterOrConstant((intptr_t) 0), bcondEqual, L_second_loop); 5338 5339 NearLabel L_carry; 5340 z_aghi(kdx, -1); 5341 z_brz(L_carry); 5342 5343 // Store lower 32 bits of carry. 5344 z_sllg(Z_R1_scratch, kdx, LogBytesPerInt); 5345 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5346 rshift(carry, 32); 5347 z_aghi(kdx, -1); 5348 5349 5350 bind(L_carry); 5351 5352 // Store upper 32 bits of carry. 5353 z_sllg(Z_R1_scratch, kdx, LogBytesPerInt); 5354 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5355 5356 // Second and third (nested) loops. 5357 // 5358 // for (int i = xstart-1; i >= 0; i--) { // Second loop 5359 // carry = 0; 5360 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop 5361 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + 5362 // (z[k] & LONG_MASK) + carry; 5363 // z[k] = (int)product; 5364 // carry = product >>> 32; 5365 // } 5366 // z[i] = (int)carry; 5367 // } 5368 // 5369 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx 5370 5371 const Register jdx = tmp1; 5372 5373 bind(L_second_loop); 5374 5375 clear_reg(carry); // carry = 0; 5376 lgr_if_needed(jdx, ylen); // j = ystart+1 5377 5378 z_aghi(xstart, -1); // i = xstart-1; 5379 z_brl(L_done); 5380 5381 // Use free slots in the current stackframe instead of push/pop. 5382 Address zsave(Z_SP, _z_abi(carg_1)); 5383 reg2mem_opt(z, zsave); 5384 5385 5386 Label L_last_x; 5387 5388 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5389 load_address(z, Address(z, Z_R1_scratch, 4)); // z = z + k - j 5390 z_aghi(xstart, -1); // i = xstart-1; 5391 z_brl(L_last_x); 5392 5393 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5394 mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0)); 5395 5396 5397 Label L_third_loop_prologue; 5398 5399 bind(L_third_loop_prologue); 5400 5401 Address xsave(Z_SP, _z_abi(carg_2)); 5402 Address xlensave(Z_SP, _z_abi(carg_3)); 5403 Address ylensave(Z_SP, _z_abi(carg_4)); 5404 5405 reg2mem_opt(x, xsave); 5406 reg2mem_opt(xstart, xlensave); 5407 reg2mem_opt(ylen, ylensave); 5408 5409 5410 multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x); 5411 5412 mem2reg_opt(z, zsave); 5413 mem2reg_opt(x, xsave); 5414 mem2reg_opt(xlen, xlensave); // This is the decrement of the loop counter! 5415 mem2reg_opt(ylen, ylensave); 5416 5417 add2reg(tmp3, 1, xlen); 5418 z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt); 5419 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5420 z_aghi(tmp3, -1); 5421 z_brl(L_done); 5422 5423 rshift(carry, 32); 5424 z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt); 5425 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5426 z_bru(L_second_loop); 5427 5428 // Next infrequent code is moved outside loops. 
5429 bind(L_last_x); 5430 5431 clear_reg(x_xstart); 5432 mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false); 5433 z_bru(L_third_loop_prologue); 5434 5435 bind(L_done); 5436 5437 z_lmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP); 5438 } 5439 5440 void MacroAssembler::asm_assert(branch_condition cond, const char* msg, int id, bool is_static) { 5441 #ifdef ASSERT 5442 Label ok; 5443 z_brc(cond, ok); 5444 is_static ? stop_static(msg, id) : stop(msg, id); 5445 bind(ok); 5446 #endif // ASSERT 5447 } 5448 5449 // Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false). 5450 void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) { 5451 #ifdef ASSERT 5452 asm_assert(check_equal ? bcondEqual : bcondNotEqual, msg, id); 5453 #endif // ASSERT 5454 } 5455 5456 void MacroAssembler::asm_assert_mems_zero(bool check_equal, bool allow_relocation, int size, int64_t mem_offset, 5457 Register mem_base, const char* msg, int id) { 5458 #ifdef ASSERT 5459 switch (size) { 5460 case 4: 5461 load_and_test_int(Z_R0, Address(mem_base, mem_offset)); 5462 break; 5463 case 8: 5464 load_and_test_long(Z_R0, Address(mem_base, mem_offset)); 5465 break; 5466 default: 5467 ShouldNotReachHere(); 5468 } 5469 // if relocation is not allowed then stop_static() will be called otherwise call stop() 5470 asm_assert(check_equal ? bcondEqual : bcondNotEqual, msg, id, !allow_relocation); 5471 #endif // ASSERT 5472 } 5473 5474 // Check the condition 5475 // expected_size == FP - SP 5476 // after transformation: 5477 // expected_size - FP + SP == 0 5478 // Destroys Register expected_size if no tmp register is passed. 5479 void MacroAssembler::asm_assert_frame_size(Register expected_size, Register tmp, const char* msg, int id) { 5480 #ifdef ASSERT 5481 lgr_if_needed(tmp, expected_size); 5482 z_algr(tmp, Z_SP); 5483 z_slg(tmp, 0, Z_R0, Z_SP); 5484 asm_assert(bcondEqual, msg, id); 5485 #endif // ASSERT 5486 } 5487 5488 // Save and restore functions: Exclude Z_R0. 
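// The condition code cannot be stored directly: save_volatile_regs materializes it as a byte
// (2 = equal, 4 = higher, 1 = lower), and restore_volatile_regs re-creates it by comparing
// that byte against 2 with CLI, which maps the stored value back to the original CC.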
5489 void MacroAssembler::save_volatile_regs(Register dst, int offset, bool include_fp, bool include_flags) { 5490 z_stmg(Z_R1, Z_R5, offset, dst); offset += 5 * BytesPerWord; 5491 if (include_fp) { 5492 z_std(Z_F0, Address(dst, offset)); offset += BytesPerWord; 5493 z_std(Z_F1, Address(dst, offset)); offset += BytesPerWord; 5494 z_std(Z_F2, Address(dst, offset)); offset += BytesPerWord; 5495 z_std(Z_F3, Address(dst, offset)); offset += BytesPerWord; 5496 z_std(Z_F4, Address(dst, offset)); offset += BytesPerWord; 5497 z_std(Z_F5, Address(dst, offset)); offset += BytesPerWord; 5498 z_std(Z_F6, Address(dst, offset)); offset += BytesPerWord; 5499 z_std(Z_F7, Address(dst, offset)); offset += BytesPerWord; 5500 } 5501 if (include_flags) { 5502 Label done; 5503 z_mvi(Address(dst, offset), 2); // encoding: equal 5504 z_bre(done); 5505 z_mvi(Address(dst, offset), 4); // encoding: higher 5506 z_brh(done); 5507 z_mvi(Address(dst, offset), 1); // encoding: lower 5508 bind(done); 5509 } 5510 } 5511 void MacroAssembler::restore_volatile_regs(Register src, int offset, bool include_fp, bool include_flags) { 5512 z_lmg(Z_R1, Z_R5, offset, src); offset += 5 * BytesPerWord; 5513 if (include_fp) { 5514 z_ld(Z_F0, Address(src, offset)); offset += BytesPerWord; 5515 z_ld(Z_F1, Address(src, offset)); offset += BytesPerWord; 5516 z_ld(Z_F2, Address(src, offset)); offset += BytesPerWord; 5517 z_ld(Z_F3, Address(src, offset)); offset += BytesPerWord; 5518 z_ld(Z_F4, Address(src, offset)); offset += BytesPerWord; 5519 z_ld(Z_F5, Address(src, offset)); offset += BytesPerWord; 5520 z_ld(Z_F6, Address(src, offset)); offset += BytesPerWord; 5521 z_ld(Z_F7, Address(src, offset)); offset += BytesPerWord; 5522 } 5523 if (include_flags) { 5524 z_cli(Address(src, offset), 2); // see encoding above 5525 } 5526 } 5527 5528 // Plausibility check for oops. 
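// The save area used in verify_oop covers the volatile registers Z_R1..Z_R5 (5 slots), the
// float registers Z_F0..Z_F7 (8 slots) and one extra slot for the materialized condition
// code, hence nbytes_save = (5 + 8 + 1) * BytesPerWord.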
5529 void MacroAssembler::verify_oop(Register oop, const char* msg) {
5530 if (!VerifyOops) return;
5531 
5532 BLOCK_COMMENT("verify_oop {");
5533 unsigned int nbytes_save = (5 + 8 + 1) * BytesPerWord;
5534 address entry_addr = StubRoutines::verify_oop_subroutine_entry_address();
5535 
5536 save_return_pc();
5537 
5538 // Push frame, but preserve flags
5539 z_lgr(Z_R0, Z_SP);
5540 z_lay(Z_SP, -((int64_t)nbytes_save + frame::z_abi_160_size), Z_SP);
5541 z_stg(Z_R0, _z_abi(callers_sp), Z_SP);
5542 
5543 save_volatile_regs(Z_SP, frame::z_abi_160_size, true, true);
5544 
5545 lgr_if_needed(Z_ARG2, oop);
5546 load_const_optimized(Z_ARG1, (address)msg);
5547 load_const_optimized(Z_R1, entry_addr);
5548 z_lg(Z_R1, 0, Z_R1);
5549 call_c(Z_R1);
5550 
5551 restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, true);
5552 pop_frame();
5553 restore_return_pc();
5554 
5555 BLOCK_COMMENT("} verify_oop ");
5556 }
5557 
5558 void MacroAssembler::verify_oop_addr(Address addr, const char* msg) {
5559 if (!VerifyOops) return;
5560 
5561 BLOCK_COMMENT("verify_oop_addr {");
5562 unsigned int nbytes_save = (5 + 8) * BytesPerWord;
5563 address entry_addr = StubRoutines::verify_oop_subroutine_entry_address();
5564 
5565 save_return_pc();
5566 unsigned int frame_size = push_frame_abi160(nbytes_save); // kills Z_R0
5567 save_volatile_regs(Z_SP, frame::z_abi_160_size, true, false);
5568 
5569 z_lg(Z_ARG2, addr.plus_disp(frame_size));
5570 load_const_optimized(Z_ARG1, (address)msg);
5571 load_const_optimized(Z_R1, entry_addr);
5572 z_lg(Z_R1, 0, Z_R1);
5573 call_c(Z_R1);
5574 
5575 restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, false);
5576 pop_frame();
5577 restore_return_pc();
5578 
5579 BLOCK_COMMENT("} verify_oop_addr ");
5580 }
5581 
5582 const char* MacroAssembler::stop_types[] = {
5583 "stop",
5584 "untested",
5585 "unimplemented",
5586 "shouldnotreachhere"
5587 };
5588 
5589 static void stop_on_request(const char* tp, const char* msg) {
5590 tty->print("Z assembly code requires stop: (%s) %s\n", tp, msg);
5591 guarantee(false, "Z assembly code requires stop: %s", msg);
5592 }
5593 
5594 void MacroAssembler::stop(int type, const char* msg, int id) {
5595 BLOCK_COMMENT(err_msg("stop: %s {", msg));
5596 
5597 // Setup arguments.
5598 load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
5599 load_const(Z_ARG2, (void*) msg);
5600 get_PC(Z_R14); // Following code pushes a frame without entering a new function. Use current pc as return address.
5601 save_return_pc(); // Saves return pc Z_R14.
5602 push_frame_abi160(0);
5603 call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
5604 // The plain disassembler does not recognize illtrap. It instead displays
5605 // a 32-bit value. Issuing two illtraps assures the disassembler finds
5606 // the proper beginning of the next instruction.
5607 z_illtrap(id); // Illegal instruction.
5608 z_illtrap(id); // Illegal instruction.
5609 
5610 BLOCK_COMMENT(" } stop");
5611 }
5612 
5613 // Special version of stop() for code size reduction.
5614 // Reuses the previously generated call sequence, if any.
5615 // Generates the call sequence on its own, if necessary.
5616 // Note: This code will work only in non-relocatable code!
5617 // The relative address of the data elements (arg1, arg2) must not change.
5618 // The reentry point must not move relative to its users. This prerequisite
5619 // should be given for "hand-written" code, if all chain calls are in the same code blob.
5620 // Generated code must not undergo any transformation, e.g. ShortenBranches, to be safe.
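// The returned address is the reentry point: a subsequent stop_chain call can branch back to
// it with a short relative branch (BRC) instead of regenerating the full call sequence,
// provided the target is within 16-bit relative branch range.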
5621 address MacroAssembler::stop_chain(address reentry, int type, const char* msg, int id, bool allow_relocation) { 5622 BLOCK_COMMENT(err_msg("stop_chain(%s,%s): %s {", reentry==nullptr?"init":"cont", allow_relocation?"reloc ":"static", msg)); 5623 5624 // Setup arguments. 5625 if (allow_relocation) { 5626 // Relocatable version (for comparison purposes). Remove after some time. 5627 load_const(Z_ARG1, (void*) stop_types[type%stop_end]); 5628 load_const(Z_ARG2, (void*) msg); 5629 } else { 5630 load_absolute_address(Z_ARG1, (address)stop_types[type%stop_end]); 5631 load_absolute_address(Z_ARG2, (address)msg); 5632 } 5633 if ((reentry != nullptr) && RelAddr::is_in_range_of_RelAddr16(reentry, pc())) { 5634 BLOCK_COMMENT("branch to reentry point:"); 5635 z_brc(bcondAlways, reentry); 5636 } else { 5637 BLOCK_COMMENT("reentry point:"); 5638 reentry = pc(); // Re-entry point for subsequent stop calls. 5639 save_return_pc(); // Saves return pc Z_R14. 5640 push_frame_abi160(0); 5641 if (allow_relocation) { 5642 reentry = nullptr; // Prevent reentry if code relocation is allowed. 5643 call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2); 5644 } else { 5645 call_VM_leaf_static(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2); 5646 } 5647 z_illtrap(id); // Illegal instruction as emergency stop, should the above call return. 5648 } 5649 BLOCK_COMMENT(" } stop_chain"); 5650 5651 return reentry; 5652 } 5653 5654 // Special version of stop() for code size reduction. 5655 // Assumes constant relative addresses for data and runtime call. 5656 void MacroAssembler::stop_static(int type, const char* msg, int id) { 5657 stop_chain(nullptr, type, msg, id, false); 5658 } 5659 5660 void MacroAssembler::stop_subroutine() { 5661 unimplemented("stop_subroutine", 710); 5662 } 5663 5664 // Prints msg to stdout from within generated code.. 5665 void MacroAssembler::warn(const char* msg) { 5666 RegisterSaver::save_live_registers(this, RegisterSaver::all_registers, Z_R14); 5667 load_absolute_address(Z_R1, (address) warning); 5668 load_absolute_address(Z_ARG1, (address) msg); 5669 (void) call(Z_R1); 5670 RegisterSaver::restore_live_registers(this, RegisterSaver::all_registers); 5671 } 5672 5673 #ifndef PRODUCT 5674 5675 // Write pattern 0x0101010101010101 in region [low-before, high+after]. 
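// before/after are counts of 8-byte slots: the zapped range is extended by before*BytesPerWord
// below 'low' and by after*BytesPerWord above 'high', and is filled in doubleword steps (STG).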
5676 void MacroAssembler::zap_from_to(Register low, Register high, Register val, Register addr, int before, int after) { 5677 if (!ZapEmptyStackFields) return; 5678 BLOCK_COMMENT("zap memory region {"); 5679 load_const_optimized(val, 0x0101010101010101); 5680 int size = before + after; 5681 if (low == high && size < 5 && size > 0) { 5682 int offset = -before*BytesPerWord; 5683 for (int i = 0; i < size; ++i) { 5684 z_stg(val, Address(low, offset)); 5685 offset +=(1*BytesPerWord); 5686 } 5687 } else { 5688 add2reg(addr, -before*BytesPerWord, low); 5689 if (after) { 5690 #ifdef ASSERT 5691 jlong check = after * BytesPerWord; 5692 assert(Immediate::is_simm32(check) && Immediate::is_simm32(-check), "value not encodable !"); 5693 #endif 5694 add2reg(high, after * BytesPerWord); 5695 } 5696 NearLabel loop; 5697 bind(loop); 5698 z_stg(val, Address(addr)); 5699 add2reg(addr, 8); 5700 compare64_and_branch(addr, high, bcondNotHigh, loop); 5701 if (after) { 5702 add2reg(high, -after * BytesPerWord); 5703 } 5704 } 5705 BLOCK_COMMENT("} zap memory region"); 5706 } 5707 #endif // !PRODUCT 5708 5709 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value, Register _rscratch) { 5710 _masm = masm; 5711 _masm->load_absolute_address(_rscratch, (address)flag_addr); 5712 _masm->load_and_test_int(_rscratch, Address(_rscratch)); 5713 if (value) { 5714 _masm->z_brne(_label); // Skip if true, i.e. != 0. 5715 } else { 5716 _masm->z_bre(_label); // Skip if false, i.e. == 0. 5717 } 5718 } 5719 5720 SkipIfEqual::~SkipIfEqual() { 5721 _masm->bind(_label); 5722 } 5723 5724 // Implements lightweight-locking. 5725 // Branches to slow upon failure to lock the object. 5726 // Falls through upon success. 5727 // 5728 // - obj: the object to be locked, contents preserved. 5729 // - hdr: the header, already loaded from obj, contents destroyed. 5730 // Note: make sure Z_R1 is not manipulated here when C2 compiler is in play 5731 void MacroAssembler::lightweight_lock(Register obj, Register hdr, Register temp, Label& slow_case) { 5732 5733 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); 5734 assert_different_registers(obj, hdr, temp); 5735 5736 // First we need to check if the lock-stack has room for pushing the object reference. 5737 z_lgf(temp, Address(Z_thread, JavaThread::lock_stack_top_offset())); 5738 5739 compareU32_and_branch(temp, (unsigned)LockStack::end_offset()-1, bcondHigh, slow_case); 5740 5741 // attempting a lightweight_lock 5742 // Load (object->mark() | 1) into hdr 5743 z_oill(hdr, markWord::unlocked_value); 5744 5745 z_lgr(temp, hdr); 5746 5747 // Clear lock-bits from hdr (locked state) 5748 z_xilf(temp, markWord::unlocked_value); 5749 5750 z_csg(hdr, temp, oopDesc::mark_offset_in_bytes(), obj); 5751 branch_optimized(Assembler::bcondNotEqual, slow_case); 5752 5753 // After successful lock, push object on lock-stack 5754 z_lgf(temp, Address(Z_thread, JavaThread::lock_stack_top_offset())); 5755 z_stg(obj, Address(Z_thread, temp)); 5756 z_ahi(temp, oopSize); 5757 z_st(temp, Address(Z_thread, JavaThread::lock_stack_top_offset())); 5758 5759 // as locking was successful, set CC to EQ 5760 z_cr(temp, temp); 5761 } 5762 5763 // Implements lightweight-unlocking. 5764 // Branches to slow upon failure. 5765 // Falls through upon success. 
5766 // 5767 // - obj: the object to be unlocked 5768 // - hdr: the (pre-loaded) header of the object, will be destroyed 5769 // - Z_R1_scratch: will be killed in case of Interpreter & C1 Compiler 5770 void MacroAssembler::lightweight_unlock(Register obj, Register hdr, Register tmp, Label& slow) { 5771 5772 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); 5773 assert_different_registers(obj, hdr, tmp); 5774 5775 #ifdef ASSERT 5776 { 5777 // Check that hdr is lightweight-locked. 5778 Label hdr_ok; 5779 z_lgr(tmp, hdr); 5780 z_nill(tmp, markWord::lock_mask_in_place); 5781 z_bre(hdr_ok); 5782 stop("Header is not lightweight-locked"); 5783 bind(hdr_ok); 5784 } 5785 { 5786 // The following checks rely on the fact that LockStack is only ever modified by 5787 // its owning thread, even if the lock got inflated concurrently; removal of LockStack 5788 // entries after inflation will happen delayed in that case. 5789 5790 // Check for lock-stack underflow. 5791 Label stack_ok; 5792 z_lgf(tmp, Address(Z_thread, JavaThread::lock_stack_top_offset())); 5793 compareU32_and_branch(tmp, (unsigned)LockStack::start_offset(), Assembler::bcondHigh, stack_ok); 5794 stop("Lock-stack underflow"); 5795 bind(stack_ok); 5796 } 5797 { 5798 // Check if the top of the lock-stack matches the unlocked object. 5799 Label tos_ok; 5800 z_aghi(tmp, -oopSize); 5801 z_lg(tmp, Address(Z_thread, tmp)); 5802 compare64_and_branch(tmp, obj, Assembler::bcondEqual, tos_ok); 5803 stop("Top of lock-stack does not match the unlocked object"); 5804 bind(tos_ok); 5805 } 5806 #endif // ASSERT 5807 5808 z_lgr(tmp, hdr); 5809 z_oill(tmp, markWord::unlocked_value); 5810 z_csg(hdr, tmp, oopDesc::mark_offset_in_bytes(), obj); 5811 branch_optimized(Assembler::bcondNotEqual, slow); 5812 5813 // After successful unlock, pop object from lock-stack 5814 #ifdef ASSERT 5815 z_lgf(tmp, Address(Z_thread, JavaThread::lock_stack_top_offset())); 5816 z_aghi(tmp, -oopSize); 5817 z_agr(tmp, Z_thread); 5818 z_xc(0, oopSize-1, tmp, 0, tmp); // wipe out lock-stack entry 5819 #endif 5820 z_alsi(in_bytes(JavaThread::lock_stack_top_offset()), Z_thread, -oopSize); // pop object 5821 z_cr(tmp, tmp); // set CC to EQ 5822 } 5823 5824 void MacroAssembler::pop_count_int(Register r_dst, Register r_src, Register r_tmp) { 5825 BLOCK_COMMENT("pop_count_int {"); 5826 5827 assert(r_tmp != noreg, "temp register required for pop_count_int, as code may run on machine older than z15"); 5828 assert_different_registers(r_dst, r_tmp); // if r_src is same as r_tmp, it should be fine 5829 5830 if (VM_Version::has_MiscInstrExt3()) { 5831 pop_count_int_with_ext3(r_dst, r_src); 5832 } else { 5833 pop_count_int_without_ext3(r_dst, r_src, r_tmp); 5834 } 5835 5836 BLOCK_COMMENT("} pop_count_int"); 5837 } 5838 5839 void MacroAssembler::pop_count_long(Register r_dst, Register r_src, Register r_tmp) { 5840 BLOCK_COMMENT("pop_count_long {"); 5841 5842 assert(r_tmp != noreg, "temp register required for pop_count_long, as code may run on machine older than z15"); 5843 assert_different_registers(r_dst, r_tmp); // if r_src is same as r_tmp, it should be fine 5844 5845 if (VM_Version::has_MiscInstrExt3()) { 5846 pop_count_long_with_ext3(r_dst, r_src); 5847 } else { 5848 pop_count_long_without_ext3(r_dst, r_src, r_tmp); 5849 } 5850 5851 BLOCK_COMMENT("} pop_count_long"); 5852 } 5853 5854 void MacroAssembler::pop_count_int_without_ext3(Register r_dst, Register r_src, Register r_tmp) { 5855 BLOCK_COMMENT("pop_count_int_without_ext3 {"); 5856 5857 assert(r_tmp != noreg, 
"temp register required for popcnt, for machines < z15"); 5858 assert_different_registers(r_dst, r_tmp); // if r_src is same as r_tmp, it should be fine 5859 5860 z_popcnt(r_dst, r_src, 0); 5861 z_srlg(r_tmp, r_dst, 16); 5862 z_alr(r_dst, r_tmp); 5863 z_srlg(r_tmp, r_dst, 8); 5864 z_alr(r_dst, r_tmp); 5865 z_llgcr(r_dst, r_dst); 5866 5867 BLOCK_COMMENT("} pop_count_int_without_ext3"); 5868 } 5869 5870 void MacroAssembler::pop_count_long_without_ext3(Register r_dst, Register r_src, Register r_tmp) { 5871 BLOCK_COMMENT("pop_count_long_without_ext3 {"); 5872 5873 assert(r_tmp != noreg, "temp register required for popcnt, for machines < z15"); 5874 assert_different_registers(r_dst, r_tmp); // if r_src is same as r_tmp, it should be fine 5875 5876 z_popcnt(r_dst, r_src, 0); 5877 z_ahhlr(r_dst, r_dst, r_dst); 5878 z_sllg(r_tmp, r_dst, 16); 5879 z_algr(r_dst, r_tmp); 5880 z_sllg(r_tmp, r_dst, 8); 5881 z_algr(r_dst, r_tmp); 5882 z_srlg(r_dst, r_dst, 56); 5883 5884 BLOCK_COMMENT("} pop_count_long_without_ext3"); 5885 } 5886 5887 void MacroAssembler::pop_count_long_with_ext3(Register r_dst, Register r_src) { 5888 BLOCK_COMMENT("pop_count_long_with_ext3 {"); 5889 5890 guarantee(VM_Version::has_MiscInstrExt3(), 5891 "this hardware doesn't support miscellaneous-instruction-extensions facility 3, still pop_count_long_with_ext3 is used"); 5892 z_popcnt(r_dst, r_src, 8); 5893 5894 BLOCK_COMMENT("} pop_count_long_with_ext3"); 5895 } 5896 5897 void MacroAssembler::pop_count_int_with_ext3(Register r_dst, Register r_src) { 5898 BLOCK_COMMENT("pop_count_int_with_ext3 {"); 5899 5900 guarantee(VM_Version::has_MiscInstrExt3(), 5901 "this hardware doesn't support miscellaneous-instruction-extensions facility 3, still pop_count_long_with_ext3 is used"); 5902 z_llgfr(r_dst, r_src); 5903 z_popcnt(r_dst, r_dst, 8); 5904 5905 BLOCK_COMMENT("} pop_count_int_with_ext3"); 5906 }