1 /* 2 * Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2016, 2024 SAP SE. All rights reserved. 4 * Copyright 2024 IBM Corporation. All rights reserved. 5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6 * 7 * This code is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 only, as 9 * published by the Free Software Foundation. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 * 25 */ 26 27 #include "precompiled.hpp" 28 #include "asm/codeBuffer.hpp" 29 #include "asm/macroAssembler.inline.hpp" 30 #include "code/compiledIC.hpp" 31 #include "compiler/disassembler.hpp" 32 #include "gc/shared/barrierSet.hpp" 33 #include "gc/shared/barrierSetAssembler.hpp" 34 #include "gc/shared/collectedHeap.inline.hpp" 35 #include "interpreter/interpreter.hpp" 36 #include "gc/shared/cardTableBarrierSet.hpp" 37 #include "memory/resourceArea.hpp" 38 #include "memory/universe.hpp" 39 #include "oops/accessDecorators.hpp" 40 #include "oops/compressedKlass.inline.hpp" 41 #include "oops/compressedOops.inline.hpp" 42 #include "oops/klass.inline.hpp" 43 #include "prims/methodHandles.hpp" 44 #include "registerSaver_s390.hpp" 45 #include "runtime/icache.hpp" 46 #include "runtime/interfaceSupport.inline.hpp" 47 #include "runtime/objectMonitor.hpp" 48 #include "runtime/os.hpp" 49 #include "runtime/safepoint.hpp" 50 #include "runtime/safepointMechanism.hpp" 51 #include "runtime/sharedRuntime.hpp" 52 #include "runtime/stubRoutines.hpp" 53 #include "utilities/events.hpp" 54 #include "utilities/macros.hpp" 55 #include "utilities/powerOfTwo.hpp" 56 57 #include <ucontext.h> 58 59 #define BLOCK_COMMENT(str) block_comment(str) 60 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 61 62 // Move 32-bit register if destination and source are different. 63 void MacroAssembler::lr_if_needed(Register rd, Register rs) { 64 if (rs != rd) { z_lr(rd, rs); } 65 } 66 67 // Move register if destination and source are different. 68 void MacroAssembler::lgr_if_needed(Register rd, Register rs) { 69 if (rs != rd) { z_lgr(rd, rs); } 70 } 71 72 // Zero-extend 32-bit register into 64-bit register if destination and source are different. 73 void MacroAssembler::llgfr_if_needed(Register rd, Register rs) { 74 if (rs != rd) { z_llgfr(rd, rs); } 75 } 76 77 // Move float register if destination and source are different. 78 void MacroAssembler::ldr_if_needed(FloatRegister rd, FloatRegister rs) { 79 if (rs != rd) { z_ldr(rd, rs); } 80 } 81 82 // Move integer register if destination and source are different. 83 // It is assumed that shorter-than-int types are already 84 // appropriately sign-extended. 
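// For instance (illustrative only): move_reg_if_needed(dst, T_LONG, src, T_INT) emits LGFR
// (sign extension), while identical src/dst types collapse to a plain LGR, or to no code at
// all when dst == src.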
85 void MacroAssembler::move_reg_if_needed(Register dst, BasicType dst_type, Register src, 86 BasicType src_type) { 87 assert((dst_type != T_FLOAT) && (dst_type != T_DOUBLE), "use move_freg for float types"); 88 assert((src_type != T_FLOAT) && (src_type != T_DOUBLE), "use move_freg for float types"); 89 90 if (dst_type == src_type) { 91 lgr_if_needed(dst, src); // Just move all 64 bits. 92 return; 93 } 94 95 switch (dst_type) { 96 // Do not support these types for now. 97 // case T_BOOLEAN: 98 case T_BYTE: // signed byte 99 switch (src_type) { 100 case T_INT: 101 z_lgbr(dst, src); 102 break; 103 default: 104 ShouldNotReachHere(); 105 } 106 return; 107 108 case T_CHAR: 109 case T_SHORT: 110 switch (src_type) { 111 case T_INT: 112 if (dst_type == T_CHAR) { 113 z_llghr(dst, src); 114 } else { 115 z_lghr(dst, src); 116 } 117 break; 118 default: 119 ShouldNotReachHere(); 120 } 121 return; 122 123 case T_INT: 124 switch (src_type) { 125 case T_BOOLEAN: 126 case T_BYTE: 127 case T_CHAR: 128 case T_SHORT: 129 case T_INT: 130 case T_LONG: 131 case T_OBJECT: 132 case T_ARRAY: 133 case T_VOID: 134 case T_ADDRESS: 135 lr_if_needed(dst, src); 136 // llgfr_if_needed(dst, src); // zero-extend (in case we need to find a bug). 137 return; 138 139 default: 140 assert(false, "non-integer src type"); 141 return; 142 } 143 case T_LONG: 144 switch (src_type) { 145 case T_BOOLEAN: 146 case T_BYTE: 147 case T_CHAR: 148 case T_SHORT: 149 case T_INT: 150 z_lgfr(dst, src); // sign extension 151 return; 152 153 case T_LONG: 154 case T_OBJECT: 155 case T_ARRAY: 156 case T_VOID: 157 case T_ADDRESS: 158 lgr_if_needed(dst, src); 159 return; 160 161 default: 162 assert(false, "non-integer src type"); 163 return; 164 } 165 return; 166 case T_OBJECT: 167 case T_ARRAY: 168 case T_VOID: 169 case T_ADDRESS: 170 switch (src_type) { 171 // These types don't make sense to be converted to pointers: 172 // case T_BOOLEAN: 173 // case T_BYTE: 174 // case T_CHAR: 175 // case T_SHORT: 176 177 case T_INT: 178 z_llgfr(dst, src); // zero extension 179 return; 180 181 case T_LONG: 182 case T_OBJECT: 183 case T_ARRAY: 184 case T_VOID: 185 case T_ADDRESS: 186 lgr_if_needed(dst, src); 187 return; 188 189 default: 190 assert(false, "non-integer src type"); 191 return; 192 } 193 return; 194 default: 195 assert(false, "non-integer dst type"); 196 return; 197 } 198 } 199 200 // Move float register if destination and source are different. 201 void MacroAssembler::move_freg_if_needed(FloatRegister dst, BasicType dst_type, 202 FloatRegister src, BasicType src_type) { 203 assert((dst_type == T_FLOAT) || (dst_type == T_DOUBLE), "use move_reg for int types"); 204 assert((src_type == T_FLOAT) || (src_type == T_DOUBLE), "use move_reg for int types"); 205 if (dst_type == src_type) { 206 ldr_if_needed(dst, src); // Just move all 64 bits. 207 } else { 208 switch (dst_type) { 209 case T_FLOAT: 210 assert(src_type == T_DOUBLE, "invalid float type combination"); 211 z_ledbr(dst, src); 212 return; 213 case T_DOUBLE: 214 assert(src_type == T_FLOAT, "invalid float type combination"); 215 z_ldebr(dst, src); 216 return; 217 default: 218 assert(false, "non-float dst type"); 219 return; 220 } 221 } 222 } 223 224 // Optimized emitter for reg to mem operations. 225 // Uses modern instructions if running on modern hardware, classic instructions 226 // otherwise. Prefers (usually shorter) classic instructions if applicable. 227 // Data register (reg) cannot be used as work register. 
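// The displacement picks the emitter: a 12-bit unsigned displacement uses the (usually shorter)
// classic form, a 20-bit signed displacement the modern form; larger offsets are synthesized
// with the help of the scratch register.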
228 // 229 // Don't rely on register locking, instead pass a scratch register (Z_R0 by default). 230 // CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs! 231 void MacroAssembler::freg2mem_opt(FloatRegister reg, 232 int64_t disp, 233 Register index, 234 Register base, 235 void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register), 236 void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register), 237 Register scratch) { 238 index = (index == noreg) ? Z_R0 : index; 239 if (Displacement::is_shortDisp(disp)) { 240 (this->*classic)(reg, disp, index, base); 241 } else { 242 if (Displacement::is_validDisp(disp)) { 243 (this->*modern)(reg, disp, index, base); 244 } else { 245 if (scratch != Z_R0 && scratch != Z_R1) { 246 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 247 } else { 248 if (scratch != Z_R0) { // scratch == Z_R1 249 if ((scratch == index) || (index == base)) { 250 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 251 } else { 252 add2reg(scratch, disp, base); 253 (this->*classic)(reg, 0, index, scratch); 254 if (base == scratch) { 255 add2reg(base, -disp); // Restore base. 256 } 257 } 258 } else { // scratch == Z_R0 259 z_lgr(scratch, base); 260 add2reg(base, disp); 261 (this->*classic)(reg, 0, index, base); 262 z_lgr(base, scratch); // Restore base. 263 } 264 } 265 } 266 } 267 } 268 269 void MacroAssembler::freg2mem_opt(FloatRegister reg, const Address &a, bool is_double) { 270 if (is_double) { 271 freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stdy), CLASSIC_FFUN(z_std)); 272 } else { 273 freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stey), CLASSIC_FFUN(z_ste)); 274 } 275 } 276 277 // Optimized emitter for mem to reg operations. 278 // Uses modern instructions if running on modern hardware, classic instructions 279 // otherwise. Prefers (usually shorter) classic instructions if applicable. 280 // data register (reg) cannot be used as work register. 281 // 282 // Don't rely on register locking, instead pass a scratch register (Z_R0 by default). 283 // CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs! 284 void MacroAssembler::mem2freg_opt(FloatRegister reg, 285 int64_t disp, 286 Register index, 287 Register base, 288 void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register), 289 void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register), 290 Register scratch) { 291 index = (index == noreg) ? Z_R0 : index; 292 if (Displacement::is_shortDisp(disp)) { 293 (this->*classic)(reg, disp, index, base); 294 } else { 295 if (Displacement::is_validDisp(disp)) { 296 (this->*modern)(reg, disp, index, base); 297 } else { 298 if (scratch != Z_R0 && scratch != Z_R1) { 299 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 300 } else { 301 if (scratch != Z_R0) { // scratch == Z_R1 302 if ((scratch == index) || (index == base)) { 303 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 304 } else { 305 add2reg(scratch, disp, base); 306 (this->*classic)(reg, 0, index, scratch); 307 if (base == scratch) { 308 add2reg(base, -disp); // Restore base. 309 } 310 } 311 } else { // scratch == Z_R0 312 z_lgr(scratch, base); 313 add2reg(base, disp); 314 (this->*classic)(reg, 0, index, base); 315 z_lgr(base, scratch); // Restore base. 
316 } 317 } 318 } 319 } 320 } 321 322 void MacroAssembler::mem2freg_opt(FloatRegister reg, const Address &a, bool is_double) { 323 if (is_double) { 324 mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ldy), CLASSIC_FFUN(z_ld)); 325 } else { 326 mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ley), CLASSIC_FFUN(z_le)); 327 } 328 } 329 330 // Optimized emitter for reg to mem operations. 331 // Uses modern instructions if running on modern hardware, classic instructions 332 // otherwise. Prefers (usually shorter) classic instructions if applicable. 333 // Data register (reg) cannot be used as work register. 334 // 335 // Don't rely on register locking, instead pass a scratch register 336 // (Z_R0 by default) 337 // CAUTION! passing registers >= Z_R2 may produce bad results on old CPUs! 338 void MacroAssembler::reg2mem_opt(Register reg, 339 int64_t disp, 340 Register index, 341 Register base, 342 void (MacroAssembler::*modern) (Register, int64_t, Register, Register), 343 void (MacroAssembler::*classic)(Register, int64_t, Register, Register), 344 Register scratch) { 345 index = (index == noreg) ? Z_R0 : index; 346 if (Displacement::is_shortDisp(disp)) { 347 (this->*classic)(reg, disp, index, base); 348 } else { 349 if (Displacement::is_validDisp(disp)) { 350 (this->*modern)(reg, disp, index, base); 351 } else { 352 if (scratch != Z_R0 && scratch != Z_R1) { 353 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 354 } else { 355 if (scratch != Z_R0) { // scratch == Z_R1 356 if ((scratch == index) || (index == base)) { 357 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 358 } else { 359 add2reg(scratch, disp, base); 360 (this->*classic)(reg, 0, index, scratch); 361 if (base == scratch) { 362 add2reg(base, -disp); // Restore base. 363 } 364 } 365 } else { // scratch == Z_R0 366 if ((scratch == reg) || (scratch == base) || (reg == base)) { 367 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 368 } else { 369 z_lgr(scratch, base); 370 add2reg(base, disp); 371 (this->*classic)(reg, 0, index, base); 372 z_lgr(base, scratch); // Restore base. 373 } 374 } 375 } 376 } 377 } 378 } 379 380 int MacroAssembler::reg2mem_opt(Register reg, const Address &a, bool is_double) { 381 int store_offset = offset(); 382 if (is_double) { 383 reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_stg), CLASSIC_IFUN(z_stg)); 384 } else { 385 reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_sty), CLASSIC_IFUN(z_st)); 386 } 387 return store_offset; 388 } 389 390 // Optimized emitter for mem to reg operations. 391 // Uses modern instructions if running on modern hardware, classic instructions 392 // otherwise. Prefers (usually shorter) classic instructions if applicable. 393 // Data register (reg) will be used as work register where possible. 394 void MacroAssembler::mem2reg_opt(Register reg, 395 int64_t disp, 396 Register index, 397 Register base, 398 void (MacroAssembler::*modern) (Register, int64_t, Register, Register), 399 void (MacroAssembler::*classic)(Register, int64_t, Register, Register)) { 400 index = (index == noreg) ? 
Z_R0 : index; 401 if (Displacement::is_shortDisp(disp)) { 402 (this->*classic)(reg, disp, index, base); 403 } else { 404 if (Displacement::is_validDisp(disp)) { 405 (this->*modern)(reg, disp, index, base); 406 } else { 407 if ((reg == index) && (reg == base)) { 408 z_sllg(reg, reg, 1); 409 add2reg(reg, disp); 410 (this->*classic)(reg, 0, noreg, reg); 411 } else if ((reg == index) && (reg != Z_R0)) { 412 add2reg(reg, disp); 413 (this->*classic)(reg, 0, reg, base); 414 } else if (reg == base) { 415 add2reg(reg, disp); 416 (this->*classic)(reg, 0, index, reg); 417 } else if (reg != Z_R0) { 418 add2reg(reg, disp, base); 419 (this->*classic)(reg, 0, index, reg); 420 } else { // reg == Z_R0 && reg != base here 421 add2reg(base, disp); 422 (this->*classic)(reg, 0, index, base); 423 add2reg(base, -disp); 424 } 425 } 426 } 427 } 428 429 void MacroAssembler::mem2reg_opt(Register reg, const Address &a, bool is_double) { 430 if (is_double) { 431 z_lg(reg, a); 432 } else { 433 mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_ly), CLASSIC_IFUN(z_l)); 434 } 435 } 436 437 void MacroAssembler::mem2reg_signed_opt(Register reg, const Address &a) { 438 mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_lgf), CLASSIC_IFUN(z_lgf)); 439 } 440 441 void MacroAssembler::and_imm(Register r, long mask, 442 Register tmp /* = Z_R0 */, 443 bool wide /* = false */) { 444 assert(wide || Immediate::is_simm32(mask), "mask value too large"); 445 446 if (!wide) { 447 z_nilf(r, mask); 448 return; 449 } 450 451 assert(r != tmp, " need a different temporary register !"); 452 load_const_optimized(tmp, mask); 453 z_ngr(r, tmp); 454 } 455 456 // Calculate the 1's complement. 457 // Note: The condition code is neither preserved nor correctly set by this code!!! 458 // Note: (wide == false) does not protect the high order half of the target register 459 // from alteration. It only serves as optimization hint for 32-bit results. 460 void MacroAssembler::not_(Register r1, Register r2, bool wide) { 461 462 if ((r2 == noreg) || (r2 == r1)) { // Calc 1's complement in place. 463 z_xilf(r1, -1); 464 if (wide) { 465 z_xihf(r1, -1); 466 } 467 } else { // Distinct src and dst registers. 468 load_const_optimized(r1, -1); 469 z_xgr(r1, r2); 470 } 471 } 472 473 unsigned long MacroAssembler::create_mask(int lBitPos, int rBitPos) { 474 assert(lBitPos >= 0, "zero is leftmost bit position"); 475 assert(rBitPos <= 63, "63 is rightmost bit position"); 476 assert(lBitPos <= rBitPos, "inverted selection interval"); 477 return (lBitPos == 0 ? (unsigned long)(-1L) : ((1UL<<(63-lBitPos+1))-1)) & (~((1UL<<(63-rBitPos))-1)); 478 } 479 480 // Helper function for the "Rotate_then_<logicalOP>" emitters. 481 // Rotate src, then mask register contents such that only bits in range survive. 482 // For oneBits == false, all bits not in range are set to 0. Useful for deleting all bits outside range. 483 // For oneBits == true, all bits not in range are set to 1. Useful for preserving all bits outside range. 484 // The caller must ensure that the selected range only contains bits with defined value. 485 void MacroAssembler::rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos, 486 int nRotate, bool src32bit, bool dst32bit, bool oneBits) { 487 assert(!(dst32bit && lBitPos < 32), "selection interval out of range for int destination"); 488 bool sll4rll = (nRotate >= 0) && (nRotate <= (63-rBitPos)); // Substitute SLL(G) for RLL(G). 
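  // A plain logical shift can replace the rotate whenever all bits that would wrap around
  // fall outside the selected range and are masked off anyway.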
  bool srl4rll = (nRotate < 0) && (-nRotate <= lBitPos); // Substitute SRL(G) for RLL(G).
  // Pre-determine which parts of dst will be zero after shift/rotate.
  bool llZero = sll4rll && (nRotate >= 16);
  bool lhZero = (sll4rll && (nRotate >= 32)) || (srl4rll && (nRotate <= -48));
  bool lfZero = llZero && lhZero;
  bool hlZero = (sll4rll && (nRotate >= 48)) || (srl4rll && (nRotate <= -32));
  bool hhZero = (srl4rll && (nRotate <= -16));
  bool hfZero = hlZero && hhZero;

  // rotate then mask src operand.
  // if oneBits == true, all bits outside selected range are 1s.
  // if oneBits == false, all bits outside selected range are 0s.
  if (src32bit) { // There might be garbage in the upper 32 bits which will get masked away.
    if (dst32bit) {
      z_rll(dst, src, nRotate); // Copy and rotate, upper half of reg remains undisturbed.
    } else {
      if (sll4rll) { z_sllg(dst, src, nRotate); }
      else if (srl4rll) { z_srlg(dst, src, -nRotate); }
      else { z_rllg(dst, src, nRotate); }
    }
  } else {
    if (sll4rll) { z_sllg(dst, src, nRotate); }
    else if (srl4rll) { z_srlg(dst, src, -nRotate); }
    else { z_rllg(dst, src, nRotate); }
  }

  unsigned long range_mask = create_mask(lBitPos, rBitPos);
  unsigned int range_mask_h = (unsigned int)(range_mask >> 32);
  unsigned int range_mask_l = (unsigned int)range_mask;
  unsigned short range_mask_hh = (unsigned short)(range_mask >> 48);
  unsigned short range_mask_hl = (unsigned short)(range_mask >> 32);
  unsigned short range_mask_lh = (unsigned short)(range_mask >> 16);
  unsigned short range_mask_ll = (unsigned short)range_mask;
  // Works for z9 and newer H/W.
  if (oneBits) {
    if ((~range_mask_l) != 0) { z_oilf(dst, ~range_mask_l); } // All bits outside range become 1s.
    if (((~range_mask_h) != 0) && !dst32bit) { z_oihf(dst, ~range_mask_h); }
  } else {
    // All bits outside range become 0s.
    if (((~range_mask_l) != 0) && !lfZero) {
      z_nilf(dst, range_mask_l);
    }
    if (((~range_mask_h) != 0) && !dst32bit && !hfZero) {
      z_nihf(dst, range_mask_h);
    }
  }
}

// Rotate src, then insert selected range from rotated src into dst.
// Clear dst before, if requested.
void MacroAssembler::rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos,
                                        int nRotate, bool clear_dst) {
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f; // For risbg, pretend it's an unsigned value.
  z_risbg(dst, src, lBitPos, rBitPos, nRotate, clear_dst); // Rotate, then insert selected, clear the rest.
}

// Rotate src, then and selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos,
                                     int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f; // For rnsbg, pretend it's an unsigned value.
  z_rnsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then and selected.
}

// Rotate src, then or selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_or(Register dst, Register src, int lBitPos, int rBitPos,
                                    int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f; // For rosbg, pretend it's an unsigned value.
  z_rosbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then or selected.
}

// Rotate src, then xor selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_xor(Register dst, Register src, int lBitPos, int rBitPos,
                                     int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f; // For rxsbg, pretend it's an unsigned value.
  z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected.
}

void MacroAssembler::add64(Register r1, RegisterOrConstant inc) {
  if (inc.is_register()) {
    z_agr(r1, inc.as_register());
  } else { // constant
    intptr_t imm = inc.as_constant();
    add2reg(r1, imm);
  }
}

// Helper function to multiply the 64bit contents of a register by a 16bit constant.
// The optimization tries to avoid the mghi instruction, since it uses the FPU for
// calculation and is thus rather slow.
//
// There is no handling for special cases, e.g. cval==0 or cval==1.
//
// Returns len of generated code block.
unsigned int MacroAssembler::mul_reg64_const16(Register rval, Register work, int cval) {
  int block_start = offset();

  bool sign_flip = cval < 0;
  cval = sign_flip ? -cval : cval;

  BLOCK_COMMENT("Reg64*Con16 {");

  int bit1 = cval & -cval;
  if (bit1 == cval) {
    z_sllg(rval, rval, exact_log2(bit1));
    if (sign_flip) { z_lcgr(rval, rval); }
  } else {
    int bit2 = (cval-bit1) & -(cval-bit1);
    if ((bit1+bit2) == cval) {
      z_sllg(work, rval, exact_log2(bit1));
      z_sllg(rval, rval, exact_log2(bit2));
      z_agr(rval, work);
      if (sign_flip) { z_lcgr(rval, rval); }
    } else {
      if (sign_flip) { z_mghi(rval, -cval); }
      else { z_mghi(rval, cval); }
    }
  }
  BLOCK_COMMENT("} Reg64*Con16");

  int block_end = offset();
  return block_end - block_start;
}

// Generic operation r1 := r2 + imm.
//
// Should produce the best code for each supported CPU version.
// r2 == noreg yields r1 := r1 + imm
// imm == 0 emits either no instruction or r1 := r2 !
// NOTES: 1) Don't use this function where fixed sized
//           instruction sequences are required!!!
//        2) Don't use this function if condition code
//           setting is required!
//        3) Despite being declared as int64_t, the parameter imm
//           must be a simm_32 value (= signed 32-bit integer).
void MacroAssembler::add2reg(Register r1, int64_t imm, Register r2) {
  assert(Immediate::is_simm32(imm), "probably an implicit conversion went wrong");

  if (r2 == noreg) { r2 = r1; }

  // Handle special case imm == 0.
  if (imm == 0) {
    lgr_if_needed(r1, r2);
    // Nothing else to do.
644 return; 645 } 646 647 if (!PreferLAoverADD || (r2 == Z_R0)) { 648 bool distinctOpnds = VM_Version::has_DistinctOpnds(); 649 650 // Can we encode imm in 16 bits signed? 651 if (Immediate::is_simm16(imm)) { 652 if (r1 == r2) { 653 z_aghi(r1, imm); 654 return; 655 } 656 if (distinctOpnds) { 657 z_aghik(r1, r2, imm); 658 return; 659 } 660 z_lgr(r1, r2); 661 z_aghi(r1, imm); 662 return; 663 } 664 } else { 665 // Can we encode imm in 12 bits unsigned? 666 if (Displacement::is_shortDisp(imm)) { 667 z_la(r1, imm, r2); 668 return; 669 } 670 // Can we encode imm in 20 bits signed? 671 if (Displacement::is_validDisp(imm)) { 672 // Always use LAY instruction, so we don't need the tmp register. 673 z_lay(r1, imm, r2); 674 return; 675 } 676 677 } 678 679 // Can handle it (all possible values) with long immediates. 680 lgr_if_needed(r1, r2); 681 z_agfi(r1, imm); 682 } 683 684 // Generic operation r := b + x + d 685 // 686 // Addition of several operands with address generation semantics - sort of: 687 // - no restriction on the registers. Any register will do for any operand. 688 // - x == noreg: operand will be disregarded. 689 // - b == noreg: will use (contents of) result reg as operand (r := r + d). 690 // - x == Z_R0: just disregard 691 // - b == Z_R0: use as operand. This is not address generation semantics!!! 692 // 693 // The same restrictions as on add2reg() are valid!!! 694 void MacroAssembler::add2reg_with_index(Register r, int64_t d, Register x, Register b) { 695 assert(Immediate::is_simm32(d), "probably an implicit conversion went wrong"); 696 697 if (x == noreg) { x = Z_R0; } 698 if (b == noreg) { b = r; } 699 700 // Handle special case x == R0. 701 if (x == Z_R0) { 702 // Can simply add the immediate value to the base register. 703 add2reg(r, d, b); 704 return; 705 } 706 707 if (!PreferLAoverADD || (b == Z_R0)) { 708 bool distinctOpnds = VM_Version::has_DistinctOpnds(); 709 // Handle special case d == 0. 710 if (d == 0) { 711 if (b == x) { z_sllg(r, b, 1); return; } 712 if (r == x) { z_agr(r, b); return; } 713 if (r == b) { z_agr(r, x); return; } 714 if (distinctOpnds) { z_agrk(r, x, b); return; } 715 z_lgr(r, b); 716 z_agr(r, x); 717 } else { 718 if (x == b) { z_sllg(r, x, 1); } 719 else if (r == x) { z_agr(r, b); } 720 else if (r == b) { z_agr(r, x); } 721 else if (distinctOpnds) { z_agrk(r, x, b); } 722 else { 723 z_lgr(r, b); 724 z_agr(r, x); 725 } 726 add2reg(r, d); 727 } 728 } else { 729 // Can we encode imm in 12 bits unsigned? 730 if (Displacement::is_shortDisp(d)) { 731 z_la(r, d, x, b); 732 return; 733 } 734 // Can we encode imm in 20 bits signed? 735 if (Displacement::is_validDisp(d)) { 736 z_lay(r, d, x, b); 737 return; 738 } 739 z_la(r, 0, x, b); 740 add2reg(r, d); 741 } 742 } 743 744 // Generic emitter (32bit) for direct memory increment. 745 // For optimal code, do not specify Z_R0 as temp register. 
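// Illustrative use (operands made up): add2mem_32(Address(base, 0), 1, tmp) increments a 32-bit
// counter in memory; with the memory-immediate ALU facility this is a single ASI, otherwise a
// load/add/store sequence through tmp.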
746 void MacroAssembler::add2mem_32(const Address &a, int64_t imm, Register tmp) { 747 if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) { 748 z_asi(a, imm); 749 } else { 750 z_lgf(tmp, a); 751 add2reg(tmp, imm); 752 z_st(tmp, a); 753 } 754 } 755 756 void MacroAssembler::add2mem_64(const Address &a, int64_t imm, Register tmp) { 757 if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) { 758 z_agsi(a, imm); 759 } else { 760 z_lg(tmp, a); 761 add2reg(tmp, imm); 762 z_stg(tmp, a); 763 } 764 } 765 766 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 767 switch (size_in_bytes) { 768 case 8: z_lg(dst, src); break; 769 case 4: is_signed ? z_lgf(dst, src) : z_llgf(dst, src); break; 770 case 2: is_signed ? z_lgh(dst, src) : z_llgh(dst, src); break; 771 case 1: is_signed ? z_lgb(dst, src) : z_llgc(dst, src); break; 772 default: ShouldNotReachHere(); 773 } 774 } 775 776 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) { 777 switch (size_in_bytes) { 778 case 8: z_stg(src, dst); break; 779 case 4: z_st(src, dst); break; 780 case 2: z_sth(src, dst); break; 781 case 1: z_stc(src, dst); break; 782 default: ShouldNotReachHere(); 783 } 784 } 785 786 // Split a si20 offset (20bit, signed) into an ui12 offset (12bit, unsigned) and 787 // a high-order summand in register tmp. 788 // 789 // return value: < 0: No split required, si20 actually has property uimm12. 790 // >= 0: Split performed. Use return value as uimm12 displacement and 791 // tmp as index register. 792 int MacroAssembler::split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate) { 793 assert(Immediate::is_simm20(si20_offset), "sanity"); 794 int lg_off = (int)si20_offset & 0x0fff; // Punch out low-order 12 bits, always positive. 795 int ll_off = (int)si20_offset & ~0x0fff; // Force low-order 12 bits to zero. 796 assert((Displacement::is_shortDisp(si20_offset) && (ll_off == 0)) || 797 !Displacement::is_shortDisp(si20_offset), "unexpected offset values"); 798 assert((lg_off+ll_off) == si20_offset, "offset splitup error"); 799 800 Register work = accumulate? Z_R0 : tmp; 801 802 if (fixed_codelen) { // Len of code = 10 = 4 + 6. 803 z_lghi(work, ll_off>>12); // Implicit sign extension. 804 z_slag(work, work, 12); 805 } else { // Len of code = 0..10. 806 if (ll_off == 0) { return -1; } 807 // ll_off has 8 significant bits (at most) plus sign. 808 if ((ll_off & 0x0000f000) == 0) { // Non-zero bits only in upper halfbyte. 809 z_llilh(work, ll_off >> 16); 810 if (ll_off < 0) { // Sign-extension required. 811 z_lgfr(work, work); 812 } 813 } else { 814 if ((ll_off & 0x000f0000) == 0) { // Non-zero bits only in lower halfbyte. 815 z_llill(work, ll_off); 816 } else { // Non-zero bits in both halfbytes. 817 z_lghi(work, ll_off>>12); // Implicit sign extension. 818 z_slag(work, work, 12); 819 } 820 } 821 } 822 if (accumulate) { z_algr(tmp, work); } // len of code += 4 823 return lg_off; 824 } 825 826 void MacroAssembler::load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) { 827 if (Displacement::is_validDisp(si20)) { 828 z_ley(t, si20, a); 829 } else { 830 // Fixed_codelen = true is a simple way to ensure that the size of load_float_largeoffset 831 // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant 832 // pool loads). 
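    // With fixed_codelen the base register is copied to tmp up front, so the split offset can
    // be accumulated there without clobbering 'a'.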
    bool accumulate = true;
    bool fixed_codelen = true;
    Register work;

    if (fixed_codelen) {
      z_lgr(tmp, a); // Lgr_if_needed not applicable due to fixed_codelen.
    } else {
      accumulate = (a == tmp);
    }
    work = tmp;

    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
    if (disp12 < 0) {
      z_le(t, si20, work);
    } else {
      if (accumulate) {
        z_le(t, disp12, work);
      } else {
        z_le(t, disp12, work, a);
      }
    }
  }
}

void MacroAssembler::load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
  if (Displacement::is_validDisp(si20)) {
    z_ldy(t, si20, a);
  } else {
    // Fixed_codelen = true is a simple way to ensure that the size of load_double_largeoffset
    // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
    // pool loads).
    bool accumulate = true;
    bool fixed_codelen = true;
    Register work;

    if (fixed_codelen) {
      z_lgr(tmp, a); // Lgr_if_needed not applicable due to fixed_codelen.
    } else {
      accumulate = (a == tmp);
    }
    work = tmp;

    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
    if (disp12 < 0) {
      z_ld(t, si20, work);
    } else {
      if (accumulate) {
        z_ld(t, disp12, work);
      } else {
        z_ld(t, disp12, work, a);
      }
    }
  }
}

// PCrelative TOC access.
// Returns distance (in bytes) from current position to start of consts section.
// Returns 0 (zero) if no consts section exists or if it has size zero.
long MacroAssembler::toc_distance() {
  CodeSection* cs = code()->consts();
  return (long)((cs != nullptr) ? cs->start() - pc() : 0);
}

// Implementation on x86/sparc assumes that constant and instruction section are
// adjacent, but this doesn't hold. Two special situations may occur that we must
// be able to handle:
//   1. const section may be located apart from the inst section.
//   2. const section may be empty.
// In both cases, we use the const section's start address to compute the "TOC";
// this seems to occur only temporarily; in the final step we always seem to end up
// with the pc-relative variant.
//
// PC-relative offset could be +/-2**32 -> use long for disp.
// Furthermore: makes no sense to have special code for
// adjacent const and inst sections.
void MacroAssembler::load_toc(Register Rtoc) {
  // Simply use distance from start of const section (should be patched in the end).
  long disp = toc_distance();

  RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp);
  relocate(rspec);
  z_larl(Rtoc, RelAddr::pcrel_off32(disp)); // Offset is in halfwords.
}

// PCrelative TOC access.
// Load from anywhere pcrelative (with relocation of load instr).
void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) {
  address pc = this->pc();
  ptrdiff_t total_distance = dataLocation - pc;
  RelocationHolder rspec = internal_word_Relocation::spec(dataLocation);

  assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
  assert(total_distance != 0, "sanity");

  // Some extra safety net.
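  // LGRL encodes a signed 32-bit halfword offset, i.e. roughly +/-4 GB of pc-relative reach.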
928 if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { 929 guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance); 930 } 931 932 (this)->relocate(rspec, relocInfo::pcrel_addr_format); 933 z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance)); 934 } 935 936 937 // PCrelative TOC access. 938 // Load from anywhere pcrelative (with relocation of load instr) 939 // loaded addr has to be relocated when added to constant pool. 940 void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) { 941 address pc = this->pc(); 942 ptrdiff_t total_distance = addrLocation - pc; 943 RelocationHolder rspec = internal_word_Relocation::spec(addrLocation); 944 945 assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory"); 946 947 // Some extra safety net. 948 if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { 949 guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance); 950 } 951 952 (this)->relocate(rspec, relocInfo::pcrel_addr_format); 953 z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance)); 954 } 955 956 // Generic operation: load a value from memory and test. 957 // CondCode indicates the sign (<0, ==0, >0) of the loaded value. 958 void MacroAssembler::load_and_test_byte(Register dst, const Address &a) { 959 z_lb(dst, a); 960 z_ltr(dst, dst); 961 } 962 963 void MacroAssembler::load_and_test_short(Register dst, const Address &a) { 964 int64_t disp = a.disp20(); 965 if (Displacement::is_shortDisp(disp)) { 966 z_lh(dst, a); 967 } else if (Displacement::is_longDisp(disp)) { 968 z_lhy(dst, a); 969 } else { 970 guarantee(false, "displacement out of range"); 971 } 972 z_ltr(dst, dst); 973 } 974 975 void MacroAssembler::load_and_test_int(Register dst, const Address &a) { 976 z_lt(dst, a); 977 } 978 979 void MacroAssembler::load_and_test_int2long(Register dst, const Address &a) { 980 z_ltgf(dst, a); 981 } 982 983 void MacroAssembler::load_and_test_long(Register dst, const Address &a) { 984 z_ltg(dst, a); 985 } 986 987 // Test a bit in memory. 988 void MacroAssembler::testbit(const Address &a, unsigned int bit) { 989 assert(a.index() == noreg, "no index reg allowed in testbit"); 990 if (bit <= 7) { 991 z_tm(a.disp() + 3, a.base(), 1 << bit); 992 } else if (bit <= 15) { 993 z_tm(a.disp() + 2, a.base(), 1 << (bit - 8)); 994 } else if (bit <= 23) { 995 z_tm(a.disp() + 1, a.base(), 1 << (bit - 16)); 996 } else if (bit <= 31) { 997 z_tm(a.disp() + 0, a.base(), 1 << (bit - 24)); 998 } else { 999 ShouldNotReachHere(); 1000 } 1001 } 1002 1003 // Test a bit in a register. Result is reflected in CC. 1004 void MacroAssembler::testbit(Register r, unsigned int bitPos) { 1005 if (bitPos < 16) { 1006 z_tmll(r, 1U<<bitPos); 1007 } else if (bitPos < 32) { 1008 z_tmlh(r, 1U<<(bitPos-16)); 1009 } else if (bitPos < 48) { 1010 z_tmhl(r, 1U<<(bitPos-32)); 1011 } else if (bitPos < 64) { 1012 z_tmhh(r, 1U<<(bitPos-48)); 1013 } else { 1014 ShouldNotReachHere(); 1015 } 1016 } 1017 1018 void MacroAssembler::prefetch_read(Address a) { 1019 z_pfd(1, a.disp20(), a.indexOrR0(), a.base()); 1020 } 1021 void MacroAssembler::prefetch_update(Address a) { 1022 z_pfd(2, a.disp20(), a.indexOrR0(), a.base()); 1023 } 1024 1025 // Clear a register, i.e. load const zero into reg. 1026 // Return len (in bytes) of generated instruction(s). 1027 // whole_reg: Clear 64 bits if true, 32 bits otherwise. 
// set_cc: Use instruction that sets the condition code, if true.
int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) {
  unsigned int start_off = offset();
  if (whole_reg) {
    set_cc ? z_xgr(r, r) : z_laz(r, 0, Z_R0);
  } else { // Only 32bit register.
    set_cc ? z_xr(r, r) : z_lhi(r, 0);
  }
  return offset() - start_off;
}

#ifdef ASSERT
int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) {
  switch (pattern_len) {
    case 1:
      pattern = (pattern & 0x000000ff)  | ((pattern & 0x000000ff) << 8);
    case 2:
      pattern = (pattern & 0x0000ffff)  | ((pattern & 0x0000ffff) << 16);
    case 4:
      pattern = (pattern & 0xffffffffL) | ((pattern & 0xffffffffL) << 32);
    case 8:
      return load_const_optimized_rtn_len(r, pattern, true);
      break;
    default:
      guarantee(false, "preset_reg: bad len");
  }
  return 0;
}
#endif

// addr: Address descriptor of memory to clear. Index register will not be used!
// size: Number of bytes to clear.
// Condition code will not be preserved.
//   !!! DO NOT USE THEM FOR ATOMIC MEMORY CLEARING !!!
//   !!! Use store_const() instead                  !!!
void MacroAssembler::clear_mem(const Address& addr, unsigned int size) {
  guarantee((addr.disp() + size) <= 4096, "MacroAssembler::clear_mem: size too large");

  switch (size) {
    case 0:
      return;
    case 1:
      z_mvi(addr, 0);
      return;
    case 2:
      z_mvhhi(addr, 0);
      return;
    case 4:
      z_mvhi(addr, 0);
      return;
    case 8:
      z_mvghi(addr, 0);
      return;
    default: ; // Fallthru to xc.
  }

  // Caution: the emitter with Address operands does implicitly decrement the length.
  if (size <= 256) {
    z_xc(addr, size, addr);
  } else {
    unsigned int offset = addr.disp();
    unsigned int incr   = 256;
    for (unsigned int i = 0; i <= size-incr; i += incr) {
      z_xc(offset, incr - 1, addr.base(), offset, addr.base());
      offset += incr;
    }
    unsigned int rest = size - (offset - addr.disp());
    if (rest > 0) {
      z_xc(offset, rest-1, addr.base(), offset, addr.base());
    }
  }
}

void MacroAssembler::align(int modulus) {
  align(modulus, offset());
}

void MacroAssembler::align(int modulus, int target) {
  assert(((modulus % 2 == 0) && (target % 2 == 0)), "needs to be even");
  int delta = target - offset();
  while ((offset() + delta) % modulus != 0) z_nop();
}

// Special version for non-relocateable code if required alignment
// is larger than CodeEntryAlignment.
void MacroAssembler::align_address(int modulus) {
  while ((uintptr_t)pc() % modulus != 0) z_nop();
}

Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         Register temp_reg,
                                         int64_t extra_slot_offset) {
  // On Z, we can have index and disp in an Address. So don't call argument_offset,
  // which issues an unnecessary add instruction.
1122 int stackElementSize = Interpreter::stackElementSize; 1123 int64_t offset = extra_slot_offset * stackElementSize; 1124 const Register argbase = Z_esp; 1125 if (arg_slot.is_constant()) { 1126 offset += arg_slot.as_constant() * stackElementSize; 1127 return Address(argbase, offset); 1128 } 1129 // else 1130 assert(temp_reg != noreg, "must specify"); 1131 assert(temp_reg != Z_ARG1, "base and index are conflicting"); 1132 z_sllg(temp_reg, arg_slot.as_register(), exact_log2(stackElementSize)); // tempreg = arg_slot << 3 1133 return Address(argbase, temp_reg, offset); 1134 } 1135 1136 1137 //=================================================================== 1138 //=== START C O N S T A N T S I N C O D E S T R E A M === 1139 //=================================================================== 1140 //=== P A T CH A B L E C O N S T A N T S === 1141 //=================================================================== 1142 1143 1144 //--------------------------------------------------- 1145 // Load (patchable) constant into register 1146 //--------------------------------------------------- 1147 1148 1149 // Load absolute address (and try to optimize). 1150 // Note: This method is usable only for position-fixed code, 1151 // referring to a position-fixed target location. 1152 // If not so, relocations and patching must be used. 1153 void MacroAssembler::load_absolute_address(Register d, address addr) { 1154 assert(addr != nullptr, "should not happen"); 1155 BLOCK_COMMENT("load_absolute_address:"); 1156 if (addr == nullptr) { 1157 z_larl(d, pc()); // Dummy emit for size calc. 1158 return; 1159 } 1160 1161 if (RelAddr::is_in_range_of_RelAddr32(addr, pc())) { 1162 z_larl(d, addr); 1163 return; 1164 } 1165 1166 load_const_optimized(d, (long)addr); 1167 } 1168 1169 // Load a 64bit constant. 1170 // Patchable code sequence, but not atomically patchable. 1171 // Make sure to keep code size constant -> no value-dependent optimizations. 1172 // Do not kill condition code. 1173 void MacroAssembler::load_const(Register t, long x) { 1174 // Note: Right shift is only cleanly defined for unsigned types 1175 // or for signed types with nonnegative values. 1176 Assembler::z_iihf(t, (long)((unsigned long)x >> 32)); 1177 Assembler::z_iilf(t, (long)((unsigned long)x & 0xffffffffUL)); 1178 } 1179 1180 // Load a 32bit constant into a 64bit register, sign-extend or zero-extend. 1181 // Patchable code sequence, but not atomically patchable. 1182 // Make sure to keep code size constant -> no value-dependent optimizations. 1183 // Do not kill condition code. 1184 void MacroAssembler::load_const_32to64(Register t, int64_t x, bool sign_extend) { 1185 if (sign_extend) { Assembler::z_lgfi(t, x); } 1186 else { Assembler::z_llilf(t, x); } 1187 } 1188 1189 // Load narrow oop constant, no decompression. 1190 void MacroAssembler::load_narrow_oop(Register t, narrowOop a) { 1191 assert(UseCompressedOops, "must be on to call this method"); 1192 load_const_32to64(t, CompressedOops::narrow_oop_value(a), false /*sign_extend*/); 1193 } 1194 1195 // Load narrow klass constant, compression required. 1196 void MacroAssembler::load_narrow_klass(Register t, Klass* k) { 1197 assert(UseCompressedClassPointers, "must be on to call this method"); 1198 narrowKlass encoded_k = CompressedKlassPointers::encode(k); 1199 load_const_32to64(t, encoded_k, false /*sign_extend*/); 1200 } 1201 1202 //------------------------------------------------------ 1203 // Compare (patchable) constant with register. 
1204 //------------------------------------------------------ 1205 1206 // Compare narrow oop in reg with narrow oop constant, no decompression. 1207 void MacroAssembler::compare_immediate_narrow_oop(Register oop1, narrowOop oop2) { 1208 assert(UseCompressedOops, "must be on to call this method"); 1209 1210 Assembler::z_clfi(oop1, CompressedOops::narrow_oop_value(oop2)); 1211 } 1212 1213 // Compare narrow oop in reg with narrow oop constant, no decompression. 1214 void MacroAssembler::compare_immediate_narrow_klass(Register klass1, Klass* klass2) { 1215 assert(UseCompressedClassPointers, "must be on to call this method"); 1216 narrowKlass encoded_k = CompressedKlassPointers::encode(klass2); 1217 1218 Assembler::z_clfi(klass1, encoded_k); 1219 } 1220 1221 //---------------------------------------------------------- 1222 // Check which kind of load_constant we have here. 1223 //---------------------------------------------------------- 1224 1225 // Detection of CPU version dependent load_const sequence. 1226 // The detection is valid only for code sequences generated by load_const, 1227 // not load_const_optimized. 1228 bool MacroAssembler::is_load_const(address a) { 1229 unsigned long inst1, inst2; 1230 unsigned int len1, len2; 1231 1232 len1 = get_instruction(a, &inst1); 1233 len2 = get_instruction(a + len1, &inst2); 1234 1235 return is_z_iihf(inst1) && is_z_iilf(inst2); 1236 } 1237 1238 // Detection of CPU version dependent load_const_32to64 sequence. 1239 // Mostly used for narrow oops and narrow Klass pointers. 1240 // The detection is valid only for code sequences generated by load_const_32to64. 1241 bool MacroAssembler::is_load_const_32to64(address pos) { 1242 unsigned long inst1, inst2; 1243 unsigned int len1; 1244 1245 len1 = get_instruction(pos, &inst1); 1246 return is_z_llilf(inst1); 1247 } 1248 1249 // Detection of compare_immediate_narrow sequence. 1250 // The detection is valid only for code sequences generated by compare_immediate_narrow_oop. 1251 bool MacroAssembler::is_compare_immediate32(address pos) { 1252 return is_equal(pos, CLFI_ZOPC, RIL_MASK); 1253 } 1254 1255 // Detection of compare_immediate_narrow sequence. 1256 // The detection is valid only for code sequences generated by compare_immediate_narrow_oop. 1257 bool MacroAssembler::is_compare_immediate_narrow_oop(address pos) { 1258 return is_compare_immediate32(pos); 1259 } 1260 1261 // Detection of compare_immediate_narrow sequence. 1262 // The detection is valid only for code sequences generated by compare_immediate_narrow_klass. 1263 bool MacroAssembler::is_compare_immediate_narrow_klass(address pos) { 1264 return is_compare_immediate32(pos); 1265 } 1266 1267 //----------------------------------- 1268 // patch the load_constant 1269 //----------------------------------- 1270 1271 // CPU-version dependent patching of load_const. 1272 void MacroAssembler::patch_const(address a, long x) { 1273 assert(is_load_const(a), "not a load of a constant"); 1274 // Note: Right shift is only cleanly defined for unsigned types 1275 // or for signed types with nonnegative values. 1276 set_imm32((address)a, (long)((unsigned long)x >> 32)); 1277 set_imm32((address)(a + 6), (long)((unsigned long)x & 0xffffffffUL)); 1278 } 1279 1280 // Patching the value of CPU version dependent load_const_32to64 sequence. 1281 // The passed ptr MUST be in compressed format! 
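// Returns the length (in bytes) of the patched instruction; the other patchers below report
// their result the same way.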
1282 int MacroAssembler::patch_load_const_32to64(address pos, int64_t np) { 1283 assert(is_load_const_32to64(pos), "not a load of a narrow ptr (oop or klass)"); 1284 1285 set_imm32(pos, np); 1286 return 6; 1287 } 1288 1289 // Patching the value of CPU version dependent compare_immediate_narrow sequence. 1290 // The passed ptr MUST be in compressed format! 1291 int MacroAssembler::patch_compare_immediate_32(address pos, int64_t np) { 1292 assert(is_compare_immediate32(pos), "not a compressed ptr compare"); 1293 1294 set_imm32(pos, np); 1295 return 6; 1296 } 1297 1298 // Patching the immediate value of CPU version dependent load_narrow_oop sequence. 1299 // The passed ptr must NOT be in compressed format! 1300 int MacroAssembler::patch_load_narrow_oop(address pos, oop o) { 1301 assert(UseCompressedOops, "Can only patch compressed oops"); 1302 return patch_load_const_32to64(pos, CompressedOops::narrow_oop_value(o)); 1303 } 1304 1305 // Patching the immediate value of CPU version dependent load_narrow_klass sequence. 1306 // The passed ptr must NOT be in compressed format! 1307 int MacroAssembler::patch_load_narrow_klass(address pos, Klass* k) { 1308 assert(UseCompressedClassPointers, "Can only patch compressed klass pointers"); 1309 1310 narrowKlass nk = CompressedKlassPointers::encode(k); 1311 return patch_load_const_32to64(pos, nk); 1312 } 1313 1314 // Patching the immediate value of CPU version dependent compare_immediate_narrow_oop sequence. 1315 // The passed ptr must NOT be in compressed format! 1316 int MacroAssembler::patch_compare_immediate_narrow_oop(address pos, oop o) { 1317 assert(UseCompressedOops, "Can only patch compressed oops"); 1318 return patch_compare_immediate_32(pos, CompressedOops::narrow_oop_value(o)); 1319 } 1320 1321 // Patching the immediate value of CPU version dependent compare_immediate_narrow_klass sequence. 1322 // The passed ptr must NOT be in compressed format! 1323 int MacroAssembler::patch_compare_immediate_narrow_klass(address pos, Klass* k) { 1324 assert(UseCompressedClassPointers, "Can only patch compressed klass pointers"); 1325 1326 narrowKlass nk = CompressedKlassPointers::encode(k); 1327 return patch_compare_immediate_32(pos, nk); 1328 } 1329 1330 //------------------------------------------------------------------------ 1331 // Extract the constant from a load_constant instruction stream. 1332 //------------------------------------------------------------------------ 1333 1334 // Get constant from a load_const sequence. 1335 long MacroAssembler::get_const(address a) { 1336 assert(is_load_const(a), "not a load of a constant"); 1337 unsigned long x; 1338 x = (((unsigned long) (get_imm32(a,0) & 0xffffffff)) << 32); 1339 x |= (((unsigned long) (get_imm32(a,1) & 0xffffffff))); 1340 return (long) x; 1341 } 1342 1343 //-------------------------------------- 1344 // Store a constant in memory. 1345 //-------------------------------------- 1346 1347 // General emitter to move a constant to memory. 1348 // The store is atomic. 1349 // o Address must be given in RS format (no index register) 1350 // o Displacement should be 12bit unsigned for efficiency. 20bit signed also supported. 1351 // o Constant can be 1, 2, 4, or 8 bytes, signed or unsigned. 1352 // o Memory slot can be 1, 2, 4, or 8 bytes, signed or unsigned. 1353 // o Memory slot must be at least as wide as constant, will assert otherwise. 1354 // o Signed constants will sign-extend, unsigned constants will zero-extend to slot width. 
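// o Returns the code offset of the emitted store instruction.
//
// Illustrative example (operands made up): store_const(Address(base, 16), 0, 8, 8, scratch)
// boils down to a single MVGHI, i.e. an atomic 8-byte clear of the memory slot.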
1355 int MacroAssembler::store_const(const Address &dest, long imm, 1356 unsigned int lm, unsigned int lc, 1357 Register scratch) { 1358 int64_t disp = dest.disp(); 1359 Register base = dest.base(); 1360 assert(!dest.has_index(), "not supported"); 1361 assert((lm==1)||(lm==2)||(lm==4)||(lm==8), "memory length not supported"); 1362 assert((lc==1)||(lc==2)||(lc==4)||(lc==8), "constant length not supported"); 1363 assert(lm>=lc, "memory slot too small"); 1364 assert(lc==8 || Immediate::is_simm(imm, lc*8), "const out of range"); 1365 assert(Displacement::is_validDisp(disp), "displacement out of range"); 1366 1367 bool is_shortDisp = Displacement::is_shortDisp(disp); 1368 int store_offset = -1; 1369 1370 // For target len == 1 it's easy. 1371 if (lm == 1) { 1372 store_offset = offset(); 1373 if (is_shortDisp) { 1374 z_mvi(disp, base, imm); 1375 return store_offset; 1376 } else { 1377 z_mviy(disp, base, imm); 1378 return store_offset; 1379 } 1380 } 1381 1382 // All the "good stuff" takes an unsigned displacement. 1383 if (is_shortDisp) { 1384 // NOTE: Cannot use clear_mem for imm==0, because it is not atomic. 1385 1386 store_offset = offset(); 1387 switch (lm) { 1388 case 2: // Lc == 1 handled correctly here, even for unsigned. Instruction does no widening. 1389 z_mvhhi(disp, base, imm); 1390 return store_offset; 1391 case 4: 1392 if (Immediate::is_simm16(imm)) { 1393 z_mvhi(disp, base, imm); 1394 return store_offset; 1395 } 1396 break; 1397 case 8: 1398 if (Immediate::is_simm16(imm)) { 1399 z_mvghi(disp, base, imm); 1400 return store_offset; 1401 } 1402 break; 1403 default: 1404 ShouldNotReachHere(); 1405 break; 1406 } 1407 } 1408 1409 // Can't optimize, so load value and store it. 1410 guarantee(scratch != noreg, " need a scratch register here !"); 1411 if (imm != 0) { 1412 load_const_optimized(scratch, imm); // Preserves CC anyway. 1413 } else { 1414 // Leave CC alone!! 1415 (void) clear_reg(scratch, true, false); // Indicate unused result. 1416 } 1417 1418 store_offset = offset(); 1419 if (is_shortDisp) { 1420 switch (lm) { 1421 case 2: 1422 z_sth(scratch, disp, Z_R0, base); 1423 return store_offset; 1424 case 4: 1425 z_st(scratch, disp, Z_R0, base); 1426 return store_offset; 1427 case 8: 1428 z_stg(scratch, disp, Z_R0, base); 1429 return store_offset; 1430 default: 1431 ShouldNotReachHere(); 1432 break; 1433 } 1434 } else { 1435 switch (lm) { 1436 case 2: 1437 z_sthy(scratch, disp, Z_R0, base); 1438 return store_offset; 1439 case 4: 1440 z_sty(scratch, disp, Z_R0, base); 1441 return store_offset; 1442 case 8: 1443 z_stg(scratch, disp, Z_R0, base); 1444 return store_offset; 1445 default: 1446 ShouldNotReachHere(); 1447 break; 1448 } 1449 } 1450 return -1; // should not reach here 1451 } 1452 1453 //=================================================================== 1454 //=== N O T P A T CH A B L E C O N S T A N T S === 1455 //=================================================================== 1456 1457 // Load constant x into register t with a fast instruction sequence 1458 // depending on the bits in x. Preserves CC under all circumstances. 1459 int MacroAssembler::load_const_optimized_rtn_len(Register t, long x, bool emit) { 1460 if (x == 0) { 1461 int len; 1462 if (emit) { 1463 len = clear_reg(t, true, false); 1464 } else { 1465 len = 4; 1466 } 1467 return len; 1468 } 1469 1470 if (Immediate::is_simm16(x)) { 1471 if (emit) { z_lghi(t, x); } 1472 return 4; 1473 } 1474 1475 // 64 bit value: | part1 | part2 | part3 | part4 | 1476 // At least one part is not zero! 
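  // Each part is a 16-bit slice of x; the code below picks the shortest LLI**/II**/LGFI
  // combination that reproduces the value and returns the emitted (or would-be) length.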
1477 // Note: Right shift is only cleanly defined for unsigned types 1478 // or for signed types with nonnegative values. 1479 int part1 = (int)((unsigned long)x >> 48) & 0x0000ffff; 1480 int part2 = (int)((unsigned long)x >> 32) & 0x0000ffff; 1481 int part3 = (int)((unsigned long)x >> 16) & 0x0000ffff; 1482 int part4 = (int)x & 0x0000ffff; 1483 int part12 = (int)((unsigned long)x >> 32); 1484 int part34 = (int)x; 1485 1486 // Lower word only (unsigned). 1487 if (part12 == 0) { 1488 if (part3 == 0) { 1489 if (emit) z_llill(t, part4); 1490 return 4; 1491 } 1492 if (part4 == 0) { 1493 if (emit) z_llilh(t, part3); 1494 return 4; 1495 } 1496 if (emit) z_llilf(t, part34); 1497 return 6; 1498 } 1499 1500 // Upper word only. 1501 if (part34 == 0) { 1502 if (part1 == 0) { 1503 if (emit) z_llihl(t, part2); 1504 return 4; 1505 } 1506 if (part2 == 0) { 1507 if (emit) z_llihh(t, part1); 1508 return 4; 1509 } 1510 if (emit) z_llihf(t, part12); 1511 return 6; 1512 } 1513 1514 // Lower word only (signed). 1515 if ((part1 == 0x0000ffff) && (part2 == 0x0000ffff) && ((part3 & 0x00008000) != 0)) { 1516 if (emit) z_lgfi(t, part34); 1517 return 6; 1518 } 1519 1520 int len = 0; 1521 1522 if ((part1 == 0) || (part2 == 0)) { 1523 if (part1 == 0) { 1524 if (emit) z_llihl(t, part2); 1525 len += 4; 1526 } else { 1527 if (emit) z_llihh(t, part1); 1528 len += 4; 1529 } 1530 } else { 1531 if (emit) z_llihf(t, part12); 1532 len += 6; 1533 } 1534 1535 if ((part3 == 0) || (part4 == 0)) { 1536 if (part3 == 0) { 1537 if (emit) z_iill(t, part4); 1538 len += 4; 1539 } else { 1540 if (emit) z_iilh(t, part3); 1541 len += 4; 1542 } 1543 } else { 1544 if (emit) z_iilf(t, part34); 1545 len += 6; 1546 } 1547 return len; 1548 } 1549 1550 //===================================================================== 1551 //=== H I G H E R L E V E L B R A N C H E M I T T E R S === 1552 //===================================================================== 1553 1554 // Note: In the worst case, one of the scratch registers is destroyed!!! 1555 void MacroAssembler::compare32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1556 // Right operand is constant. 1557 if (x2.is_constant()) { 1558 jlong value = x2.as_constant(); 1559 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/true); 1560 return; 1561 } 1562 1563 // Right operand is in register. 1564 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/true); 1565 } 1566 1567 // Note: In the worst case, one of the scratch registers is destroyed!!! 1568 void MacroAssembler::compareU32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1569 // Right operand is constant. 1570 if (x2.is_constant()) { 1571 jlong value = x2.as_constant(); 1572 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/false); 1573 return; 1574 } 1575 1576 // Right operand is in register. 1577 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/false); 1578 } 1579 1580 // Note: In the worst case, one of the scratch registers is destroyed!!! 1581 void MacroAssembler::compare64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1582 // Right operand is constant. 1583 if (x2.is_constant()) { 1584 jlong value = x2.as_constant(); 1585 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/true); 1586 return; 1587 } 1588 1589 // Right operand is in register. 
1590 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/true); 1591 } 1592 1593 void MacroAssembler::compareU64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1594 // Right operand is constant. 1595 if (x2.is_constant()) { 1596 jlong value = x2.as_constant(); 1597 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/false); 1598 return; 1599 } 1600 1601 // Right operand is in register. 1602 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/false); 1603 } 1604 1605 // Generate an optimal branch to the branch target. 1606 // Optimal means that a relative branch (brc or brcl) is used if the 1607 // branch distance is short enough. Loading the target address into a 1608 // register and branching via reg is used as fallback only. 1609 // 1610 // Used registers: 1611 // Z_R1 - work reg. Holds branch target address. 1612 // Used in fallback case only. 1613 // 1614 // This version of branch_optimized is good for cases where the target address is known 1615 // and constant, i.e. is never changed (no relocation, no patching). 1616 void MacroAssembler::branch_optimized(Assembler::branch_condition cond, address branch_addr) { 1617 address branch_origin = pc(); 1618 1619 if (RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) { 1620 z_brc(cond, branch_addr); 1621 } else if (RelAddr::is_in_range_of_RelAddr32(branch_addr, branch_origin)) { 1622 z_brcl(cond, branch_addr); 1623 } else { 1624 load_const_optimized(Z_R1, branch_addr); // CC must not get killed by load_const_optimized. 1625 z_bcr(cond, Z_R1); 1626 } 1627 } 1628 1629 // This version of branch_optimized is good for cases where the target address 1630 // is potentially not yet known at the time the code is emitted. 1631 // 1632 // One very common case is a branch to an unbound label which is handled here. 1633 // The caller might know (or hope) that the branch distance is short enough 1634 // to be encoded in a 16bit relative address. In this case he will pass a 1635 // NearLabel branch_target. 1636 // Care must be taken with unbound labels. Each call to target(label) creates 1637 // an entry in the patch queue for that label to patch all references of the label 1638 // once it gets bound. Those recorded patch locations must be patchable. Otherwise, 1639 // an assertion fires at patch time. 1640 void MacroAssembler::branch_optimized(Assembler::branch_condition cond, Label& branch_target) { 1641 if (branch_target.is_bound()) { 1642 address branch_addr = target(branch_target); 1643 branch_optimized(cond, branch_addr); 1644 } else if (branch_target.is_near()) { 1645 z_brc(cond, branch_target); // Caller assures that the target will be in range for z_brc. 1646 } else { 1647 z_brcl(cond, branch_target); // Let's hope target is in range. Otherwise, we will abort at patch time. 1648 } 1649 } 1650 1651 // Generate an optimal compare and branch to the branch target. 1652 // Optimal means that a relative branch (clgrj, brc or brcl) is used if the 1653 // branch distance is short enough. Loading the target address into a 1654 // register and branching via reg is used as fallback only. 
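// For reference, the (len64, has_sign) flags select the compare flavor via
// casenum = (len64 ? 2 : 0) + (has_sign ? 0 : 1):
//   casenum 0: 32-bit signed    CRJ   (fused)  or  CR   + branch
//   casenum 1: 32-bit unsigned  CLRJ  (fused)  or  CLR  + branch
//   casenum 2: 64-bit signed    CGRJ  (fused)  or  CGR  + branch
//   casenum 3: 64-bit unsigned  CLGRJ (fused)  or  CLGR + branch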
1655 // 1656 // Input: 1657 // r1 - left compare operand 1658 // r2 - right compare operand 1659 void MacroAssembler::compare_and_branch_optimized(Register r1, 1660 Register r2, 1661 Assembler::branch_condition cond, 1662 address branch_addr, 1663 bool len64, 1664 bool has_sign) { 1665 unsigned int casenum = (len64?2:0)+(has_sign?0:1); 1666 1667 address branch_origin = pc(); 1668 if (VM_Version::has_CompareBranch() && RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) { 1669 switch (casenum) { 1670 case 0: z_crj( r1, r2, cond, branch_addr); break; 1671 case 1: z_clrj (r1, r2, cond, branch_addr); break; 1672 case 2: z_cgrj(r1, r2, cond, branch_addr); break; 1673 case 3: z_clgrj(r1, r2, cond, branch_addr); break; 1674 default: ShouldNotReachHere(); break; 1675 } 1676 } else { 1677 switch (casenum) { 1678 case 0: z_cr( r1, r2); break; 1679 case 1: z_clr(r1, r2); break; 1680 case 2: z_cgr(r1, r2); break; 1681 case 3: z_clgr(r1, r2); break; 1682 default: ShouldNotReachHere(); break; 1683 } 1684 branch_optimized(cond, branch_addr); 1685 } 1686 } 1687 1688 // Generate an optimal compare and branch to the branch target. 1689 // Optimal means that a relative branch (clgij, brc or brcl) is used if the 1690 // branch distance is short enough. Loading the target address into a 1691 // register and branching via reg is used as fallback only. 1692 // 1693 // Input: 1694 // r1 - left compare operand (in register) 1695 // x2 - right compare operand (immediate) 1696 void MacroAssembler::compare_and_branch_optimized(Register r1, 1697 jlong x2, 1698 Assembler::branch_condition cond, 1699 Label& branch_target, 1700 bool len64, 1701 bool has_sign) { 1702 address branch_origin = pc(); 1703 bool x2_imm8 = (has_sign && Immediate::is_simm8(x2)) || (!has_sign && Immediate::is_uimm8(x2)); 1704 bool is_RelAddr16 = branch_target.is_near() || 1705 (branch_target.is_bound() && 1706 RelAddr::is_in_range_of_RelAddr16(target(branch_target), branch_origin)); 1707 unsigned int casenum = (len64?2:0)+(has_sign?0:1); 1708 1709 if (VM_Version::has_CompareBranch() && is_RelAddr16 && x2_imm8) { 1710 switch (casenum) { 1711 case 0: z_cij( r1, x2, cond, branch_target); break; 1712 case 1: z_clij(r1, x2, cond, branch_target); break; 1713 case 2: z_cgij(r1, x2, cond, branch_target); break; 1714 case 3: z_clgij(r1, x2, cond, branch_target); break; 1715 default: ShouldNotReachHere(); break; 1716 } 1717 return; 1718 } 1719 1720 if (x2 == 0) { 1721 switch (casenum) { 1722 case 0: z_ltr(r1, r1); break; 1723 case 1: z_ltr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication! 1724 case 2: z_ltgr(r1, r1); break; 1725 case 3: z_ltgr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication! 
1726 default: ShouldNotReachHere(); break; 1727 } 1728 } else { 1729 if ((has_sign && Immediate::is_simm16(x2)) || (!has_sign && Immediate::is_uimm(x2, 15))) { 1730 switch (casenum) { 1731 case 0: z_chi(r1, x2); break; 1732 case 1: z_chi(r1, x2); break; // positive immediate < 2**15 1733 case 2: z_cghi(r1, x2); break; 1734 case 3: z_cghi(r1, x2); break; // positive immediate < 2**15 1735 default: break; 1736 } 1737 } else if ( (has_sign && Immediate::is_simm32(x2)) || (!has_sign && Immediate::is_uimm32(x2)) ) { 1738 switch (casenum) { 1739 case 0: z_cfi( r1, x2); break; 1740 case 1: z_clfi(r1, x2); break; 1741 case 2: z_cgfi(r1, x2); break; 1742 case 3: z_clgfi(r1, x2); break; 1743 default: ShouldNotReachHere(); break; 1744 } 1745 } else { 1746 // No instruction with immediate operand possible, so load into register. 1747 Register scratch = (r1 != Z_R0) ? Z_R0 : Z_R1; 1748 load_const_optimized(scratch, x2); 1749 switch (casenum) { 1750 case 0: z_cr( r1, scratch); break; 1751 case 1: z_clr(r1, scratch); break; 1752 case 2: z_cgr(r1, scratch); break; 1753 case 3: z_clgr(r1, scratch); break; 1754 default: ShouldNotReachHere(); break; 1755 } 1756 } 1757 } 1758 branch_optimized(cond, branch_target); 1759 } 1760 1761 // Generate an optimal compare and branch to the branch target. 1762 // Optimal means that a relative branch (clgrj, brc or brcl) is used if the 1763 // branch distance is short enough. Loading the target address into a 1764 // register and branching via reg is used as fallback only. 1765 // 1766 // Input: 1767 // r1 - left compare operand 1768 // r2 - right compare operand 1769 void MacroAssembler::compare_and_branch_optimized(Register r1, 1770 Register r2, 1771 Assembler::branch_condition cond, 1772 Label& branch_target, 1773 bool len64, 1774 bool has_sign) { 1775 unsigned int casenum = (len64 ? 2 : 0) + (has_sign ? 
0 : 1); 1776 1777 if (branch_target.is_bound()) { 1778 address branch_addr = target(branch_target); 1779 compare_and_branch_optimized(r1, r2, cond, branch_addr, len64, has_sign); 1780 } else { 1781 if (VM_Version::has_CompareBranch() && branch_target.is_near()) { 1782 switch (casenum) { 1783 case 0: z_crj( r1, r2, cond, branch_target); break; 1784 case 1: z_clrj( r1, r2, cond, branch_target); break; 1785 case 2: z_cgrj( r1, r2, cond, branch_target); break; 1786 case 3: z_clgrj(r1, r2, cond, branch_target); break; 1787 default: ShouldNotReachHere(); break; 1788 } 1789 } else { 1790 switch (casenum) { 1791 case 0: z_cr( r1, r2); break; 1792 case 1: z_clr(r1, r2); break; 1793 case 2: z_cgr(r1, r2); break; 1794 case 3: z_clgr(r1, r2); break; 1795 default: ShouldNotReachHere(); break; 1796 } 1797 branch_optimized(cond, branch_target); 1798 } 1799 } 1800 } 1801 1802 //=========================================================================== 1803 //=== END H I G H E R L E V E L B R A N C H E M I T T E R S === 1804 //=========================================================================== 1805 1806 AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) { 1807 assert(oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 1808 int index = oop_recorder()->allocate_metadata_index(obj); 1809 RelocationHolder rspec = metadata_Relocation::spec(index); 1810 return AddressLiteral((address)obj, rspec); 1811 } 1812 1813 AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) { 1814 assert(oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 1815 int index = oop_recorder()->find_index(obj); 1816 RelocationHolder rspec = metadata_Relocation::spec(index); 1817 return AddressLiteral((address)obj, rspec); 1818 } 1819 1820 AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) { 1821 assert(oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 1822 int oop_index = oop_recorder()->allocate_oop_index(obj); 1823 return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); 1824 } 1825 1826 AddressLiteral MacroAssembler::constant_oop_address(jobject obj) { 1827 assert(oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 1828 int oop_index = oop_recorder()->find_index(obj); 1829 return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); 1830 } 1831 1832 // NOTE: destroys r 1833 void MacroAssembler::c2bool(Register r, Register t) { 1834 z_lcr(t, r); // t = -r 1835 z_or(r, t); // r = -r OR r 1836 z_srl(r, 31); // Yields 0 if r was 0, 1 otherwise. 1837 } 1838 1839 // Patch instruction `inst' at offset `inst_pos' to refer to `dest_pos' 1840 // and return the resulting instruction. 1841 // Dest_pos and inst_pos are 32 bit only. These parms can only designate 1842 // relative positions. 1843 // Use correct argument types. Do not pre-calculate distance. 
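// Rough sketch of the 4-byte case below (e.g. a short relative branch),
// assuming the usual halfword scaling of pc-relative offsets (RelAddr::pcrel_off16):
//
//   halfwords = (dest_pos - inst_pos) / 2;
//   patched   = (inst & ~fmask(15, 0)) | simm16(halfwords, 16, 32);
//
// The 6-byte cases work the same way; only the position and width of the
// offset field differ. The consistency check at the end re-extracts the
// offset and compares it against the byte distance dest_pos - inst_pos.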
1844 unsigned long MacroAssembler::patched_branch(address dest_pos, unsigned long inst, address inst_pos) { 1845 int c = 0; 1846 unsigned long patched_inst = 0; 1847 if (is_call_pcrelative_short(inst) || 1848 is_branch_pcrelative_short(inst) || 1849 is_branchoncount_pcrelative_short(inst) || 1850 is_branchonindex32_pcrelative_short(inst)) { 1851 c = 1; 1852 int m = fmask(15, 0); // simm16(-1, 16, 32); 1853 int v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 32); 1854 patched_inst = (inst & ~m) | v; 1855 } else if (is_compareandbranch_pcrelative_short(inst)) { 1856 c = 2; 1857 long m = fmask(31, 16); // simm16(-1, 16, 48); 1858 long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48); 1859 patched_inst = (inst & ~m) | v; 1860 } else if (is_branchonindex64_pcrelative_short(inst)) { 1861 c = 3; 1862 long m = fmask(31, 16); // simm16(-1, 16, 48); 1863 long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48); 1864 patched_inst = (inst & ~m) | v; 1865 } else if (is_call_pcrelative_long(inst) || is_branch_pcrelative_long(inst)) { 1866 c = 4; 1867 long m = fmask(31, 0); // simm32(-1, 16, 48); 1868 long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48); 1869 patched_inst = (inst & ~m) | v; 1870 } else if (is_pcrelative_long(inst)) { // These are the non-branch pc-relative instructions. 1871 c = 5; 1872 long m = fmask(31, 0); // simm32(-1, 16, 48); 1873 long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48); 1874 patched_inst = (inst & ~m) | v; 1875 } else { 1876 print_dbg_msg(tty, inst, "not a relative branch", 0); 1877 dump_code_range(tty, inst_pos, 32, "not a pcrelative branch"); 1878 ShouldNotReachHere(); 1879 } 1880 1881 long new_off = get_pcrel_offset(patched_inst); 1882 if (new_off != (dest_pos-inst_pos)) { 1883 tty->print_cr("case %d: dest_pos = %p, inst_pos = %p, disp = %ld(%12.12lx)", c, dest_pos, inst_pos, new_off, new_off); 1884 print_dbg_msg(tty, inst, "<- original instruction: branch patching error", 0); 1885 print_dbg_msg(tty, patched_inst, "<- patched instruction: branch patching error", 0); 1886 #ifdef LUCY_DBG 1887 VM_Version::z_SIGSEGV(); 1888 #endif 1889 ShouldNotReachHere(); 1890 } 1891 return patched_inst; 1892 } 1893 1894 // Only called when binding labels (share/vm/asm/assembler.cpp) 1895 // Pass arguments as intended. Do not pre-calculate distance. 1896 void MacroAssembler::pd_patch_instruction(address branch, address target, const char* file, int line) { 1897 unsigned long stub_inst; 1898 int inst_len = get_instruction(branch, &stub_inst); 1899 1900 set_instruction(branch, patched_branch(target, stub_inst, branch), inst_len); 1901 } 1902 1903 1904 // Extract relative address (aka offset). 1905 // inv_simm16 works for 4-byte instructions only. 1906 // compare and branch instructions are 6-byte and have a 16bit offset "in the middle". 
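// The instruction image is right-aligned in the 64-bit 'inst' word, so (as the
// mask test below suggests) a 4-byte instruction leaves the upper 32 bits zero:
//   4-byte form: 16-bit field in the low halfword        -> inv_simm16(inst)
//   6-byte form: 16-bit field in bits 16..31 of 48 bits  -> inv_simm16_48(inst)
// Both fields hold halfword counts; inv_pcrel_off16() converts them back to a
// byte offset.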
1907 long MacroAssembler::get_pcrel_offset(unsigned long inst) { 1908 1909 if (MacroAssembler::is_pcrelative_short(inst)) { 1910 if (((inst&0xFFFFffff00000000UL) == 0) && ((inst&0x00000000FFFF0000UL) != 0)) { 1911 return RelAddr::inv_pcrel_off16(inv_simm16(inst)); 1912 } else { 1913 return RelAddr::inv_pcrel_off16(inv_simm16_48(inst)); 1914 } 1915 } 1916 1917 if (MacroAssembler::is_pcrelative_long(inst)) { 1918 return RelAddr::inv_pcrel_off32(inv_simm32(inst)); 1919 } 1920 1921 print_dbg_msg(tty, inst, "not a pcrelative instruction", 6); 1922 #ifdef LUCY_DBG 1923 VM_Version::z_SIGSEGV(); 1924 #else 1925 ShouldNotReachHere(); 1926 #endif 1927 return -1; 1928 } 1929 1930 long MacroAssembler::get_pcrel_offset(address pc) { 1931 unsigned long inst; 1932 unsigned int len = get_instruction(pc, &inst); 1933 1934 #ifdef ASSERT 1935 long offset; 1936 if (MacroAssembler::is_pcrelative_short(inst) || MacroAssembler::is_pcrelative_long(inst)) { 1937 offset = get_pcrel_offset(inst); 1938 } else { 1939 offset = -1; 1940 } 1941 1942 if (offset == -1) { 1943 dump_code_range(tty, pc, 32, "not a pcrelative instruction"); 1944 #ifdef LUCY_DBG 1945 VM_Version::z_SIGSEGV(); 1946 #else 1947 ShouldNotReachHere(); 1948 #endif 1949 } 1950 return offset; 1951 #else 1952 return get_pcrel_offset(inst); 1953 #endif // ASSERT 1954 } 1955 1956 // Get target address from pc-relative instructions. 1957 address MacroAssembler::get_target_addr_pcrel(address pc) { 1958 assert(is_pcrelative_long(pc), "not a pcrelative instruction"); 1959 return pc + get_pcrel_offset(pc); 1960 } 1961 1962 // Patch pc relative load address. 1963 void MacroAssembler::patch_target_addr_pcrel(address pc, address con) { 1964 unsigned long inst; 1965 // Offset is +/- 2**32 -> use long. 1966 ptrdiff_t distance = con - pc; 1967 1968 get_instruction(pc, &inst); 1969 1970 if (is_pcrelative_short(inst)) { 1971 *(short *)(pc+2) = RelAddr::pcrel_off16(con, pc); // Instructions are at least 2-byte aligned, no test required. 1972 1973 // Some extra safety net. 1974 if (!RelAddr::is_in_range_of_RelAddr16(distance)) { 1975 print_dbg_msg(tty, inst, "distance out of range (16bit)", 4); 1976 dump_code_range(tty, pc, 32, "distance out of range (16bit)"); 1977 guarantee(RelAddr::is_in_range_of_RelAddr16(distance), "too far away (more than +/- 2**16"); 1978 } 1979 return; 1980 } 1981 1982 if (is_pcrelative_long(inst)) { 1983 *(int *)(pc+2) = RelAddr::pcrel_off32(con, pc); 1984 1985 // Some Extra safety net. 1986 if (!RelAddr::is_in_range_of_RelAddr32(distance)) { 1987 print_dbg_msg(tty, inst, "distance out of range (32bit)", 6); 1988 dump_code_range(tty, pc, 32, "distance out of range (32bit)"); 1989 guarantee(RelAddr::is_in_range_of_RelAddr32(distance), "too far away (more than +/- 2**32"); 1990 } 1991 return; 1992 } 1993 1994 guarantee(false, "not a pcrelative instruction to patch!"); 1995 } 1996 1997 // "Current PC" here means the address just behind the basr instruction. 1998 address MacroAssembler::get_PC(Register result) { 1999 z_basr(result, Z_R0); // Don't branch, just save next instruction address in result. 2000 return pc(); 2001 } 2002 2003 // Get current PC + offset. 2004 // Offset given in bytes, must be even! 2005 // "Current PC" here means the address of the larl instruction plus the given offset. 2006 address MacroAssembler::get_PC(Register result, int64_t offset) { 2007 address here = pc(); 2008 z_larl(result, offset/2); // Save target instruction address in result. 
2009 return here + offset; 2010 } 2011 2012 void MacroAssembler::instr_size(Register size, Register pc) { 2013 // Extract 2 most significant bits of current instruction. 2014 z_llgc(size, Address(pc)); 2015 z_srl(size, 6); 2016 // Compute (x+3)&6 which translates 0->2, 1->4, 2->4, 3->6. 2017 z_ahi(size, 3); 2018 z_nill(size, 6); 2019 } 2020 2021 // Resize_frame with SP(new) = SP(old) - [offset]. 2022 void MacroAssembler::resize_frame_sub(Register offset, Register fp, bool load_fp) 2023 { 2024 assert_different_registers(offset, fp, Z_SP); 2025 if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); } 2026 2027 z_sgr(Z_SP, offset); 2028 z_stg(fp, _z_abi(callers_sp), Z_SP); 2029 } 2030 2031 // Resize_frame with SP(new) = [newSP] + offset. 2032 // This emitter is useful if we already have calculated a pointer 2033 // into the to-be-allocated stack space, e.g. with special alignment properties, 2034 // but need some additional space, e.g. for spilling. 2035 // newSP is the pre-calculated pointer. It must not be modified. 2036 // fp holds, or is filled with, the frame pointer. 2037 // offset is the additional increment which is added to addr to form the new SP. 2038 // Note: specify a negative value to reserve more space! 2039 // load_fp == true only indicates that fp is not pre-filled with the frame pointer. 2040 // It does not guarantee that fp contains the frame pointer at the end. 2041 void MacroAssembler::resize_frame_abs_with_offset(Register newSP, Register fp, int offset, bool load_fp) { 2042 assert_different_registers(newSP, fp, Z_SP); 2043 2044 if (load_fp) { 2045 z_lg(fp, _z_abi(callers_sp), Z_SP); 2046 } 2047 2048 add2reg(Z_SP, offset, newSP); 2049 z_stg(fp, _z_abi(callers_sp), Z_SP); 2050 } 2051 2052 // Resize_frame with SP(new) = [newSP]. 2053 // load_fp == true only indicates that fp is not pre-filled with the frame pointer. 2054 // It does not guarantee that fp contains the frame pointer at the end. 2055 void MacroAssembler::resize_frame_absolute(Register newSP, Register fp, bool load_fp) { 2056 assert_different_registers(newSP, fp, Z_SP); 2057 2058 if (load_fp) { 2059 z_lg(fp, _z_abi(callers_sp), Z_SP); // need to use load/store. 2060 } 2061 2062 z_lgr(Z_SP, newSP); 2063 if (newSP != Z_R0) { // make sure we generate correct code, no matter what register newSP uses. 2064 z_stg(fp, _z_abi(callers_sp), newSP); 2065 } else { 2066 z_stg(fp, _z_abi(callers_sp), Z_SP); 2067 } 2068 } 2069 2070 // Resize_frame with SP(new) = SP(old) + offset. 2071 void MacroAssembler::resize_frame(RegisterOrConstant offset, Register fp, bool load_fp) { 2072 assert_different_registers(fp, Z_SP); 2073 2074 if (load_fp) { 2075 z_lg(fp, _z_abi(callers_sp), Z_SP); 2076 } 2077 add64(Z_SP, offset); 2078 z_stg(fp, _z_abi(callers_sp), Z_SP); 2079 } 2080 2081 void MacroAssembler::push_frame(Register bytes, Register old_sp, bool copy_sp, bool bytes_with_inverted_sign) { 2082 #ifdef ASSERT 2083 assert_different_registers(bytes, old_sp, Z_SP); 2084 if (!copy_sp) { 2085 z_cgr(old_sp, Z_SP); 2086 asm_assert(bcondEqual, "[old_sp]!=[Z_SP]", 0x211); 2087 } 2088 #endif 2089 if (copy_sp) { z_lgr(old_sp, Z_SP); } 2090 if (bytes_with_inverted_sign) { 2091 z_agr(Z_SP, bytes); 2092 } else { 2093 z_sgr(Z_SP, bytes); // Z_sgfr sufficient, but probably not faster. 
2094 } 2095 z_stg(old_sp, _z_abi(callers_sp), Z_SP); 2096 } 2097 2098 unsigned int MacroAssembler::push_frame(unsigned int bytes, Register scratch) { 2099 long offset = Assembler::align(bytes, frame::alignment_in_bytes); 2100 assert(offset > 0, "should push a frame with positive size, size = %ld.", offset); 2101 assert(Displacement::is_validDisp(-offset), "frame size out of range, size = %ld", offset); 2102 2103 // We must not write outside the current stack bounds (given by Z_SP). 2104 // Thus, we have to first update Z_SP and then store the previous SP as stack linkage. 2105 // We rely on Z_R0 by default to be available as scratch. 2106 z_lgr(scratch, Z_SP); 2107 add2reg(Z_SP, -offset); 2108 z_stg(scratch, _z_abi(callers_sp), Z_SP); 2109 #ifdef ASSERT 2110 // Just make sure nobody uses the value in the default scratch register. 2111 // When another register is used, the caller might rely on it containing the frame pointer. 2112 if (scratch == Z_R0) { 2113 z_iihf(scratch, 0xbaadbabe); 2114 z_iilf(scratch, 0xdeadbeef); 2115 } 2116 #endif 2117 return offset; 2118 } 2119 2120 // Push a frame of size `bytes' plus abi160 on top. 2121 unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) { 2122 BLOCK_COMMENT("push_frame_abi160 {"); 2123 unsigned int res = push_frame(bytes + frame::z_abi_160_size); 2124 BLOCK_COMMENT("} push_frame_abi160"); 2125 return res; 2126 } 2127 2128 // Pop current C frame. 2129 void MacroAssembler::pop_frame() { 2130 BLOCK_COMMENT("pop_frame {"); 2131 Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP); 2132 BLOCK_COMMENT("} pop_frame"); 2133 } 2134 2135 // Pop current C frame and restore return PC register (Z_R14). 2136 void MacroAssembler::pop_frame_restore_retPC(int frame_size_in_bytes) { 2137 BLOCK_COMMENT("pop_frame_restore_retPC:"); 2138 int retPC_offset = _z_common_abi(return_pc) + frame_size_in_bytes; 2139 // If possible, pop frame by add instead of load (a penny saved is a penny got :-). 2140 if (Displacement::is_validDisp(retPC_offset)) { 2141 z_lg(Z_R14, retPC_offset, Z_SP); 2142 add2reg(Z_SP, frame_size_in_bytes); 2143 } else { 2144 add2reg(Z_SP, frame_size_in_bytes); 2145 restore_return_pc(); 2146 } 2147 } 2148 2149 void MacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) { 2150 if (allow_relocation) { 2151 call_c(entry_point); 2152 } else { 2153 call_c_static(entry_point); 2154 } 2155 } 2156 2157 void MacroAssembler::call_VM_leaf_base(address entry_point) { 2158 bool allow_relocation = true; 2159 call_VM_leaf_base(entry_point, allow_relocation); 2160 } 2161 2162 int MacroAssembler::ic_check_size() { 2163 int ic_size = 24; 2164 if (!ImplicitNullChecks) { 2165 ic_size += 6; 2166 } 2167 if (UseCompactObjectHeaders) { 2168 ic_size += 12; 2169 } else { 2170 ic_size += 6; // either z_llgf or z_lg 2171 } 2172 return ic_size; 2173 } 2174 2175 int MacroAssembler::ic_check(int end_alignment) { 2176 Register R2_receiver = Z_ARG1; 2177 Register R0_scratch = Z_R0_scratch; 2178 Register R1_scratch = Z_R1_scratch; 2179 Register R9_data = Z_inline_cache; 2180 Label success, failure; 2181 2182 // The UEP of a code blob ensures that the VEP is padded. However, the padding of the UEP is placed 2183 // before the inline cache check, so we don't have to execute any nop instructions when dispatching 2184 // through the UEP, yet we can ensure that the VEP is aligned appropriately. 
That's why we align 2185 // before the inline cache check here, and not after 2186 align(end_alignment, offset() + ic_check_size()); 2187 2188 int uep_offset = offset(); 2189 if (!ImplicitNullChecks) { 2190 z_cgij(R2_receiver, 0, Assembler::bcondEqual, failure); 2191 } 2192 2193 if (UseCompactObjectHeaders) { 2194 load_narrow_klass_compact(R1_scratch, R2_receiver); 2195 } else if (UseCompressedClassPointers) { 2196 z_llgf(R1_scratch, Address(R2_receiver, oopDesc::klass_offset_in_bytes())); 2197 } else { 2198 z_lg(R1_scratch, Address(R2_receiver, oopDesc::klass_offset_in_bytes())); 2199 } 2200 z_cg(R1_scratch, Address(R9_data, in_bytes(CompiledICData::speculated_klass_offset()))); 2201 z_bre(success); 2202 2203 bind(failure); 2204 load_const(R1_scratch, AddressLiteral(SharedRuntime::get_ic_miss_stub())); 2205 z_br(R1_scratch); 2206 bind(success); 2207 2208 assert((offset() % end_alignment) == 0, "Misaligned verified entry point, offset() = %d, end_alignment = %d", offset(), end_alignment); 2209 return uep_offset; 2210 } 2211 2212 void MacroAssembler::call_VM_base(Register oop_result, 2213 Register last_java_sp, 2214 address entry_point, 2215 bool allow_relocation, 2216 bool check_exceptions) { // Defaults to true. 2217 // Allow_relocation indicates, if true, that the generated code shall 2218 // be fit for code relocation or referenced data relocation. In other 2219 // words: all addresses must be considered variable. PC-relative addressing 2220 // is not possible then. 2221 // On the other hand, if (allow_relocation == false), addresses and offsets 2222 // may be considered stable, enabling us to take advantage of some PC-relative 2223 // addressing tweaks. These might improve performance and reduce code size. 2224 2225 // Determine last_java_sp register. 2226 if (!last_java_sp->is_valid()) { 2227 last_java_sp = Z_SP; // Load Z_SP as SP. 2228 } 2229 2230 set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, Z_R1, allow_relocation); 2231 2232 // ARG1 must hold thread address. 2233 z_lgr(Z_ARG1, Z_thread); 2234 2235 address return_pc = nullptr; 2236 if (allow_relocation) { 2237 return_pc = call_c(entry_point); 2238 } else { 2239 return_pc = call_c_static(entry_point); 2240 } 2241 2242 reset_last_Java_frame(allow_relocation); 2243 2244 // C++ interp handles this in the interpreter. 2245 check_and_handle_popframe(Z_thread); 2246 check_and_handle_earlyret(Z_thread); 2247 2248 // Check for pending exceptions. 2249 if (check_exceptions) { 2250 // Check for pending exceptions (java_thread is set upon return). 2251 load_and_test_long(Z_R0_scratch, Address(Z_thread, Thread::pending_exception_offset())); 2252 2253 // This used to conditionally jump to forward_exception however it is 2254 // possible if we relocate that the branch will not reach. So we must jump 2255 // around so we can always reach. 2256 2257 Label ok; 2258 z_bre(ok); // Bcondequal is the same as bcondZero. 2259 call_stub(StubRoutines::forward_exception_entry()); 2260 bind(ok); 2261 } 2262 2263 // Get oop result if there is one and reset the value in the thread. 2264 if (oop_result->is_valid()) { 2265 get_vm_result(oop_result); 2266 } 2267 2268 _last_calls_return_pc = return_pc; // Wipe out other (error handling) calls. 2269 } 2270 2271 void MacroAssembler::call_VM_base(Register oop_result, 2272 Register last_java_sp, 2273 address entry_point, 2274 bool check_exceptions) { // Defaults to true. 
2275 bool allow_relocation = true; 2276 call_VM_base(oop_result, last_java_sp, entry_point, allow_relocation, check_exceptions); 2277 } 2278 2279 // VM calls without explicit last_java_sp. 2280 2281 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) { 2282 // Call takes possible detour via InterpreterMacroAssembler. 2283 call_VM_base(oop_result, noreg, entry_point, true, check_exceptions); 2284 } 2285 2286 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) { 2287 // Z_ARG1 is reserved for the thread. 2288 lgr_if_needed(Z_ARG2, arg_1); 2289 call_VM(oop_result, entry_point, check_exceptions); 2290 } 2291 2292 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { 2293 // Z_ARG1 is reserved for the thread. 2294 assert_different_registers(arg_2, Z_ARG2); 2295 lgr_if_needed(Z_ARG2, arg_1); 2296 lgr_if_needed(Z_ARG3, arg_2); 2297 call_VM(oop_result, entry_point, check_exceptions); 2298 } 2299 2300 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, 2301 Register arg_3, bool check_exceptions) { 2302 // Z_ARG1 is reserved for the thread. 2303 assert_different_registers(arg_3, Z_ARG2, Z_ARG3); 2304 assert_different_registers(arg_2, Z_ARG2); 2305 lgr_if_needed(Z_ARG2, arg_1); 2306 lgr_if_needed(Z_ARG3, arg_2); 2307 lgr_if_needed(Z_ARG4, arg_3); 2308 call_VM(oop_result, entry_point, check_exceptions); 2309 } 2310 2311 // VM static calls without explicit last_java_sp. 2312 2313 void MacroAssembler::call_VM_static(Register oop_result, address entry_point, bool check_exceptions) { 2314 // Call takes possible detour via InterpreterMacroAssembler. 2315 call_VM_base(oop_result, noreg, entry_point, false, check_exceptions); 2316 } 2317 2318 void MacroAssembler::call_VM_static(Register oop_result, address entry_point, Register arg_1, Register arg_2, 2319 Register arg_3, bool check_exceptions) { 2320 // Z_ARG1 is reserved for the thread. 2321 assert_different_registers(arg_3, Z_ARG2, Z_ARG3); 2322 assert_different_registers(arg_2, Z_ARG2); 2323 lgr_if_needed(Z_ARG2, arg_1); 2324 lgr_if_needed(Z_ARG3, arg_2); 2325 lgr_if_needed(Z_ARG4, arg_3); 2326 call_VM_static(oop_result, entry_point, check_exceptions); 2327 } 2328 2329 // VM calls with explicit last_java_sp. 2330 2331 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions) { 2332 // Call takes possible detour via InterpreterMacroAssembler. 2333 call_VM_base(oop_result, last_java_sp, entry_point, true, check_exceptions); 2334 } 2335 2336 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) { 2337 // Z_ARG1 is reserved for the thread. 2338 lgr_if_needed(Z_ARG2, arg_1); 2339 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2340 } 2341 2342 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, 2343 Register arg_2, bool check_exceptions) { 2344 // Z_ARG1 is reserved for the thread. 
2345 assert_different_registers(arg_2, Z_ARG2); 2346 lgr_if_needed(Z_ARG2, arg_1); 2347 lgr_if_needed(Z_ARG3, arg_2); 2348 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2349 } 2350 2351 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, 2352 Register arg_2, Register arg_3, bool check_exceptions) { 2353 // Z_ARG1 is reserved for the thread. 2354 assert_different_registers(arg_3, Z_ARG2, Z_ARG3); 2355 assert_different_registers(arg_2, Z_ARG2); 2356 lgr_if_needed(Z_ARG2, arg_1); 2357 lgr_if_needed(Z_ARG3, arg_2); 2358 lgr_if_needed(Z_ARG4, arg_3); 2359 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2360 } 2361 2362 // VM leaf calls. 2363 2364 void MacroAssembler::call_VM_leaf(address entry_point) { 2365 // Call takes possible detour via InterpreterMacroAssembler. 2366 call_VM_leaf_base(entry_point, true); 2367 } 2368 2369 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) { 2370 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2371 call_VM_leaf(entry_point); 2372 } 2373 2374 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) { 2375 assert_different_registers(arg_2, Z_ARG1); 2376 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2377 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2378 call_VM_leaf(entry_point); 2379 } 2380 2381 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) { 2382 assert_different_registers(arg_3, Z_ARG1, Z_ARG2); 2383 assert_different_registers(arg_2, Z_ARG1); 2384 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2385 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2386 if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3); 2387 call_VM_leaf(entry_point); 2388 } 2389 2390 // Static VM leaf calls. 2391 // Really static VM leaf calls are never patched. 2392 2393 void MacroAssembler::call_VM_leaf_static(address entry_point) { 2394 // Call takes possible detour via InterpreterMacroAssembler. 2395 call_VM_leaf_base(entry_point, false); 2396 } 2397 2398 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1) { 2399 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2400 call_VM_leaf_static(entry_point); 2401 } 2402 2403 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2) { 2404 assert_different_registers(arg_2, Z_ARG1); 2405 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2406 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2407 call_VM_leaf_static(entry_point); 2408 } 2409 2410 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2, Register arg_3) { 2411 assert_different_registers(arg_3, Z_ARG1, Z_ARG2); 2412 assert_different_registers(arg_2, Z_ARG1); 2413 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2414 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2415 if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3); 2416 call_VM_leaf_static(entry_point); 2417 } 2418 2419 // Don't use detour via call_c(reg). 2420 address MacroAssembler::call_c(address function_entry) { 2421 load_const(Z_R1, function_entry); 2422 return call(Z_R1); 2423 } 2424 2425 // Variant for really static (non-relocatable) calls which are never patched. 2426 address MacroAssembler::call_c_static(address function_entry) { 2427 load_absolute_address(Z_R1, function_entry); 2428 #if 0 // def ASSERT 2429 // Verify that call site did not move. 
2430 load_const_optimized(Z_R0, function_entry); 2431 z_cgr(Z_R1, Z_R0); 2432 z_brc(bcondEqual, 3); 2433 z_illtrap(0xba); 2434 #endif 2435 return call(Z_R1); 2436 } 2437 2438 address MacroAssembler::call_c_opt(address function_entry) { 2439 bool success = call_far_patchable(function_entry, -2 /* emit relocation + constant */); 2440 _last_calls_return_pc = success ? pc() : nullptr; 2441 return _last_calls_return_pc; 2442 } 2443 2444 // Identify a call_far_patchable instruction: LARL + LG + BASR 2445 // 2446 // nop ; optionally, if required for alignment 2447 // lgrl rx,A(TOC entry) ; PC-relative access into constant pool 2448 // basr Z_R14,rx ; end of this instruction must be aligned to a word boundary 2449 // 2450 // Code pattern will eventually get patched into variant2 (see below for detection code). 2451 // 2452 bool MacroAssembler::is_call_far_patchable_variant0_at(address instruction_addr) { 2453 address iaddr = instruction_addr; 2454 2455 // Check for the actual load instruction. 2456 if (!is_load_const_from_toc(iaddr)) { return false; } 2457 iaddr += load_const_from_toc_size(); 2458 2459 // Check for the call (BASR) instruction, finally. 2460 assert(iaddr-instruction_addr+call_byregister_size() == call_far_patchable_size(), "size mismatch"); 2461 return is_call_byregister(iaddr); 2462 } 2463 2464 // Identify a call_far_patchable instruction: BRASL 2465 // 2466 // Code pattern to suits atomic patching: 2467 // nop ; Optionally, if required for alignment. 2468 // nop ... ; Multiple filler nops to compensate for size difference (variant0 is longer). 2469 // nop ; For code pattern detection: Prepend each BRASL with a nop. 2470 // brasl Z_R14,<reladdr> ; End of code must be 4-byte aligned ! 2471 bool MacroAssembler::is_call_far_patchable_variant2_at(address instruction_addr) { 2472 const address call_addr = (address)((intptr_t)instruction_addr + call_far_patchable_size() - call_far_pcrelative_size()); 2473 2474 // Check for correct number of leading nops. 2475 address iaddr; 2476 for (iaddr = instruction_addr; iaddr < call_addr; iaddr += nop_size()) { 2477 if (!is_z_nop(iaddr)) { return false; } 2478 } 2479 assert(iaddr == call_addr, "sanity"); 2480 2481 // --> Check for call instruction. 2482 if (is_call_far_pcrelative(call_addr)) { 2483 assert(call_addr-instruction_addr+call_far_pcrelative_size() == call_far_patchable_size(), "size mismatch"); 2484 return true; 2485 } 2486 2487 return false; 2488 } 2489 2490 // Emit a NOT mt-safely patchable 64 bit absolute call. 2491 // If toc_offset == -2, then the destination of the call (= target) is emitted 2492 // to the constant pool and a runtime_call relocation is added 2493 // to the code buffer. 2494 // If toc_offset != -2, target must already be in the constant pool at 2495 // _ctableStart+toc_offset (a caller can retrieve toc_offset 2496 // from the runtime_call relocation). 2497 // Special handling of emitting to scratch buffer when there is no constant pool. 2498 // Slightly changed code pattern. We emit an additional nop if we would 2499 // not end emitting at a word aligned address. This is to ensure 2500 // an atomically patchable displacement in brasl instructions. 2501 // 2502 // A call_far_patchable comes in different flavors: 2503 // - LARL(CP) / LG(CP) / BR (address in constant pool, access via CP register) 2504 // - LGRL(CP) / BR (address in constant pool, pc-relative access) 2505 // - BRASL (relative address of call target coded in instruction) 2506 // All flavors occupy the same amount of space. 
Length differences are compensated 2507 // by leading nops, such that the instruction sequence always ends at the same 2508 // byte offset. This is required to keep the return offset constant. 2509 // Furthermore, the return address (the end of the instruction sequence) is forced 2510 // to be on a 4-byte boundary. This is required for atomic patching, should we ever 2511 // need to patch the call target of the BRASL flavor. 2512 // RETURN value: false, if no constant pool entry could be allocated, true otherwise. 2513 bool MacroAssembler::call_far_patchable(address target, int64_t tocOffset) { 2514 // Get current pc and ensure word alignment for end of instr sequence. 2515 const address start_pc = pc(); 2516 const intptr_t start_off = offset(); 2517 assert(!call_far_patchable_requires_alignment_nop(start_pc), "call_far_patchable requires aligned address"); 2518 const ptrdiff_t dist = (ptrdiff_t)(target - (start_pc + 2)); // Prepend each BRASL with a nop. 2519 const bool emit_target_to_pool = (tocOffset == -2) && !code_section()->scratch_emit(); 2520 const bool emit_relative_call = !emit_target_to_pool && 2521 RelAddr::is_in_range_of_RelAddr32(dist) && 2522 ReoptimizeCallSequences && 2523 !code_section()->scratch_emit(); 2524 2525 if (emit_relative_call) { 2526 // Add padding to get the same size as below. 2527 const unsigned int padding = call_far_patchable_size() - call_far_pcrelative_size(); 2528 unsigned int current_padding; 2529 for (current_padding = 0; current_padding < padding; current_padding += nop_size()) { z_nop(); } 2530 assert(current_padding == padding, "sanity"); 2531 2532 // relative call: len = 2(nop) + 6 (brasl) 2533 // CodeBlob resize cannot occur in this case because 2534 // this call is emitted into pre-existing space. 2535 z_nop(); // Prepend each BRASL with a nop. 2536 z_brasl(Z_R14, target); 2537 } else { 2538 // absolute call: Get address from TOC. 2539 // len = (load TOC){6|0} + (load from TOC){6} + (basr){2} = {14|8} 2540 if (emit_target_to_pool) { 2541 // When emitting the call for the first time, we do not need to use 2542 // the pc-relative version. It will be patched anyway, when the code 2543 // buffer is copied. 2544 // Relocation is not needed when !ReoptimizeCallSequences. 2545 relocInfo::relocType rt = ReoptimizeCallSequences ? relocInfo::runtime_call_w_cp_type : relocInfo::none; 2546 AddressLiteral dest(target, rt); 2547 // Store_oop_in_toc() adds dest to the constant table. As side effect, this kills 2548 // inst_mark(). Reset if possible. 2549 bool reset_mark = (inst_mark() == pc()); 2550 tocOffset = store_oop_in_toc(dest); 2551 if (reset_mark) { set_inst_mark(); } 2552 if (tocOffset == -1) { 2553 return false; // Couldn't create constant pool entry. 2554 } 2555 } 2556 assert(offset() == start_off, "emit no code before this point!"); 2557 2558 address tocPos = pc() + tocOffset; 2559 if (emit_target_to_pool) { 2560 tocPos = code()->consts()->start() + tocOffset; 2561 } 2562 load_long_pcrelative(Z_R14, tocPos); 2563 z_basr(Z_R14, Z_R14); 2564 } 2565 2566 #ifdef ASSERT 2567 // Assert that we can identify the emitted call. 
2568 assert(is_call_far_patchable_at(addr_at(start_off)), "can't identify emitted call"); 2569 assert(offset() == start_off+call_far_patchable_size(), "wrong size"); 2570 2571 if (emit_target_to_pool) { 2572 assert(get_dest_of_call_far_patchable_at(addr_at(start_off), code()->consts()->start()) == target, 2573 "wrong encoding of dest address"); 2574 } 2575 #endif 2576 return true; // success 2577 } 2578 2579 // Identify a call_far_patchable instruction. 2580 // For more detailed information see header comment of call_far_patchable. 2581 bool MacroAssembler::is_call_far_patchable_at(address instruction_addr) { 2582 return is_call_far_patchable_variant2_at(instruction_addr) || // short version: BRASL 2583 is_call_far_patchable_variant0_at(instruction_addr); // long version LARL + LG + BASR 2584 } 2585 2586 // Does the call_far_patchable instruction use a pc-relative encoding 2587 // of the call destination? 2588 bool MacroAssembler::is_call_far_patchable_pcrelative_at(address instruction_addr) { 2589 // Variant 2 is pc-relative. 2590 return is_call_far_patchable_variant2_at(instruction_addr); 2591 } 2592 2593 bool MacroAssembler::is_call_far_pcrelative(address instruction_addr) { 2594 // Prepend each BRASL with a nop. 2595 return is_z_nop(instruction_addr) && is_z_brasl(instruction_addr + nop_size()); // Match at position after one nop required. 2596 } 2597 2598 // Set destination address of a call_far_patchable instruction. 2599 void MacroAssembler::set_dest_of_call_far_patchable_at(address instruction_addr, address dest, int64_t tocOffset) { 2600 ResourceMark rm; 2601 2602 // Now that CP entry is verified, patch call to a pc-relative call (if circumstances permit). 2603 int code_size = MacroAssembler::call_far_patchable_size(); 2604 CodeBuffer buf(instruction_addr, code_size); 2605 MacroAssembler masm(&buf); 2606 masm.call_far_patchable(dest, tocOffset); 2607 ICache::invalidate_range(instruction_addr, code_size); // Empty on z. 2608 } 2609 2610 // Get dest address of a call_far_patchable instruction. 2611 address MacroAssembler::get_dest_of_call_far_patchable_at(address instruction_addr, address ctable) { 2612 // Dynamic TOC: absolute address in constant pool. 2613 // Check variant2 first, it is more frequent. 2614 2615 // Relative address encoded in call instruction. 2616 if (is_call_far_patchable_variant2_at(instruction_addr)) { 2617 return MacroAssembler::get_target_addr_pcrel(instruction_addr + nop_size()); // Prepend each BRASL with a nop. 2618 2619 // Absolute address in constant pool. 
2620 } else if (is_call_far_patchable_variant0_at(instruction_addr)) { 2621 address iaddr = instruction_addr; 2622 2623 long tocOffset = get_load_const_from_toc_offset(iaddr); 2624 address tocLoc = iaddr + tocOffset; 2625 return *(address *)(tocLoc); 2626 } else { 2627 fprintf(stderr, "MacroAssembler::get_dest_of_call_far_patchable_at has a problem at %p:\n", instruction_addr); 2628 fprintf(stderr, "not a call_far_patchable: %16.16lx %16.16lx, len = %d\n", 2629 *(unsigned long*)instruction_addr, 2630 *(unsigned long*)(instruction_addr+8), 2631 call_far_patchable_size()); 2632 Disassembler::decode(instruction_addr, instruction_addr+call_far_patchable_size()); 2633 ShouldNotReachHere(); 2634 return nullptr; 2635 } 2636 } 2637 2638 void MacroAssembler::align_call_far_patchable(address pc) { 2639 if (call_far_patchable_requires_alignment_nop(pc)) { z_nop(); } 2640 } 2641 2642 void MacroAssembler::check_and_handle_earlyret(Register java_thread) { 2643 } 2644 2645 void MacroAssembler::check_and_handle_popframe(Register java_thread) { 2646 } 2647 2648 // Read from the polling page. 2649 // Use TM or TMY instruction, depending on read offset. 2650 // offset = 0: Use TM, safepoint polling. 2651 // offset < 0: Use TMY, profiling safepoint polling. 2652 void MacroAssembler::load_from_polling_page(Register polling_page_address, int64_t offset) { 2653 if (Immediate::is_uimm12(offset)) { 2654 z_tm(offset, polling_page_address, mask_safepoint); 2655 } else { 2656 z_tmy(offset, polling_page_address, mask_profiling); 2657 } 2658 } 2659 2660 // Check whether z_instruction is a read access to the polling page 2661 // which was emitted by load_from_polling_page(..). 2662 bool MacroAssembler::is_load_from_polling_page(address instr_loc) { 2663 unsigned long z_instruction; 2664 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2665 2666 if (ilen == 2) { return false; } // It's none of the allowed instructions. 2667 2668 if (ilen == 4) { 2669 if (!is_z_tm(z_instruction)) { return false; } // It's len=4, but not a z_tm. fail. 2670 2671 int ms = inv_mask(z_instruction,8,32); // mask 2672 int ra = inv_reg(z_instruction,16,32); // base register 2673 int ds = inv_uimm12(z_instruction); // displacement 2674 2675 if (!(ds == 0 && ra != 0 && ms == mask_safepoint)) { 2676 return false; // It's not a z_tm(0, ra, mask_safepoint). Fail. 2677 } 2678 2679 } else { /* if (ilen == 6) */ 2680 2681 assert(!is_z_lg(z_instruction), "old form (LG) polling page access. Please fix and use TM(Y)."); 2682 2683 if (!is_z_tmy(z_instruction)) { return false; } // It's len=6, but not a z_tmy. fail. 2684 2685 int ms = inv_mask(z_instruction,8,48); // mask 2686 int ra = inv_reg(z_instruction,16,48); // base register 2687 int ds = inv_simm20(z_instruction); // displacement 2688 } 2689 2690 return true; 2691 } 2692 2693 // Extract poll address from instruction and ucontext. 
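// Sketch of what is reconstructed below from the instruction fields
// (base register and displacement decoded via inv_reg()/inv_uimm12()/inv_simm20()):
//   tm   disp12(base),mask   (4 bytes): poll address = gregs[base] + disp12
//   tmy  disp20(base),mask   (6 bytes): poll address = gregs[base] + disp20
// where gregs[] is the saved register file taken from the ucontext.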
2694 address MacroAssembler::get_poll_address(address instr_loc, void* ucontext) { 2695 assert(ucontext != nullptr, "must have ucontext"); 2696 ucontext_t* uc = (ucontext_t*) ucontext; 2697 unsigned long z_instruction; 2698 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2699 2700 if (ilen == 4 && is_z_tm(z_instruction)) { 2701 int ra = inv_reg(z_instruction, 16, 32); // base register 2702 int ds = inv_uimm12(z_instruction); // displacement 2703 address addr = (address)uc->uc_mcontext.gregs[ra]; 2704 return addr + ds; 2705 } else if (ilen == 6 && is_z_tmy(z_instruction)) { 2706 int ra = inv_reg(z_instruction, 16, 48); // base register 2707 int ds = inv_simm20(z_instruction); // displacement 2708 address addr = (address)uc->uc_mcontext.gregs[ra]; 2709 return addr + ds; 2710 } 2711 2712 ShouldNotReachHere(); 2713 return nullptr; 2714 } 2715 2716 // Extract poll register from instruction. 2717 uint MacroAssembler::get_poll_register(address instr_loc) { 2718 unsigned long z_instruction; 2719 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2720 2721 if (ilen == 4 && is_z_tm(z_instruction)) { 2722 return (uint)inv_reg(z_instruction, 16, 32); // base register 2723 } else if (ilen == 6 && is_z_tmy(z_instruction)) { 2724 return (uint)inv_reg(z_instruction, 16, 48); // base register 2725 } 2726 2727 ShouldNotReachHere(); 2728 return 0; 2729 } 2730 2731 void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) { 2732 const Address poll_byte_addr(Z_thread, in_bytes(JavaThread::polling_word_offset()) + 7 /* Big Endian */); 2733 // Armed page has poll_bit set. 2734 z_tm(poll_byte_addr, SafepointMechanism::poll_bit()); 2735 z_brnaz(slow_path); 2736 } 2737 2738 // Don't rely on register locking, always use Z_R1 as scratch register instead. 2739 void MacroAssembler::bang_stack_with_offset(int offset) { 2740 // Stack grows down, caller passes positive offset. 2741 assert(offset > 0, "must bang with positive offset"); 2742 if (Displacement::is_validDisp(-offset)) { 2743 z_tmy(-offset, Z_SP, mask_stackbang); 2744 } else { 2745 add2reg(Z_R1, -offset, Z_SP); // Do not destroy Z_SP!!! 2746 z_tm(0, Z_R1, mask_stackbang); // Just banging. 2747 } 2748 } 2749 2750 void MacroAssembler::reserved_stack_check(Register return_pc) { 2751 // Test if reserved zone needs to be enabled. 2752 Label no_reserved_zone_enabling; 2753 assert(return_pc == Z_R14, "Return pc must be in R14 before z_br() to StackOverflow stub."); 2754 BLOCK_COMMENT("reserved_stack_check {"); 2755 2756 z_clg(Z_SP, Address(Z_thread, JavaThread::reserved_stack_activation_offset())); 2757 z_brl(no_reserved_zone_enabling); 2758 2759 // Enable reserved zone again, throw stack overflow exception. 2760 save_return_pc(); 2761 push_frame_abi160(0); 2762 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), Z_thread); 2763 pop_frame(); 2764 restore_return_pc(); 2765 2766 load_const_optimized(Z_R1, SharedRuntime::throw_delayed_StackOverflowError_entry()); 2767 // Don't use call() or z_basr(), they will invalidate Z_R14 which contains the return pc. 2768 z_br(Z_R1); 2769 2770 should_not_reach_here(); 2771 2772 bind(no_reserved_zone_enabling); 2773 BLOCK_COMMENT("} reserved_stack_check"); 2774 } 2775 2776 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
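// The generated code is a plain TLAB bump-pointer allocation; roughly
// (sketch only, field names follow the JavaThread::tlab_*_offset() accessors):
//
//   obj = thread->tlab_top;
//   end = obj + size;                      // size = con_size_in_bytes or var_size_in_bytes
//   if (end > thread->tlab_end) goto slow_case;
//   thread->tlab_top = end;                // obj points to the new, uninitialized memory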
2777 void MacroAssembler::tlab_allocate(Register obj, 2778 Register var_size_in_bytes, 2779 int con_size_in_bytes, 2780 Register t1, 2781 Label& slow_case) { 2782 assert_different_registers(obj, var_size_in_bytes, t1); 2783 Register end = t1; 2784 Register thread = Z_thread; 2785 2786 z_lg(obj, Address(thread, JavaThread::tlab_top_offset())); 2787 if (var_size_in_bytes == noreg) { 2788 z_lay(end, Address(obj, con_size_in_bytes)); 2789 } else { 2790 z_lay(end, Address(obj, var_size_in_bytes)); 2791 } 2792 z_cg(end, Address(thread, JavaThread::tlab_end_offset())); 2793 branch_optimized(bcondHigh, slow_case); 2794 2795 // Update the tlab top pointer. 2796 z_stg(end, Address(thread, JavaThread::tlab_top_offset())); 2797 2798 // Recover var_size_in_bytes if necessary. 2799 if (var_size_in_bytes == end) { 2800 z_sgr(var_size_in_bytes, obj); 2801 } 2802 } 2803 2804 // Emitter for interface method lookup. 2805 // input: recv_klass, intf_klass, itable_index 2806 // output: method_result 2807 // kills: itable_index, temp1_reg, Z_R0, Z_R1 2808 // TODO: Temp2_reg is unused. we may use this emitter also in the itable stubs. 2809 // If the register is still not needed then, remove it. 2810 void MacroAssembler::lookup_interface_method(Register recv_klass, 2811 Register intf_klass, 2812 RegisterOrConstant itable_index, 2813 Register method_result, 2814 Register temp1_reg, 2815 Label& no_such_interface, 2816 bool return_method) { 2817 2818 const Register vtable_len = temp1_reg; // Used to compute itable_entry_addr. 2819 const Register itable_entry_addr = Z_R1_scratch; 2820 const Register itable_interface = Z_R0_scratch; 2821 2822 BLOCK_COMMENT("lookup_interface_method {"); 2823 2824 // Load start of itable entries into itable_entry_addr. 2825 z_llgf(vtable_len, Address(recv_klass, Klass::vtable_length_offset())); 2826 z_sllg(vtable_len, vtable_len, exact_log2(vtableEntry::size_in_bytes())); 2827 2828 // Loop over all itable entries until desired interfaceOop(Rinterface) found. 2829 add2reg_with_index(itable_entry_addr, 2830 in_bytes(Klass::vtable_start_offset() + itableOffsetEntry::interface_offset()), 2831 recv_klass, vtable_len); 2832 2833 const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize; 2834 Label search; 2835 2836 bind(search); 2837 2838 // Handle IncompatibleClassChangeError. 2839 // If the entry is null then we've reached the end of the table 2840 // without finding the expected interface, so throw an exception. 2841 load_and_test_long(itable_interface, Address(itable_entry_addr)); 2842 z_bre(no_such_interface); 2843 2844 add2reg(itable_entry_addr, itable_offset_search_inc); 2845 z_cgr(itable_interface, intf_klass); 2846 z_brne(search); 2847 2848 // Entry found and itable_entry_addr points to it, get offset of vtable for interface. 2849 if (return_method) { 2850 const int vtable_offset_offset = in_bytes(itableOffsetEntry::offset_offset() - 2851 itableOffsetEntry::interface_offset()) - 2852 itable_offset_search_inc; 2853 2854 // Compute itableMethodEntry and get method and entry point 2855 // we use addressing with index and displacement, since the formula 2856 // for computing the entry's offset has a fixed and a dynamic part, 2857 // the latter depending on the matched interface entry and on the case, 2858 // that the itable index has been passed as a register, not a constant value. 2859 int method_offset = in_bytes(itableMethodEntry::method_offset()); 2860 // Fixed part (displacement), common operand. 
2861 Register itable_offset = method_result; // Dynamic part (index register). 2862 2863 if (itable_index.is_register()) { 2864 // Compute the method's offset in that register, for the formula, see the 2865 // else-clause below. 2866 z_sllg(itable_offset, itable_index.as_register(), exact_log2(itableMethodEntry::size() * wordSize)); 2867 z_agf(itable_offset, vtable_offset_offset, itable_entry_addr); 2868 } else { 2869 // Displacement increases. 2870 method_offset += itableMethodEntry::size() * wordSize * itable_index.as_constant(); 2871 2872 // Load index from itable. 2873 z_llgf(itable_offset, vtable_offset_offset, itable_entry_addr); 2874 } 2875 2876 // Finally load the method's oop. 2877 z_lg(method_result, method_offset, itable_offset, recv_klass); 2878 } 2879 BLOCK_COMMENT("} lookup_interface_method"); 2880 } 2881 2882 // Lookup for virtual method invocation. 2883 void MacroAssembler::lookup_virtual_method(Register recv_klass, 2884 RegisterOrConstant vtable_index, 2885 Register method_result) { 2886 assert_different_registers(recv_klass, vtable_index.register_or_noreg()); 2887 assert(vtableEntry::size() * wordSize == wordSize, 2888 "else adjust the scaling in the code below"); 2889 2890 BLOCK_COMMENT("lookup_virtual_method {"); 2891 2892 const int base = in_bytes(Klass::vtable_start_offset()); 2893 2894 if (vtable_index.is_constant()) { 2895 // Load with base + disp. 2896 Address vtable_entry_addr(recv_klass, 2897 vtable_index.as_constant() * wordSize + 2898 base + 2899 in_bytes(vtableEntry::method_offset())); 2900 2901 z_lg(method_result, vtable_entry_addr); 2902 } else { 2903 // Shift index properly and load with base + index + disp. 2904 Register vindex = vtable_index.as_register(); 2905 Address vtable_entry_addr(recv_klass, vindex, 2906 base + in_bytes(vtableEntry::method_offset())); 2907 2908 z_sllg(vindex, vindex, exact_log2(wordSize)); 2909 z_lg(method_result, vtable_entry_addr); 2910 } 2911 BLOCK_COMMENT("} lookup_virtual_method"); 2912 } 2913 2914 // Factor out code to call ic_miss_handler. 2915 // Generate code to call the inline cache miss handler. 2916 // 2917 // In most cases, this code will be generated out-of-line. 2918 // The method parameters are intended to provide some variability. 2919 // ICM - Label which has to be bound to the start of useful code (past any traps). 2920 // trapMarker - Marking byte for the generated illtrap instructions (if any). 2921 // Any value except 0x00 is supported. 2922 // = 0x00 - do not generate illtrap instructions. 2923 // use nops to fill unused space. 2924 // requiredSize - required size of the generated code. If the actually 2925 // generated code is smaller, use padding instructions to fill up. 2926 // = 0 - no size requirement, no padding. 2927 // scratch - scratch register to hold branch target address. 2928 // 2929 // The method returns the code offset of the bound label. 2930 unsigned int MacroAssembler::call_ic_miss_handler(Label& ICM, int trapMarker, int requiredSize, Register scratch) { 2931 intptr_t startOffset = offset(); 2932 2933 // Prevent entry at content_begin(). 2934 if (trapMarker != 0) { 2935 z_illtrap(trapMarker); 2936 } 2937 2938 // Load address of inline cache miss code into scratch register 2939 // and branch to cache miss handler. 2940 BLOCK_COMMENT("IC miss handler {"); 2941 BIND(ICM); 2942 unsigned int labelOffset = offset(); 2943 AddressLiteral icmiss(SharedRuntime::get_ic_miss_stub()); 2944 2945 load_const_optimized(scratch, icmiss); 2946 z_br(scratch); 2947 2948 // Fill unused space. 
2949 if (requiredSize > 0) { 2950 while ((offset() - startOffset) < requiredSize) { 2951 if (trapMarker == 0) { 2952 z_nop(); 2953 } else { 2954 z_illtrap(trapMarker); 2955 } 2956 } 2957 } 2958 BLOCK_COMMENT("} IC miss handler"); 2959 return labelOffset; 2960 } 2961 2962 void MacroAssembler::nmethod_UEP(Label& ic_miss) { 2963 Register ic_reg = Z_inline_cache; 2964 int klass_offset = oopDesc::klass_offset_in_bytes(); 2965 if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) { 2966 if (VM_Version::has_CompareBranch()) { 2967 z_cgij(Z_ARG1, 0, Assembler::bcondEqual, ic_miss); 2968 } else { 2969 z_ltgr(Z_ARG1, Z_ARG1); 2970 z_bre(ic_miss); 2971 } 2972 } 2973 // Compare cached class against klass from receiver. 2974 compare_klass_ptr(ic_reg, klass_offset, Z_ARG1, false); 2975 z_brne(ic_miss); 2976 } 2977 2978 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 2979 Register super_klass, 2980 Register temp1_reg, 2981 Label* L_success, 2982 Label* L_failure, 2983 Label* L_slow_path, 2984 RegisterOrConstant super_check_offset) { 2985 2986 const int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 2987 const int sco_offset = in_bytes(Klass::super_check_offset_offset()); 2988 2989 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); 2990 bool need_slow_path = (must_load_sco || 2991 super_check_offset.constant_or_zero() == sc_offset); 2992 2993 // Input registers must not overlap. 2994 assert_different_registers(sub_klass, super_klass, temp1_reg); 2995 if (super_check_offset.is_register()) { 2996 assert_different_registers(sub_klass, super_klass, 2997 super_check_offset.as_register()); 2998 } else if (must_load_sco) { 2999 assert(temp1_reg != noreg, "supply either a temp or a register offset"); 3000 } 3001 3002 const Register Rsuper_check_offset = temp1_reg; 3003 3004 NearLabel L_fallthrough; 3005 int label_nulls = 0; 3006 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 3007 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 3008 if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; } 3009 assert(label_nulls <= 1 || 3010 (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path), 3011 "at most one null in the batch, usually"); 3012 3013 BLOCK_COMMENT("check_klass_subtype_fast_path {"); 3014 // If the pointers are equal, we are done (e.g., String[] elements). 3015 // This self-check enables sharing of secondary supertype arrays among 3016 // non-primary types such as array-of-interface. Otherwise, each such 3017 // type would need its own customized SSA. 3018 // We move this check to the front of the fast path because many 3019 // type checks are in fact trivially successful in this manner, 3020 // so we get a nicely predicted branch right at the start of the check. 3021 compare64_and_branch(sub_klass, super_klass, bcondEqual, *L_success); 3022 3023 // Check the supertype display, which is uint. 3024 if (must_load_sco) { 3025 z_llgf(Rsuper_check_offset, sco_offset, super_klass); 3026 super_check_offset = RegisterOrConstant(Rsuper_check_offset); 3027 } 3028 Address super_check_addr(sub_klass, super_check_offset, 0); 3029 z_cg(super_klass, super_check_addr); // compare w/ displayed supertype 3030 3031 // This check has worked decisively for primary supers. 3032 // Secondary supers are sought in the super_cache ('super_cache_addr'). 3033 // (Secondary supers are interfaces and very deeply nested subtypes.) 
3034 // This works in the same check above because of a tricky aliasing 3035 // between the super_cache and the primary super display elements. 3036 // (The 'super_check_addr' can address either, as the case requires.) 3037 // Note that the cache is updated below if it does not help us find 3038 // what we need immediately. 3039 // So if it was a primary super, we can just fail immediately. 3040 // Otherwise, it's the slow path for us (no success at this point). 3041 3042 // Hacked jmp, which may only be used just before L_fallthrough. 3043 #define final_jmp(label) \ 3044 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 3045 else { branch_optimized(Assembler::bcondAlways, label); } /*omit semicolon*/ 3046 3047 if (super_check_offset.is_register()) { 3048 branch_optimized(Assembler::bcondEqual, *L_success); 3049 z_cfi(super_check_offset.as_register(), sc_offset); 3050 if (L_failure == &L_fallthrough) { 3051 branch_optimized(Assembler::bcondEqual, *L_slow_path); 3052 } else { 3053 branch_optimized(Assembler::bcondNotEqual, *L_failure); 3054 final_jmp(*L_slow_path); 3055 } 3056 } else if (super_check_offset.as_constant() == sc_offset) { 3057 // Need a slow path; fast failure is impossible. 3058 if (L_slow_path == &L_fallthrough) { 3059 branch_optimized(Assembler::bcondEqual, *L_success); 3060 } else { 3061 branch_optimized(Assembler::bcondNotEqual, *L_slow_path); 3062 final_jmp(*L_success); 3063 } 3064 } else { 3065 // No slow path; it's a fast decision. 3066 if (L_failure == &L_fallthrough) { 3067 branch_optimized(Assembler::bcondEqual, *L_success); 3068 } else { 3069 branch_optimized(Assembler::bcondNotEqual, *L_failure); 3070 final_jmp(*L_success); 3071 } 3072 } 3073 3074 bind(L_fallthrough); 3075 #undef local_brc 3076 #undef final_jmp 3077 BLOCK_COMMENT("} check_klass_subtype_fast_path"); 3078 // fallthru (to slow path) 3079 } 3080 3081 void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass, 3082 Register Rsuperklass, 3083 Register Rarray_ptr, // tmp 3084 Register Rlength, // tmp 3085 Label* L_success, 3086 Label* L_failure) { 3087 // Input registers must not overlap. 3088 // Also check for R1 which is explicitly used here. 3089 assert_different_registers(Z_R1, Rsubklass, Rsuperklass, Rarray_ptr, Rlength); 3090 NearLabel L_fallthrough; 3091 int label_nulls = 0; 3092 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 3093 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 3094 assert(label_nulls <= 1, "at most one null in the batch"); 3095 3096 const int ss_offset = in_bytes(Klass::secondary_supers_offset()); 3097 const int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3098 3099 const int length_offset = Array<Klass*>::length_offset_in_bytes(); 3100 const int base_offset = Array<Klass*>::base_offset_in_bytes(); 3101 3102 // Hacked jmp, which may only be used just before L_fallthrough. 3103 #define final_jmp(label) \ 3104 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 3105 else branch_optimized(Assembler::bcondAlways, label) /*omit semicolon*/ 3106 3107 NearLabel loop_iterate, loop_count, match; 3108 3109 BLOCK_COMMENT("check_klass_subtype_slow_path {"); 3110 z_lg(Rarray_ptr, ss_offset, Rsubklass); 3111 3112 load_and_test_int(Rlength, Address(Rarray_ptr, length_offset)); 3113 branch_optimized(Assembler::bcondZero, *L_failure); 3114 3115 // Oops in table are NO MORE compressed. 3116 z_cg(Rsuperklass, base_offset, Rarray_ptr); // Check array element for match. 
3117 z_bre(match); // Shortcut for array length = 1. 3118 3119 // No match yet, so we must walk the array's elements. 3120 z_lngfr(Rlength, Rlength); 3121 z_sllg(Rlength, Rlength, LogBytesPerWord); // -#bytes of cache array 3122 z_llill(Z_R1, BytesPerWord); // Set increment/end index. 3123 add2reg(Rlength, 2 * BytesPerWord); // start index = -(n-2)*BytesPerWord 3124 z_slgr(Rarray_ptr, Rlength); // start addr: += (n-2)*BytesPerWord 3125 z_bru(loop_count); 3126 3127 BIND(loop_iterate); 3128 z_cg(Rsuperklass, base_offset, Rlength, Rarray_ptr); // Check array element for match. 3129 z_bre(match); 3130 BIND(loop_count); 3131 z_brxlg(Rlength, Z_R1, loop_iterate); 3132 3133 // Rsuperklass not found among secondary super classes -> failure. 3134 branch_optimized(Assembler::bcondAlways, *L_failure); 3135 3136 // Got a hit. Return success (zero result). Set cache. 3137 // Cache load doesn't happen here. For speed it is directly emitted by the compiler. 3138 3139 BIND(match); 3140 3141 z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache. 3142 3143 final_jmp(*L_success); 3144 3145 // Exit to the surrounding code. 3146 BIND(L_fallthrough); 3147 #undef local_brc 3148 #undef final_jmp 3149 BLOCK_COMMENT("} check_klass_subtype_slow_path"); 3150 } 3151 3152 // Emitter for combining fast and slow path. 3153 void MacroAssembler::check_klass_subtype(Register sub_klass, 3154 Register super_klass, 3155 Register temp1_reg, 3156 Register temp2_reg, 3157 Label& L_success) { 3158 NearLabel failure; 3159 BLOCK_COMMENT(err_msg("check_klass_subtype(%s subclass of %s) {", sub_klass->name(), super_klass->name())); 3160 check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg, 3161 &L_success, &failure, nullptr); 3162 check_klass_subtype_slow_path(sub_klass, super_klass, 3163 temp1_reg, temp2_reg, &L_success, nullptr); 3164 BIND(failure); 3165 BLOCK_COMMENT("} check_klass_subtype"); 3166 } 3167 3168 // scans r_count pointer sized words at [r_addr] for occurrence of r_value, 3169 // generic (r_count must be >0) 3170 // iff found: CC eq, r_result == 0 3171 void MacroAssembler::repne_scan(Register r_addr, Register r_value, Register r_count, Register r_result) { 3172 NearLabel L_loop, L_exit; 3173 3174 BLOCK_COMMENT("repne_scan {"); 3175 #ifdef ASSERT 3176 z_chi(r_count, 0); 3177 asm_assert(bcondHigh, "count must be positive", 11); 3178 #endif 3179 3180 clear_reg(r_result, true /* whole_reg */, false /* set_cc */); // sets r_result=0, let's hope that search will be successful 3181 3182 bind(L_loop); 3183 z_cg(r_value, Address(r_addr)); 3184 z_bre(L_exit); // branch on success 3185 z_la(r_addr, wordSize, r_addr); 3186 z_brct(r_count, L_loop); 3187 3188 // z_brct above doesn't change CC. 3189 // If we reach here, then the value in r_value is not present. Set r_result to 1. 3190 z_lghi(r_result, 1); 3191 3192 bind(L_exit); 3193 BLOCK_COMMENT("} repne_scan"); 3194 } 3195 3196 // Ensure that the inline code and the stub are using the same registers. 
3197 #define LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS \ 3198 do { \ 3199 assert(r_super_klass == Z_ARG1 && \ 3200 r_array_base == Z_ARG5 && \ 3201 r_array_length == Z_ARG4 && \ 3202 (r_array_index == Z_ARG3 || r_array_index == noreg) && \ 3203 (r_sub_klass == Z_ARG2 || r_sub_klass == noreg) && \ 3204 (r_bitmap == Z_R10 || r_bitmap == noreg) && \ 3205 (r_result == Z_R11 || r_result == noreg), "registers must match s390.ad"); \ 3206 } while(0) 3207 3208 // Note: this method also kills Z_R1_scratch register on machines older than z15 3209 void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, 3210 Register r_super_klass, 3211 Register r_temp1, 3212 Register r_temp2, 3213 Register r_temp3, 3214 Register r_temp4, 3215 Register r_result, 3216 u1 super_klass_slot) { 3217 NearLabel L_done, L_failure; 3218 3219 BLOCK_COMMENT("lookup_secondary_supers_table {"); 3220 3221 const Register 3222 r_array_base = r_temp1, 3223 r_array_length = r_temp2, 3224 r_array_index = r_temp3, 3225 r_bitmap = r_temp4; 3226 3227 LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; 3228 3229 z_lg(r_bitmap, Address(r_sub_klass, Klass::secondary_supers_bitmap_offset())); 3230 3231 // First check the bitmap to see if super_klass might be present. If 3232 // the bit is zero, we are certain that super_klass is not one of 3233 // the secondary supers. 3234 u1 bit = super_klass_slot; 3235 int shift_count = Klass::SECONDARY_SUPERS_TABLE_MASK - bit; 3236 3237 z_sllg(r_array_index, r_bitmap, shift_count); // take the bit to 63rd location 3238 3239 // Initialize r_result with 0 (indicating success). If searching fails, r_result will be loaded 3240 // with 1 (failure) at the end of this method. 3241 clear_reg(r_result, true /* whole_reg */, false /* set_cc */); // r_result = 0 3242 3243 // We test the MSB of r_array_index, i.e., its sign bit 3244 testbit(r_array_index, 63); 3245 z_bfalse(L_failure); // if not set, then jump!!! 3246 3247 // We will consult the secondary-super array. 3248 z_lg(r_array_base, Address(r_sub_klass, Klass::secondary_supers_offset())); 3249 3250 // The value i in r_array_index is >= 1, so even though r_array_base 3251 // points to the length, we don't need to adjust it to point to the 3252 // data. 3253 assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "Adjust this code"); 3254 3255 // Get the first array index that can contain super_klass. 3256 if (bit != 0) { 3257 pop_count_long(r_array_index, r_array_index, Z_R1_scratch); // kills Z_R1_scratch on machines older than z15 3258 3259 // NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word. 3260 z_sllg(r_array_index, r_array_index, LogBytesPerWord); // scale 3261 } else { 3262 // Actually use index 0, but r_array_base and r_array_index are off by 1 word 3263 // such that the sum is precise. 3264 z_lghi(r_array_index, BytesPerWord); // for slow path (scaled) 3265 } 3266 3267 z_cg(r_super_klass, Address(r_array_base, r_array_index)); 3268 branch_optimized(bcondEqual, L_done); // found a match; success 3269 3270 // Is there another entry to check? Consult the bitmap. 3271 testbit(r_bitmap, (bit + 1) & Klass::SECONDARY_SUPERS_TABLE_MASK); 3272 z_bfalse(L_failure); 3273 3274 // Linear probe. Rotate the bitmap so that the next bit to test is 3275 // in Bit 2 for the look-ahead check in the slow path. 3276 if (bit != 0) { 3277 z_rllg(r_bitmap, r_bitmap, 64-bit); // rotate right 3278 } 3279 3280 // Calls into the stub generated by lookup_secondary_supers_table_slow_path. 
3281 // Arguments: r_super_klass, r_array_base, r_array_index, r_bitmap. 3282 // Kills: r_array_length. 3283 // Returns: r_result 3284 3285 call_stub(StubRoutines::lookup_secondary_supers_table_slow_path_stub()); 3286 3287 z_bru(L_done); // pass whatever result we got from a slow path 3288 3289 bind(L_failure); 3290 // TODO: use load immediate on condition and z_bru above will not be required 3291 z_lghi(r_result, 1); 3292 3293 bind(L_done); 3294 BLOCK_COMMENT("} lookup_secondary_supers_table"); 3295 3296 if (VerifySecondarySupers) { 3297 verify_secondary_supers_table(r_sub_klass, r_super_klass, r_result, 3298 r_temp1, r_temp2, r_temp3); 3299 } 3300 } 3301 3302 // Called by code generated by check_klass_subtype_slow_path 3303 // above. This is called when there is a collision in the hashed 3304 // lookup in the secondary supers array. 3305 void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_klass, 3306 Register r_array_base, 3307 Register r_array_index, 3308 Register r_bitmap, 3309 Register r_result, 3310 Register r_temp1) { 3311 assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, r_result, r_temp1); 3312 3313 const Register 3314 r_array_length = r_temp1, 3315 r_sub_klass = noreg; 3316 3317 LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; 3318 3319 BLOCK_COMMENT("lookup_secondary_supers_table_slow_path {"); 3320 NearLabel L_done, L_failure; 3321 3322 // Load the array length. 3323 z_llgf(r_array_length, Address(r_array_base, Array<Klass*>::length_offset_in_bytes())); 3324 3325 // And adjust the array base to point to the data. 3326 // NB! 3327 // Effectively increments the current slot index by 1. 3328 assert(Array<Klass*>::base_offset_in_bytes() == wordSize, ""); 3329 add2reg(r_array_base, Array<Klass*>::base_offset_in_bytes()); 3330 3331 // Linear probe 3332 NearLabel L_huge; 3333 3334 // The bitmap is full to bursting. 3335 z_chi(r_array_length, Klass::SECONDARY_SUPERS_BITMAP_FULL - 2); 3336 z_brh(L_huge); 3337 3338 // NB! Our caller has checked bits 0 and 1 in the bitmap. The 3339 // current slot (at secondary_supers[r_array_index]) has not yet 3340 // been inspected, and r_array_index may be out of bounds if we 3341 // wrapped around the end of the array. 3342 3343 { // This is conventional linear probing, but instead of terminating 3344 // when a null entry is found in the table, we maintain a bitmap 3345 // in which a 0 indicates missing entries. 3346 // The check above guarantees there are 0s in the bitmap, so the loop 3347 // eventually terminates. 3348 3349 #ifdef ASSERT 3350 // r_result is set to 0 by lookup_secondary_supers_table. 3351 // clear_reg(r_result, true /* whole_reg */, false /* set_cc */); 3352 z_cghi(r_result, 0); 3353 asm_assert(bcondEqual, "r_result required to be 0, used by z_locgr", 44); 3354 3355 // We should only reach here after having found a bit in the bitmap. 3356 z_ltgr(r_array_length, r_array_length); 3357 asm_assert(bcondHigh, "array_length > 0, should hold", 22); 3358 #endif // ASSERT 3359 3360 // Compute limit in r_array_length 3361 add2reg(r_array_length, -1); 3362 z_sllg(r_array_length, r_array_length, LogBytesPerWord); 3363 3364 NearLabel L_loop; 3365 bind(L_loop); 3366 3367 // Check for wraparound. 
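// Illustration (numbers are examples only): with 5 secondary supers the limit
// computed above is (5-1)*BytesPerWord == 32. Once r_array_index exceeds that
// limit, the z_locgr below overwrites it with the 0 still held in r_result,
// so the probe wraps around and continues at the first slot.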
3368 z_cgr(r_array_index, r_array_length);
3369 z_locgr(r_array_index, r_result, bcondHigh); // r_result contains 0
3370
3371 z_cg(r_super_klass, Address(r_array_base, r_array_index));
3372 z_bre(L_done); // success
3373
3374 // look-ahead check: if Bit 2 is 0, we're done
3375 testbit(r_bitmap, 2);
3376 z_bfalse(L_failure);
3377
3378 z_rllg(r_bitmap, r_bitmap, 64-1); // rotate right
3379 add2reg(r_array_index, BytesPerWord);
3380
3381 z_bru(L_loop);
3382 }
3383
3384 { // Degenerate case: more than 64 secondary supers.
3385 // FIXME: We could do something smarter here, maybe a vectorized
3386 // comparison or a binary search, but is that worth any added
3387 // complexity?
3388
3389 bind(L_huge);
3390 repne_scan(r_array_base, r_super_klass, r_array_length, r_result);
3391
3392 z_bru(L_done); // forward the result we got from repne_scan
3393 }
3394
3395 bind(L_failure);
3396 z_lghi(r_result, 1);
3397
3398 bind(L_done);
3399 BLOCK_COMMENT("} lookup_secondary_supers_table_slow_path");
3400 }
3401
3402 // Make sure that the hashed lookup and a linear scan agree.
3403 void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass,
3404 Register r_super_klass,
3405 Register r_result /* expected */,
3406 Register r_temp1,
3407 Register r_temp2,
3408 Register r_temp3) {
3409 assert_different_registers(r_sub_klass, r_super_klass, r_result, r_temp1, r_temp2, r_temp3);
3410
3411 const Register
3412 r_array_base = r_temp1,
3413 r_array_length = r_temp2,
3414 r_array_index = r_temp3,
3415 r_bitmap = noreg; // unused
3416
3417 const Register r_one = Z_R0_scratch;
3418 z_lghi(r_one, 1); // for locgr below, to load a result for failure
3419
3420 LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
3421
3422 BLOCK_COMMENT("verify_secondary_supers_table {");
3423
3424 Label L_passed, L_failure;
3425
3426 // We will consult the secondary-super array.
3427 z_lg(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset())));
3428
3429 // Load the array length.
3430 z_llgf(r_array_length, Address(r_array_base, Array<Klass*>::length_offset_in_bytes()));
3431
3432 // And adjust the array base to point to the data.
3433 z_aghi(r_array_base, Array<Klass*>::base_offset_in_bytes()); 3434 3435 const Register r_linear_result = r_array_index; // reuse 3436 z_chi(r_array_length, 0); 3437 z_locgr(r_linear_result, r_one, bcondNotHigh); // load failure if array_length <= 0 3438 z_brc(bcondNotHigh, L_failure); 3439 repne_scan(r_array_base, r_super_klass, r_array_length, r_linear_result); 3440 bind(L_failure); 3441 3442 z_cr(r_result, r_linear_result); 3443 z_bre(L_passed); 3444 3445 assert_different_registers(Z_ARG1, r_sub_klass, r_linear_result, r_result); 3446 lgr_if_needed(Z_ARG1, r_super_klass); 3447 assert_different_registers(Z_ARG2, r_linear_result, r_result); 3448 lgr_if_needed(Z_ARG2, r_sub_klass); 3449 assert_different_registers(Z_ARG3, r_result); 3450 z_lgr(Z_ARG3, r_linear_result); 3451 z_lgr(Z_ARG4, r_result); 3452 const char* msg = "mismatch"; 3453 load_const_optimized(Z_ARG5, (address)msg); 3454 3455 call_VM_leaf(CAST_FROM_FN_PTR(address, Klass::on_secondary_supers_verification_failure)); 3456 should_not_reach_here(); 3457 3458 bind(L_passed); 3459 3460 BLOCK_COMMENT("} verify_secondary_supers_table"); 3461 } 3462 3463 void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) { 3464 assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required"); 3465 3466 Label L_fallthrough; 3467 if (L_fast_path == nullptr) { 3468 L_fast_path = &L_fallthrough; 3469 } else if (L_slow_path == nullptr) { 3470 L_slow_path = &L_fallthrough; 3471 } 3472 3473 // Fast path check: class is fully initialized. 3474 // init_state needs acquire, but S390 is TSO, and so we are already good. 3475 z_cli(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized); 3476 z_bre(*L_fast_path); 3477 3478 // Fast path check: current thread is initializer thread 3479 z_cg(thread, Address(klass, InstanceKlass::init_thread_offset())); 3480 if (L_slow_path == &L_fallthrough) { 3481 z_bre(*L_fast_path); 3482 } else if (L_fast_path == &L_fallthrough) { 3483 z_brne(*L_slow_path); 3484 } else { 3485 Unimplemented(); 3486 } 3487 3488 bind(L_fallthrough); 3489 } 3490 3491 // Increment a counter at counter_address when the eq condition code is 3492 // set. Kills registers tmp1_reg and tmp2_reg and preserves the condition code. 3493 void MacroAssembler::increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg) { 3494 Label l; 3495 z_brne(l); 3496 load_const(tmp1_reg, counter_address); 3497 add2mem_32(Address(tmp1_reg), 1, tmp2_reg); 3498 z_cr(tmp1_reg, tmp1_reg); // Set cc to eq. 3499 bind(l); 3500 } 3501 3502 // "The box" is the space on the stack where we copy the object mark. 3503 void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2) { 3504 3505 assert(LockingMode != LM_LIGHTWEIGHT, "uses fast_lock_lightweight"); 3506 assert_different_registers(oop, box, temp1, temp2, Z_R0_scratch); 3507 3508 Register displacedHeader = temp1; 3509 Register currentHeader = temp1; 3510 Register temp = temp2; 3511 3512 NearLabel done, object_has_monitor; 3513 3514 const int hdr_offset = oopDesc::mark_offset_in_bytes(); 3515 3516 BLOCK_COMMENT("compiler_fast_lock_object {"); 3517 3518 // Load markWord from oop into mark. 
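// Background (summarized from markWord.hpp): the two low bits of the markWord
// encode the lock state - 01 unlocked, 00 stack-locked, 10 inflated (monitor),
// 11 marked for GC. The tmll/oill tests below rely on exactly this encoding.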
3519 z_lg(displacedHeader, hdr_offset, oop);
3520
3521 if (DiagnoseSyncOnValueBasedClasses != 0) {
3522 load_klass(temp, oop);
3523 z_tm(Address(temp, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class);
3524 z_brne(done);
3525 }
3526
3527 // Handle existing monitor.
3528 // The object has an existing monitor iff (mark & monitor_value) != 0.
3529 guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word");
3530 z_tmll(displacedHeader, markWord::monitor_value);
3531 z_brnaz(object_has_monitor);
3532
3533 if (LockingMode == LM_MONITOR) {
3534 // Set NE to indicate 'failure' -> take slow-path
3535 // From loading the markWord, we know that oop != nullptr
3536 z_ltgr(oop, oop);
3537 z_bru(done);
3538 } else {
3539 assert(LockingMode == LM_LEGACY, "must be");
3540 // Set mark to markWord | markWord::unlocked_value.
3541 z_oill(displacedHeader, markWord::unlocked_value);
3542
3543 // Load Compare Value application register.
3544
3545 // Initialize the box (must happen before we update the object mark).
3546 z_stg(displacedHeader, BasicLock::displaced_header_offset_in_bytes(), box);
3547
3548 // Compare object markWord with mark and if equal, exchange box with object markWord.
3549 // If the compare-and-swap succeeds, then we found an unlocked object and have now locked it.
3550 z_csg(displacedHeader, box, hdr_offset, oop);
3551 assert(currentHeader == displacedHeader, "must be same register"); // currentHeader and displacedHeader alias the same register (temp1).
3552 z_bre(done);
3553
3554 // We did not see an unlocked object
3555 // currentHeader contains what is currently stored in the oop's markWord.
3556 // We might have a recursive case. Verify by checking if the owner is self.
3557 // To do so, compare the value in the markWord (currentHeader) with the stack pointer.
3558 z_sgr(currentHeader, Z_SP);
3559 load_const_optimized(temp, (~(os::vm_page_size() - 1) | markWord::lock_mask_in_place));
3560
3561 z_ngr(currentHeader, temp);
3562
3563 // result zero: owner is self -> recursive lock. Indicate that by storing 0 in the box.
3564 // result not-zero: attempt failed. We don't hold the lock -> go for slow case.
3565
3566 z_stg(currentHeader/*==0 or not 0*/, BasicLock::displaced_header_offset_in_bytes(), box);
3567
3568 z_bru(done);
3569 }
3570
3571 bind(object_has_monitor);
3572
3573 Register zero = temp;
3574 Register monitor_tagged = displacedHeader; // Tagged with markWord::monitor_value.
3575
3576 // Try to CAS owner (no owner => current thread's _lock_id).
3577 // If csg succeeds then CR=EQ, otherwise, register zero is filled
3578 // with the current owner.
3579 z_lghi(zero, 0);
3580 z_lg(Z_R0_scratch, Address(Z_thread, JavaThread::lock_id_offset()));
3581 z_csg(zero, Z_R0_scratch, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged);
3582
3583 // Store a non-null value into the box.
3584 z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box);
3585
3586 z_bre(done); // acquired the lock for the first time.
3587
3588 BLOCK_COMMENT("fast_path_recursive_lock {");
3589 // Check if we are already the owner (recursive lock)
3590 z_cgr(Z_R0_scratch, zero); // owner is stored in zero by "z_csg" above
3591 z_brne(done); // not a recursive lock
3592
3593 // Current thread already owns the lock. Just increment recursion count.
3594 z_agsi(Address(monitor_tagged, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 1ll); 3595 z_cgr(zero, zero); // set the CC to EQUAL 3596 BLOCK_COMMENT("} fast_path_recursive_lock"); 3597 bind(done); 3598 3599 BLOCK_COMMENT("} compiler_fast_lock_object"); 3600 // If locking was successful, CR should indicate 'EQ'. 3601 // The compiler or the native wrapper generates a branch to the runtime call 3602 // _complete_monitor_locking_Java. 3603 } 3604 3605 void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2) { 3606 3607 assert(LockingMode != LM_LIGHTWEIGHT, "uses fast_unlock_lightweight"); 3608 assert_different_registers(oop, box, temp1, temp2, Z_R0_scratch); 3609 3610 Register displacedHeader = temp1; 3611 Register currentHeader = temp2; 3612 Register temp = temp1; 3613 3614 const int hdr_offset = oopDesc::mark_offset_in_bytes(); 3615 3616 Label done, object_has_monitor, not_recursive; 3617 3618 BLOCK_COMMENT("compiler_fast_unlock_object {"); 3619 3620 if (LockingMode == LM_LEGACY) { 3621 // Find the lock address and load the displaced header from the stack. 3622 // if the displaced header is zero, we have a recursive unlock. 3623 load_and_test_long(displacedHeader, Address(box, BasicLock::displaced_header_offset_in_bytes())); 3624 z_bre(done); 3625 } 3626 3627 // Handle existing monitor. 3628 // The object has an existing monitor iff (mark & monitor_value) != 0. 3629 z_lg(currentHeader, hdr_offset, oop); 3630 guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word"); 3631 3632 z_tmll(currentHeader, markWord::monitor_value); 3633 z_brnaz(object_has_monitor); 3634 3635 if (LockingMode == LM_MONITOR) { 3636 // Set NE to indicate 'failure' -> take slow-path 3637 z_ltgr(oop, oop); 3638 z_bru(done); 3639 } else { 3640 assert(LockingMode == LM_LEGACY, "must be"); 3641 // Check if it is still a lightweight lock, this is true if we see 3642 // the stack address of the basicLock in the markWord of the object 3643 // copy box to currentHeader such that csg does not kill it. 3644 z_lgr(currentHeader, box); 3645 z_csg(currentHeader, displacedHeader, hdr_offset, oop); 3646 z_bru(done); // csg sets CR as desired. 3647 } 3648 3649 // In case of LM_LIGHTWEIGHT, we may reach here with (temp & ObjectMonitor::ANONYMOUS_OWNER) != 0. 3650 // This is handled like owner thread mismatches: We take the slow path. 3651 3652 // Handle existing monitor. 3653 bind(object_has_monitor); 3654 3655 z_lg(Z_R0_scratch, Address(Z_thread, JavaThread::lock_id_offset())); 3656 z_cg(Z_R0_scratch, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 3657 z_brne(done); 3658 3659 BLOCK_COMMENT("fast_path_recursive_unlock {"); 3660 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); 3661 z_bre(not_recursive); // if 0 then jump, it's not recursive locking 3662 3663 // Recursive inflated unlock 3664 z_agsi(Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), -1ll); 3665 z_cgr(currentHeader, currentHeader); // set the CC to EQUAL 3666 BLOCK_COMMENT("} fast_path_recursive_unlock"); 3667 z_bru(done); 3668 3669 bind(not_recursive); 3670 3671 NearLabel check_succ, set_eq_unlocked; 3672 3673 // Set owner to null. 3674 // Release to satisfy the JMM 3675 z_release(); 3676 z_lghi(temp, 0); 3677 z_stg(temp, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader); 3678 // We need a full fence after clearing owner to avoid stranding. 
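// (Rationale: the owner store above must be globally visible before the
// entry-list and successor loads below. Without the fence, the store could
// still sit in the store buffer while we read stale "empty" lists and leave,
// stranding a thread that queued itself in the meantime.)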
3679 z_fence();
3680
3681 // Check if the entry lists are empty (EntryList first - by convention).
3682 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
3683 z_brne(check_succ);
3684 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
3685 z_bre(done); // If so we are done.
3686
3687 bind(check_succ);
3688
3689 // Check if there is a successor.
3690 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)));
3691 z_brne(set_eq_unlocked); // If so we are done.
3692
3693 // Save the monitor pointer in the current thread, so we can try to
3694 // reacquire the lock in SharedRuntime::monitor_exit_helper().
3695 z_xilf(currentHeader, markWord::monitor_value);
3696 z_stg(currentHeader, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset()));
3697
3698 z_ltgr(oop, oop); // Set flag = NE
3699 z_bru(done);
3700
3701 bind(set_eq_unlocked);
3702 z_cr(temp, temp); // Set flag = EQ
3703
3704 bind(done);
3705
3706 BLOCK_COMMENT("} compiler_fast_unlock_object");
3707 // flag == EQ indicates success
3708 // flag == NE indicates failure
3709 }
3710
3711 void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
3712 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3713 bs->resolve_jobject(this, value, tmp1, tmp2);
3714 }
3715
3716 void MacroAssembler::resolve_global_jobject(Register value, Register tmp1, Register tmp2) {
3717 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3718 bs->resolve_global_jobject(this, value, tmp1, tmp2);
3719 }
3720
3721 // Last_Java_sp must comply with the rules in frame_s390.hpp.
3722 void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation) {
3723 BLOCK_COMMENT("set_last_Java_frame {");
3724
3725 // Always set last_Java_pc and flags first because once last_Java_sp
3726 // is visible, has_last_Java_frame is true and users will look at the
3727 // rest of the fields. (Note: flags should always be zero before we
3728 // get here, so they don't need to be set.)
3729
3730 // Verify that last_Java_pc was zeroed on return to Java.
3731 if (allow_relocation) {
3732 asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()),
3733 Z_thread,
3734 "last_Java_pc not zeroed before leaving Java",
3735 0x200);
3736 } else {
3737 asm_assert_mem8_is_zero_static(in_bytes(JavaThread::last_Java_pc_offset()),
3738 Z_thread,
3739 "last_Java_pc not zeroed before leaving Java",
3740 0x200);
3741 }
3742
3743 // When returning from calling out from Java mode the frame anchor's
3744 // last_Java_pc will always be set to null. It is set here so that,
3745 // if we are doing a call to native (not VM), we capture the
3746 // known pc and don't have to rely on the native call having a
3747 // standard frame linkage where we can find the pc.
3748 if (last_Java_pc!=noreg) {
3749 z_stg(last_Java_pc, Address(Z_thread, JavaThread::last_Java_pc_offset()));
3750 }
3751
3752 // This membar release is not required on z/Architecture, since the sequence of stores
3753 // is maintained. Nevertheless, we leave it in to document the required ordering.
3754 // The implementation of z_release() should be empty.
3755 // z_release(); 3756 3757 z_stg(last_Java_sp, Address(Z_thread, JavaThread::last_Java_sp_offset())); 3758 BLOCK_COMMENT("} set_last_Java_frame"); 3759 } 3760 3761 void MacroAssembler::reset_last_Java_frame(bool allow_relocation) { 3762 BLOCK_COMMENT("reset_last_Java_frame {"); 3763 3764 if (allow_relocation) { 3765 asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()), 3766 Z_thread, 3767 "SP was not set, still zero", 3768 0x202); 3769 } else { 3770 asm_assert_mem8_isnot_zero_static(in_bytes(JavaThread::last_Java_sp_offset()), 3771 Z_thread, 3772 "SP was not set, still zero", 3773 0x202); 3774 } 3775 3776 // _last_Java_sp = 0 3777 // Clearing storage must be atomic here, so don't use clear_mem()! 3778 store_const(Address(Z_thread, JavaThread::last_Java_sp_offset()), 0); 3779 3780 // _last_Java_pc = 0 3781 store_const(Address(Z_thread, JavaThread::last_Java_pc_offset()), 0); 3782 3783 BLOCK_COMMENT("} reset_last_Java_frame"); 3784 return; 3785 } 3786 3787 void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, bool allow_relocation) { 3788 assert_different_registers(sp, tmp1); 3789 3790 // We cannot trust that code generated by the C++ compiler saves R14 3791 // to z_abi_160.return_pc, because sometimes it spills R14 using stmg at 3792 // z_abi_160.gpr14 (e.g. InterpreterRuntime::_new()). 3793 // Therefore we load the PC into tmp1 and let set_last_Java_frame() save 3794 // it into the frame anchor. 3795 get_PC(tmp1); 3796 set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1, allow_relocation); 3797 } 3798 3799 void MacroAssembler::set_thread_state(JavaThreadState new_state) { 3800 z_release(); 3801 3802 assert(Immediate::is_uimm16(_thread_max_state), "enum value out of range for instruction"); 3803 assert(sizeof(JavaThreadState) == sizeof(int), "enum value must have base type int"); 3804 store_const(Address(Z_thread, JavaThread::thread_state_offset()), new_state, Z_R0, false); 3805 } 3806 3807 void MacroAssembler::get_vm_result(Register oop_result) { 3808 z_lg(oop_result, Address(Z_thread, JavaThread::vm_result_offset())); 3809 clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(void*)); 3810 3811 verify_oop(oop_result, FILE_AND_LINE); 3812 } 3813 3814 void MacroAssembler::get_vm_result_2(Register result) { 3815 z_lg(result, Address(Z_thread, JavaThread::vm_result_2_offset())); 3816 clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(void*)); 3817 } 3818 3819 // We require that C code which does not return a value in vm_result will 3820 // leave it undisturbed. 3821 void MacroAssembler::set_vm_result(Register oop_result) { 3822 z_stg(oop_result, Address(Z_thread, JavaThread::vm_result_offset())); 3823 } 3824 3825 // Explicit null checks (used for method handle code). 3826 void MacroAssembler::null_check(Register reg, Register tmp, int64_t offset) { 3827 if (!ImplicitNullChecks) { 3828 NearLabel ok; 3829 3830 compare64_and_branch(reg, (intptr_t) 0, Assembler::bcondNotEqual, ok); 3831 3832 // We just put the address into reg if it was 0 (tmp==Z_R0 is allowed so we can't use it for the address). 3833 address exception_entry = Interpreter::throw_NullPointerException_entry(); 3834 load_absolute_address(reg, exception_entry); 3835 z_br(reg); 3836 3837 bind(ok); 3838 } else { 3839 if (needs_explicit_null_check((intptr_t)offset)) { 3840 // Provoke OS null exception if reg is null by 3841 // accessing M[reg] w/o changing any registers. 
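// (The loaded value is discarded; the load exists only to fault when reg is
// null, and the implicit null check machinery in the signal handler turns
// that fault into the NullPointerException.)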
3842 z_lg(tmp, 0, reg); 3843 } 3844 // else 3845 // Nothing to do, (later) access of M[reg + offset] 3846 // will provoke OS null exception if reg is null. 3847 } 3848 } 3849 3850 //------------------------------------- 3851 // Compressed Klass Pointers 3852 //------------------------------------- 3853 3854 // Klass oop manipulations if compressed. 3855 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 3856 Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided. (dst == src) also possible. 3857 address base = CompressedKlassPointers::base(); 3858 int shift = CompressedKlassPointers::shift(); 3859 bool need_zero_extend = base != 0; 3860 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3861 3862 BLOCK_COMMENT("cKlass encoder {"); 3863 3864 #ifdef ASSERT 3865 Label ok; 3866 z_tmll(current, CompressedKlassPointers::klass_alignment_in_bytes() - 1); // Check alignment. 3867 z_brc(Assembler::bcondAllZero, ok); 3868 // The plain disassembler does not recognize illtrap. It instead displays 3869 // a 32-bit value. Issuing two illtraps assures the disassembler finds 3870 // the proper beginning of the next instruction. 3871 z_illtrap(0xee); 3872 z_illtrap(0xee); 3873 bind(ok); 3874 #endif 3875 3876 // Scale down the incoming klass pointer first. 3877 // We then can be sure we calculate an offset that fits into 32 bit. 3878 // More generally speaking: all subsequent calculations are purely 32-bit. 3879 if (shift != 0) { 3880 z_srlg(dst, current, shift); 3881 current = dst; 3882 } 3883 3884 if (base != nullptr) { 3885 // Use scaled-down base address parts to match scaled-down klass pointer. 3886 unsigned int base_h = ((unsigned long)base)>>(32+shift); 3887 unsigned int base_l = (unsigned int)(((unsigned long)base)>>shift); 3888 3889 // General considerations: 3890 // - when calculating (current_h - base_h), all digits must cancel (become 0). 3891 // Otherwise, we would end up with a compressed klass pointer which doesn't 3892 // fit into 32-bit. 3893 // - Only bit#33 of the difference could potentially be non-zero. For that 3894 // to happen, (current_l < base_l) must hold. In this case, the subtraction 3895 // will create a borrow out of bit#32, nicely killing bit#33. 3896 // - With the above, we only need to consider current_l and base_l to 3897 // calculate the result. 3898 // - Both values are treated as unsigned. The unsigned subtraction is 3899 // replaced by adding (unsigned) the 2's complement of the subtrahend. 3900 3901 if (base_l == 0) { 3902 // - By theory, the calculation to be performed here (current_h - base_h) MUST 3903 // cancel all high-word bits. Otherwise, we would end up with an offset 3904 // (i.e. compressed klass pointer) that does not fit into 32 bit. 3905 // - current_l remains unchanged. 3906 // - Therefore, we can replace all calculation with just a 3907 // zero-extending load 32 to 64 bit. 3908 // - Even that can be replaced with a conditional load if dst != current. 3909 // (this is a local view. The shift step may have requested zero-extension). 3910 } else { 3911 if ((base_h == 0) && is_uimm(base_l, 31)) { 3912 // If we happen to find that (base_h == 0), and that base_l is within the range 3913 // which can be represented by a signed int, then we can use 64bit signed add with 3914 // (-base_l) as 32bit signed immediate operand. The add will take care of the 3915 // upper 32 bits of the result, saving us the need of an extra zero extension. 
3916 // For base_l to be in the required range, it must not have the most significant
3917 // bit (aka sign bit) set.
3918 lgr_if_needed(dst, current); // no zero/sign extension in this case!
3919 z_agfi(dst, -(int)base_l); // base_l must be passed as signed.
3920 need_zero_extend = false;
3921 current = dst;
3922 } else {
3923 // To begin with, we may need to copy and/or zero-extend the register operand.
3924 // We have to calculate (current_l - base_l). Because there is no unsigned
3925 // subtract instruction with immediate operand, we add the 2's complement of base_l.
3926 if (need_zero_extend) {
3927 z_llgfr(dst, current);
3928 need_zero_extend = false;
3929 } else {
3930 llgfr_if_needed(dst, current);
3931 }
3932 current = dst;
3933 z_alfi(dst, -base_l);
3934 }
3935 }
3936 }
3937
3938 if (need_zero_extend) {
3939 // We must zero-extend the calculated result. It may have some leftover bits in
3940 // the hi-word because we only did optimized calculations.
3941 z_llgfr(dst, current);
3942 } else {
3943 llgfr_if_needed(dst, current); // zero-extension while copying comes at no extra cost.
3944 }
3945
3946 BLOCK_COMMENT("} cKlass encoder");
3947 }
3948
3949 // This function calculates the size of the code generated by
3950 // decode_klass_not_null(Register dst, Register src)
3951 // when Universe::heap() isn't null. Hence, if the instructions
3952 // it generates change, then this method needs to be updated.
3953 int MacroAssembler::instr_size_for_decode_klass_not_null() {
3954 address base = CompressedKlassPointers::base();
3955 int shift_size = CompressedKlassPointers::shift() == 0 ? 0 : 6; /* sllg */
3956 int addbase_size = 0;
3957 assert(UseCompressedClassPointers, "only for compressed klass ptrs");
3958
3959 if (base != nullptr) {
3960 unsigned int base_h = ((unsigned long)base)>>32;
3961 unsigned int base_l = (unsigned int)((unsigned long)base);
3962 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
3963 addbase_size += 6; /* aih */
3964 } else if ((base_h == 0) && (base_l != 0)) {
3965 addbase_size += 6; /* algfi */
3966 } else {
3967 addbase_size += load_const_size();
3968 addbase_size += 4; /* algr */
3969 }
3970 }
3971 #ifdef ASSERT
3972 addbase_size += 10;
3973 addbase_size += 2; // Extra sigill.
3974 #endif
3975 return addbase_size + shift_size;
3976 }
3977
3978 // !!! If the instructions that get generated here change
3979 // then function instr_size_for_decode_klass_not_null()
3980 // needs to get updated.
3981 // This variant of decode_klass_not_null() must generate predictable code!
3982 // The code must only depend on globally known parameters.
3983 void MacroAssembler::decode_klass_not_null(Register dst) {
3984 address base = CompressedKlassPointers::base();
3985 int shift = CompressedKlassPointers::shift();
3986 int beg_off = offset();
3987 assert(UseCompressedClassPointers, "only for compressed klass ptrs");
3988
3989 BLOCK_COMMENT("cKlass decoder (const size) {");
3990
3991 if (shift != 0) { // Shift required?
3992 z_sllg(dst, dst, shift);
3993 }
3994 if (base != nullptr) {
3995 unsigned int base_h = ((unsigned long)base)>>32;
3996 unsigned int base_l = (unsigned int)((unsigned long)base);
3997 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
3998 z_aih(dst, base_h); // Base has no set bits in lower half.
3999 } else if ((base_h == 0) && (base_l != 0)) {
4000 z_algfi(dst, base_l); // Base has no set bits in upper half.
4001 } else {
4002 load_const(Z_R0, base); // Base has set bits everywhere.
4003 z_algr(dst, Z_R0);
4004 }
4005 }
4006
4007 #ifdef ASSERT
4008 Label ok;
4009 z_tmll(dst, CompressedKlassPointers::klass_alignment_in_bytes() - 1); // Check alignment.
4010 z_brc(Assembler::bcondAllZero, ok);
4011 // The plain disassembler does not recognize illtrap. It instead displays
4012 // a 32-bit value. Issuing two illtraps assures the disassembler finds
4013 // the proper beginning of the next instruction.
4014 z_illtrap(0xd1);
4015 z_illtrap(0xd1);
4016 bind(ok);
4017 #endif
4018 assert(offset() == beg_off + instr_size_for_decode_klass_not_null(), "Code gen mismatch.");
4019
4020 BLOCK_COMMENT("} cKlass decoder (const size)");
4021 }
4022
4023 // This variant of decode_klass_not_null() is for cases where
4024 // 1) the size of the generated instructions may vary
4025 // 2) the result is (potentially) stored in a register different from the source.
4026 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
4027 address base = CompressedKlassPointers::base();
4028 int shift = CompressedKlassPointers::shift();
4029 assert(UseCompressedClassPointers, "only for compressed klass ptrs");
4030
4031 BLOCK_COMMENT("cKlass decoder {");
4032
4033 if (src == noreg) src = dst;
4034
4035 if (shift != 0) { // Shift or at least move required?
4036 z_sllg(dst, src, shift);
4037 } else {
4038 lgr_if_needed(dst, src);
4039 }
4040
4041 if (base != nullptr) {
4042 unsigned int base_h = ((unsigned long)base)>>32;
4043 unsigned int base_l = (unsigned int)((unsigned long)base);
4044 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
4045 z_aih(dst, base_h); // Base has no set bits in lower half.
4046 } else if ((base_h == 0) && (base_l != 0)) {
4047 z_algfi(dst, base_l); // Base has no set bits in upper half.
4048 } else {
4049 load_const_optimized(Z_R0, base); // Base has set bits everywhere.
4050 z_algr(dst, Z_R0);
4051 }
4052 }
4053
4054 #ifdef ASSERT
4055 Label ok;
4056 z_tmll(dst, CompressedKlassPointers::klass_alignment_in_bytes() - 1); // Check alignment.
4057 z_brc(Assembler::bcondAllZero, ok);
4058 // The plain disassembler does not recognize illtrap. It instead displays
4059 // a 32-bit value. Issuing two illtraps assures the disassembler finds
4060 // the proper beginning of the next instruction.
4061 z_illtrap(0xd2);
4062 z_illtrap(0xd2);
4063 bind(ok);
4064 #endif
4065 BLOCK_COMMENT("} cKlass decoder");
4066 }
4067
4068 void MacroAssembler::load_klass(Register klass, Address mem) {
4069 if (UseCompressedClassPointers) {
4070 z_llgf(klass, mem);
4071 // Attention: no null check here!
4072 decode_klass_not_null(klass);
4073 } else {
4074 z_lg(klass, mem);
4075 }
4076 }
4077
4078 // Loads the obj's narrow klass into dst.
4079 // Input:
4080 // src - the oop we want to load the klass from.
4081 // dst - output nklass.
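// With compact object headers there is no separate klass field: the narrow
// klass id lives in the upper bits of the markWord, so it is recovered below
// by loading the mark and shifting right by markWord::klass_shift.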
4082 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) { 4083 BLOCK_COMMENT("load_narrow_klass_compact {"); 4084 assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders"); 4085 z_lg(dst, Address(src, oopDesc::mark_offset_in_bytes())); 4086 z_srlg(dst, dst, markWord::klass_shift); 4087 BLOCK_COMMENT("} load_narrow_klass_compact"); 4088 } 4089 4090 void MacroAssembler::cmp_klass(Register klass, Register obj, Register tmp) { 4091 BLOCK_COMMENT("cmp_klass {"); 4092 assert_different_registers(obj, klass, tmp); 4093 if (UseCompactObjectHeaders) { 4094 assert(tmp != noreg, "required"); 4095 assert_different_registers(klass, obj, tmp); 4096 load_narrow_klass_compact(tmp, obj); 4097 z_cr(klass, tmp); 4098 } else if (UseCompressedClassPointers) { 4099 z_c(klass, Address(obj, oopDesc::klass_offset_in_bytes())); 4100 } else { 4101 z_cg(klass, Address(obj, oopDesc::klass_offset_in_bytes())); 4102 } 4103 BLOCK_COMMENT("} cmp_klass"); 4104 } 4105 4106 void MacroAssembler::cmp_klasses_from_objects(Register obj1, Register obj2, Register tmp1, Register tmp2) { 4107 BLOCK_COMMENT("cmp_klasses_from_objects {"); 4108 if (UseCompactObjectHeaders) { 4109 assert(tmp1 != noreg && tmp2 != noreg, "required"); 4110 assert_different_registers(obj1, obj2, tmp1, tmp2); 4111 load_narrow_klass_compact(tmp1, obj1); 4112 load_narrow_klass_compact(tmp2, obj2); 4113 z_cr(tmp1, tmp2); 4114 } else if (UseCompressedClassPointers) { 4115 z_l(tmp1, Address(obj1, oopDesc::klass_offset_in_bytes())); 4116 z_c(tmp1, Address(obj2, oopDesc::klass_offset_in_bytes())); 4117 } else { 4118 z_lg(tmp1, Address(obj1, oopDesc::klass_offset_in_bytes())); 4119 z_cg(tmp1, Address(obj2, oopDesc::klass_offset_in_bytes())); 4120 } 4121 BLOCK_COMMENT("} cmp_klasses_from_objects"); 4122 } 4123 4124 void MacroAssembler::load_klass(Register klass, Register src_oop) { 4125 if (UseCompactObjectHeaders) { 4126 load_narrow_klass_compact(klass, src_oop); 4127 decode_klass_not_null(klass); 4128 } else if (UseCompressedClassPointers) { 4129 z_llgf(klass, oopDesc::klass_offset_in_bytes(), src_oop); 4130 decode_klass_not_null(klass); 4131 } else { 4132 z_lg(klass, oopDesc::klass_offset_in_bytes(), src_oop); 4133 } 4134 } 4135 4136 void MacroAssembler::store_klass(Register klass, Register dst_oop, Register ck) { 4137 assert(!UseCompactObjectHeaders, "Don't use with compact headers"); 4138 if (UseCompressedClassPointers) { 4139 assert_different_registers(dst_oop, klass, Z_R0); 4140 if (ck == noreg) ck = klass; 4141 encode_klass_not_null(ck, klass); 4142 z_st(ck, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 4143 } else { 4144 z_stg(klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 4145 } 4146 } 4147 4148 void MacroAssembler::store_klass_gap(Register s, Register d) { 4149 assert(!UseCompactObjectHeaders, "Don't use with compact headers"); 4150 if (UseCompressedClassPointers) { 4151 assert(s != d, "not enough registers"); 4152 // Support s = noreg. 4153 if (s != noreg) { 4154 z_st(s, Address(d, oopDesc::klass_gap_offset_in_bytes())); 4155 } else { 4156 z_mvhi(Address(d, oopDesc::klass_gap_offset_in_bytes()), 0); 4157 } 4158 } 4159 } 4160 4161 // Compare klass ptr in memory against klass ptr in register. 4162 // 4163 // Rop1 - klass in register, always uncompressed. 4164 // disp - Offset of klass in memory, compressed/uncompressed, depending on runtime flag. 4165 // Rbase - Base address of cKlass in memory. 4166 // maybenull - True if Rop1 possibly is a null. 
4167 void MacroAssembler::compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybenull) { 4168 4169 BLOCK_COMMENT("compare klass ptr {"); 4170 4171 if (UseCompressedClassPointers) { 4172 const int shift = CompressedKlassPointers::shift(); 4173 address base = CompressedKlassPointers::base(); 4174 4175 if (UseCompactObjectHeaders) { 4176 assert(shift >= 3, "cKlass encoder detected bad shift"); 4177 } else { 4178 assert((shift == 0) || (shift == 3), "cKlass encoder detected bad shift"); 4179 } 4180 assert_different_registers(Rop1, Z_R0); 4181 assert_different_registers(Rop1, Rbase, Z_R1); 4182 4183 // First encode register oop and then compare with cOop in memory. 4184 // This sequence saves an unnecessary cOop load and decode. 4185 if (base == nullptr) { 4186 if (shift == 0) { 4187 z_cl(Rop1, disp, Rbase); // Unscaled 4188 } else { 4189 z_srlg(Z_R0, Rop1, shift); // ZeroBased 4190 z_cl(Z_R0, disp, Rbase); 4191 } 4192 } else { // HeapBased 4193 #ifdef ASSERT 4194 bool used_R0 = true; 4195 bool used_R1 = true; 4196 #endif 4197 Register current = Rop1; 4198 Label done; 4199 4200 if (maybenull) { // null pointer must be preserved! 4201 z_ltgr(Z_R0, current); 4202 z_bre(done); 4203 current = Z_R0; 4204 } 4205 4206 unsigned int base_h = ((unsigned long)base)>>32; 4207 unsigned int base_l = (unsigned int)((unsigned long)base); 4208 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 4209 lgr_if_needed(Z_R0, current); 4210 z_aih(Z_R0, -((int)base_h)); // Base has no set bits in lower half. 4211 } else if ((base_h == 0) && (base_l != 0)) { 4212 lgr_if_needed(Z_R0, current); 4213 z_agfi(Z_R0, -(int)base_l); 4214 } else { 4215 int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base)); 4216 add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); // Subtract base by adding complement. 4217 } 4218 4219 if (shift != 0) { 4220 z_srlg(Z_R0, Z_R0, shift); 4221 } 4222 bind(done); 4223 z_cl(Z_R0, disp, Rbase); 4224 #ifdef ASSERT 4225 if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2); 4226 if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2); 4227 #endif 4228 } 4229 } else { 4230 z_clg(Rop1, disp, Z_R0, Rbase); 4231 } 4232 BLOCK_COMMENT("} compare klass ptr"); 4233 } 4234 4235 //--------------------------- 4236 // Compressed oops 4237 //--------------------------- 4238 4239 void MacroAssembler::encode_heap_oop(Register oop) { 4240 oop_encoder(oop, oop, true /*maybe null*/); 4241 } 4242 4243 void MacroAssembler::encode_heap_oop_not_null(Register oop) { 4244 oop_encoder(oop, oop, false /*not null*/); 4245 } 4246 4247 // Called with something derived from the oop base. e.g. oop_base>>3. 4248 int MacroAssembler::get_oop_base_pow2_offset(uint64_t oop_base) { 4249 unsigned int oop_base_ll = ((unsigned int)(oop_base >> 0)) & 0xffff; 4250 unsigned int oop_base_lh = ((unsigned int)(oop_base >> 16)) & 0xffff; 4251 unsigned int oop_base_hl = ((unsigned int)(oop_base >> 32)) & 0xffff; 4252 unsigned int oop_base_hh = ((unsigned int)(oop_base >> 48)) & 0xffff; 4253 unsigned int n_notzero_parts = (oop_base_ll == 0 ? 0:1) 4254 + (oop_base_lh == 0 ? 0:1) 4255 + (oop_base_hl == 0 ? 0:1) 4256 + (oop_base_hh == 0 ? 0:1); 4257 4258 assert(oop_base != 0, "This is for HeapBased cOops only"); 4259 4260 if (n_notzero_parts != 1) { // Check if oop_base is just a few pages shy of a power of 2. 4261 uint64_t pow2_offset = 0x10000 - oop_base_ll; 4262 if (pow2_offset < 0x8000) { // This might not be necessary. 
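// Worked example (illustrative numbers only): oop_base == 0x7FFFF000 has two
// non-zero 16-bit parts. pow2_offset == 0x10000 - 0xF000 == 0x1000, and
// oop_base + 0x1000 == 0x80000000 has just one, so -0x1000 is returned and
// get_oop_base() can load the cheaper constant 0x80000000 as the base.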
4263 uint64_t oop_base2 = oop_base + pow2_offset; 4264 4265 oop_base_ll = ((unsigned int)(oop_base2 >> 0)) & 0xffff; 4266 oop_base_lh = ((unsigned int)(oop_base2 >> 16)) & 0xffff; 4267 oop_base_hl = ((unsigned int)(oop_base2 >> 32)) & 0xffff; 4268 oop_base_hh = ((unsigned int)(oop_base2 >> 48)) & 0xffff; 4269 n_notzero_parts = (oop_base_ll == 0 ? 0:1) + 4270 (oop_base_lh == 0 ? 0:1) + 4271 (oop_base_hl == 0 ? 0:1) + 4272 (oop_base_hh == 0 ? 0:1); 4273 if (n_notzero_parts == 1) { 4274 assert(-(int64_t)pow2_offset != (int64_t)-1, "We use -1 to signal uninitialized base register"); 4275 return -pow2_offset; 4276 } 4277 } 4278 } 4279 return 0; 4280 } 4281 4282 // If base address is offset from a straight power of two by just a few pages, 4283 // return this offset to the caller for a possible later composite add. 4284 // TODO/FIX: will only work correctly for 4k pages. 4285 int MacroAssembler::get_oop_base(Register Rbase, uint64_t oop_base) { 4286 int pow2_offset = get_oop_base_pow2_offset(oop_base); 4287 4288 load_const_optimized(Rbase, oop_base - pow2_offset); // Best job possible. 4289 4290 return pow2_offset; 4291 } 4292 4293 int MacroAssembler::get_oop_base_complement(Register Rbase, uint64_t oop_base) { 4294 int offset = get_oop_base(Rbase, oop_base); 4295 z_lcgr(Rbase, Rbase); 4296 return -offset; 4297 } 4298 4299 // Compare compressed oop in memory against oop in register. 4300 // Rop1 - Oop in register. 4301 // disp - Offset of cOop in memory. 4302 // Rbase - Base address of cOop in memory. 4303 // maybenull - True if Rop1 possibly is a null. 4304 // maybenulltarget - Branch target for Rop1 == nullptr, if flow control shall NOT continue with compare instruction. 4305 void MacroAssembler::compare_heap_oop(Register Rop1, Address mem, bool maybenull) { 4306 Register Rbase = mem.baseOrR0(); 4307 Register Rindex = mem.indexOrR0(); 4308 int64_t disp = mem.disp(); 4309 4310 const int shift = CompressedOops::shift(); 4311 address base = CompressedOops::base(); 4312 4313 assert(UseCompressedOops, "must be on to call this method"); 4314 assert(Universe::heap() != nullptr, "java heap must be initialized to call this method"); 4315 assert((shift == 0) || (shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift"); 4316 assert_different_registers(Rop1, Z_R0); 4317 assert_different_registers(Rop1, Rbase, Z_R1); 4318 assert_different_registers(Rop1, Rindex, Z_R1); 4319 4320 BLOCK_COMMENT("compare heap oop {"); 4321 4322 // First encode register oop and then compare with cOop in memory. 4323 // This sequence saves an unnecessary cOop load and decode. 4324 if (base == nullptr) { 4325 if (shift == 0) { 4326 z_cl(Rop1, disp, Rindex, Rbase); // Unscaled 4327 } else { 4328 z_srlg(Z_R0, Rop1, shift); // ZeroBased 4329 z_cl(Z_R0, disp, Rindex, Rbase); 4330 } 4331 } else { // HeapBased 4332 #ifdef ASSERT 4333 bool used_R0 = true; 4334 bool used_R1 = true; 4335 #endif 4336 Label done; 4337 int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base)); 4338 4339 if (maybenull) { // null pointer must be preserved! 
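// A null Rop1 must keep comparing equal to a compressed null in memory.
// Adding the base complement would turn it into a non-zero narrow value,
// hence the early exit to 'done' below with Z_R0 == 0.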
4340 z_ltgr(Z_R0, Rop1); 4341 z_bre(done); 4342 } 4343 4344 add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); 4345 z_srlg(Z_R0, Z_R0, shift); 4346 4347 bind(done); 4348 z_cl(Z_R0, disp, Rindex, Rbase); 4349 #ifdef ASSERT 4350 if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2); 4351 if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2); 4352 #endif 4353 } 4354 BLOCK_COMMENT("} compare heap oop"); 4355 } 4356 4357 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, 4358 const Address& addr, Register val, 4359 Register tmp1, Register tmp2, Register tmp3) { 4360 assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL | 4361 ON_UNKNOWN_OOP_REF)) == 0, "unsupported decorator"); 4362 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 4363 decorators = AccessInternal::decorator_fixup(decorators, type); 4364 bool as_raw = (decorators & AS_RAW) != 0; 4365 if (as_raw) { 4366 bs->BarrierSetAssembler::store_at(this, decorators, type, 4367 addr, val, 4368 tmp1, tmp2, tmp3); 4369 } else { 4370 bs->store_at(this, decorators, type, 4371 addr, val, 4372 tmp1, tmp2, tmp3); 4373 } 4374 } 4375 4376 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, 4377 const Address& addr, Register dst, 4378 Register tmp1, Register tmp2, Label *is_null) { 4379 assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL | 4380 ON_PHANTOM_OOP_REF | ON_WEAK_OOP_REF)) == 0, "unsupported decorator"); 4381 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 4382 decorators = AccessInternal::decorator_fixup(decorators, type); 4383 bool as_raw = (decorators & AS_RAW) != 0; 4384 if (as_raw) { 4385 bs->BarrierSetAssembler::load_at(this, decorators, type, 4386 addr, dst, 4387 tmp1, tmp2, is_null); 4388 } else { 4389 bs->load_at(this, decorators, type, 4390 addr, dst, 4391 tmp1, tmp2, is_null); 4392 } 4393 } 4394 4395 void MacroAssembler::load_heap_oop(Register dest, const Address &a, 4396 Register tmp1, Register tmp2, 4397 DecoratorSet decorators, Label *is_null) { 4398 access_load_at(T_OBJECT, IN_HEAP | decorators, a, dest, tmp1, tmp2, is_null); 4399 } 4400 4401 void MacroAssembler::store_heap_oop(Register Roop, const Address &a, 4402 Register tmp1, Register tmp2, Register tmp3, 4403 DecoratorSet decorators) { 4404 access_store_at(T_OBJECT, IN_HEAP | decorators, a, Roop, tmp1, tmp2, tmp3); 4405 } 4406 4407 //------------------------------------------------- 4408 // Encode compressed oop. Generally usable encoder. 4409 //------------------------------------------------- 4410 // Rsrc - contains regular oop on entry. It remains unchanged. 4411 // Rdst - contains compressed oop on exit. 4412 // Rdst and Rsrc may indicate same register, in which case Rsrc does not remain unchanged. 4413 // 4414 // Rdst must not indicate scratch register Z_R1 (Z_R1_scratch) for functionality. 4415 // Rdst should not indicate scratch register Z_R0 (Z_R0_scratch) for performance. 4416 // 4417 // only32bitValid is set, if later code only uses the lower 32 bits. In this 4418 // case we must not fix the upper 32 bits. 
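// Note on the zeroBase path below: CompressedOops::base_disjoint() means the
// heap base has no set bits in the range a shifted narrow oop can occupy, so
// encoding reduces to a plain shift (the base bits end up in the upper half,
// which is cleared where needed) and the decoder can OR the base back in
// instead of adding it.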
4419 void MacroAssembler::oop_encoder(Register Rdst, Register Rsrc, bool maybenull, 4420 Register Rbase, int pow2_offset, bool only32bitValid) { 4421 4422 const address oop_base = CompressedOops::base(); 4423 const int oop_shift = CompressedOops::shift(); 4424 const bool disjoint = CompressedOops::base_disjoint(); 4425 4426 assert(UseCompressedOops, "must be on to call this method"); 4427 assert(Universe::heap() != nullptr, "java heap must be initialized to call this encoder"); 4428 assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift"); 4429 4430 if (disjoint || (oop_base == nullptr)) { 4431 BLOCK_COMMENT("cOop encoder zeroBase {"); 4432 if (oop_shift == 0) { 4433 if (oop_base != nullptr && !only32bitValid) { 4434 z_llgfr(Rdst, Rsrc); // Clear upper bits in case the register will be decoded again. 4435 } else { 4436 lgr_if_needed(Rdst, Rsrc); 4437 } 4438 } else { 4439 z_srlg(Rdst, Rsrc, oop_shift); 4440 if (oop_base != nullptr && !only32bitValid) { 4441 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 4442 } 4443 } 4444 BLOCK_COMMENT("} cOop encoder zeroBase"); 4445 return; 4446 } 4447 4448 bool used_R0 = false; 4449 bool used_R1 = false; 4450 4451 BLOCK_COMMENT("cOop encoder general {"); 4452 assert_different_registers(Rdst, Z_R1); 4453 assert_different_registers(Rsrc, Rbase); 4454 if (maybenull) { 4455 Label done; 4456 // We reorder shifting and subtracting, so that we can compare 4457 // and shift in parallel: 4458 // 4459 // cycle 0: potential LoadN, base = <const> 4460 // cycle 1: base = !base dst = src >> 3, cmp cr = (src != 0) 4461 // cycle 2: if (cr) br, dst = dst + base + offset 4462 4463 // Get oop_base components. 4464 if (pow2_offset == -1) { 4465 if (Rdst == Rbase) { 4466 if (Rdst == Z_R1 || Rsrc == Z_R1) { 4467 Rbase = Z_R0; 4468 used_R0 = true; 4469 } else { 4470 Rdst = Z_R1; 4471 used_R1 = true; 4472 } 4473 } 4474 if (Rbase == Z_R1) { 4475 used_R1 = true; 4476 } 4477 pow2_offset = get_oop_base_complement(Rbase, ((uint64_t)(intptr_t)oop_base) >> oop_shift); 4478 } 4479 assert_different_registers(Rdst, Rbase); 4480 4481 // Check for null oop (must be left alone) and shift. 4482 if (oop_shift != 0) { // Shift out alignment bits 4483 if (((intptr_t)oop_base&0xc000000000000000L) == 0L) { // We are sure: no single address will have the leftmost bit set. 4484 z_srag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4485 } else { 4486 z_srlg(Rdst, Rsrc, oop_shift); 4487 z_ltgr(Rsrc, Rsrc); // This is the recommended way of testing for zero. 4488 // This probably is faster, as it does not write a register. No! 4489 // z_cghi(Rsrc, 0); 4490 } 4491 } else { 4492 z_ltgr(Rdst, Rsrc); // Move null to result register. 4493 } 4494 z_bre(done); 4495 4496 // Subtract oop_base components. 4497 if ((Rdst == Z_R0) || (Rbase == Z_R0)) { 4498 z_algr(Rdst, Rbase); 4499 if (pow2_offset != 0) { add2reg(Rdst, pow2_offset); } 4500 } else { 4501 add2reg_with_index(Rdst, pow2_offset, Rbase, Rdst); 4502 } 4503 if (!only32bitValid) { 4504 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 4505 } 4506 bind(done); 4507 4508 } else { // not null 4509 // Get oop_base components. 4510 if (pow2_offset == -1) { 4511 pow2_offset = get_oop_base_complement(Rbase, (uint64_t)(intptr_t)oop_base); 4512 } 4513 4514 // Subtract oop_base components and shift. 4515 if (Rdst == Z_R0 || Rsrc == Z_R0 || Rbase == Z_R0) { 4516 // Don't use lay instruction. 
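// (General register 0 cannot serve as a base or index register in
// z/Architecture address generation - it reads as zero there - so the
// LA/LAY form used by add2reg_with_index is not available once Z_R0 is
// involved; fall back to plain adds instead.)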
4517 if (Rdst == Rsrc) { 4518 z_algr(Rdst, Rbase); 4519 } else { 4520 lgr_if_needed(Rdst, Rbase); 4521 z_algr(Rdst, Rsrc); 4522 } 4523 if (pow2_offset != 0) add2reg(Rdst, pow2_offset); 4524 } else { 4525 add2reg_with_index(Rdst, pow2_offset, Rbase, Rsrc); 4526 } 4527 if (oop_shift != 0) { // Shift out alignment bits. 4528 z_srlg(Rdst, Rdst, oop_shift); 4529 } 4530 if (!only32bitValid) { 4531 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 4532 } 4533 } 4534 #ifdef ASSERT 4535 if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb01bUL, 2); } 4536 if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb02bUL, 2); } 4537 #endif 4538 BLOCK_COMMENT("} cOop encoder general"); 4539 } 4540 4541 //------------------------------------------------- 4542 // decode compressed oop. Generally usable decoder. 4543 //------------------------------------------------- 4544 // Rsrc - contains compressed oop on entry. 4545 // Rdst - contains regular oop on exit. 4546 // Rdst and Rsrc may indicate same register. 4547 // Rdst must not be the same register as Rbase, if Rbase was preloaded (before call). 4548 // Rdst can be the same register as Rbase. Then, either Z_R0 or Z_R1 must be available as scratch. 4549 // Rbase - register to use for the base 4550 // pow2_offset - offset of base to nice value. If -1, base must be loaded. 4551 // For performance, it is good to 4552 // - avoid Z_R0 for any of the argument registers. 4553 // - keep Rdst and Rsrc distinct from Rbase. Rdst == Rsrc is ok for performance. 4554 // - avoid Z_R1 for Rdst if Rdst == Rbase. 4555 void MacroAssembler::oop_decoder(Register Rdst, Register Rsrc, bool maybenull, Register Rbase, int pow2_offset) { 4556 4557 const address oop_base = CompressedOops::base(); 4558 const int oop_shift = CompressedOops::shift(); 4559 const bool disjoint = CompressedOops::base_disjoint(); 4560 4561 assert(UseCompressedOops, "must be on to call this method"); 4562 assert(Universe::heap() != nullptr, "java heap must be initialized to call this decoder"); 4563 assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), 4564 "cOop encoder detected bad shift"); 4565 4566 // cOops are always loaded zero-extended from memory. No explicit zero-extension necessary. 4567 4568 if (oop_base != nullptr) { 4569 unsigned int oop_base_hl = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xffff; 4570 unsigned int oop_base_hh = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 48)) & 0xffff; 4571 unsigned int oop_base_hf = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xFFFFffff; 4572 if (disjoint && (oop_base_hl == 0 || oop_base_hh == 0)) { 4573 BLOCK_COMMENT("cOop decoder disjointBase {"); 4574 // We do not need to load the base. Instead, we can install the upper bits 4575 // with an OR instead of an ADD. 4576 Label done; 4577 4578 // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set. 4579 if (maybenull) { // null pointer must be preserved! 4580 z_slag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4581 z_bre(done); 4582 } else { 4583 z_sllg(Rdst, Rsrc, oop_shift); // Logical shift leaves condition code alone. 
4584 } 4585 if ((oop_base_hl != 0) && (oop_base_hh != 0)) { 4586 z_oihf(Rdst, oop_base_hf); 4587 } else if (oop_base_hl != 0) { 4588 z_oihl(Rdst, oop_base_hl); 4589 } else { 4590 assert(oop_base_hh != 0, "not heapbased mode"); 4591 z_oihh(Rdst, oop_base_hh); 4592 } 4593 bind(done); 4594 BLOCK_COMMENT("} cOop decoder disjointBase"); 4595 } else { 4596 BLOCK_COMMENT("cOop decoder general {"); 4597 // There are three decode steps: 4598 // scale oop offset (shift left) 4599 // get base (in reg) and pow2_offset (constant) 4600 // add base, pow2_offset, and oop offset 4601 // The following register overlap situations may exist: 4602 // Rdst == Rsrc, Rbase any other 4603 // not a problem. Scaling in-place leaves Rbase undisturbed. 4604 // Loading Rbase does not impact the scaled offset. 4605 // Rdst == Rbase, Rsrc any other 4606 // scaling would destroy a possibly preloaded Rbase. Loading Rbase 4607 // would destroy the scaled offset. 4608 // Remedy: use Rdst_tmp if Rbase has been preloaded. 4609 // use Rbase_tmp if base has to be loaded. 4610 // Rsrc == Rbase, Rdst any other 4611 // Only possible without preloaded Rbase. 4612 // Loading Rbase does not destroy compressed oop because it was scaled into Rdst before. 4613 // Rsrc == Rbase, Rdst == Rbase 4614 // Only possible without preloaded Rbase. 4615 // Loading Rbase would destroy compressed oop. Scaling in-place is ok. 4616 // Remedy: use Rbase_tmp. 4617 // 4618 Label done; 4619 Register Rdst_tmp = Rdst; 4620 Register Rbase_tmp = Rbase; 4621 bool used_R0 = false; 4622 bool used_R1 = false; 4623 bool base_preloaded = pow2_offset >= 0; 4624 guarantee(!(base_preloaded && (Rsrc == Rbase)), "Register clash, check caller"); 4625 assert(oop_shift != 0, "room for optimization"); 4626 4627 // Check if we need to use scratch registers. 4628 if (Rdst == Rbase) { 4629 assert(!(((Rdst == Z_R0) && (Rsrc == Z_R1)) || ((Rdst == Z_R1) && (Rsrc == Z_R0))), "need a scratch reg"); 4630 if (Rdst != Rsrc) { 4631 if (base_preloaded) { Rdst_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; } 4632 else { Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; } 4633 } else { 4634 Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; 4635 } 4636 } 4637 if (base_preloaded) lgr_if_needed(Rbase_tmp, Rbase); 4638 4639 // Scale oop and check for null. 4640 // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set. 4641 if (maybenull) { // null pointer must be preserved! 4642 z_slag(Rdst_tmp, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4643 z_bre(done); 4644 } else { 4645 z_sllg(Rdst_tmp, Rsrc, oop_shift); // Logical shift leaves condition code alone. 4646 } 4647 4648 // Get oop_base components. 4649 if (!base_preloaded) { 4650 pow2_offset = get_oop_base(Rbase_tmp, (uint64_t)(intptr_t)oop_base); 4651 } 4652 4653 // Add up all components. 
4654 if ((Rbase_tmp == Z_R0) || (Rdst_tmp == Z_R0)) { 4655 z_algr(Rdst_tmp, Rbase_tmp); 4656 if (pow2_offset != 0) { add2reg(Rdst_tmp, pow2_offset); } 4657 } else { 4658 add2reg_with_index(Rdst_tmp, pow2_offset, Rbase_tmp, Rdst_tmp); 4659 } 4660 4661 bind(done); 4662 lgr_if_needed(Rdst, Rdst_tmp); 4663 #ifdef ASSERT 4664 if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb03bUL, 2); } 4665 if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb04bUL, 2); } 4666 #endif 4667 BLOCK_COMMENT("} cOop decoder general"); 4668 } 4669 } else { 4670 BLOCK_COMMENT("cOop decoder zeroBase {"); 4671 if (oop_shift == 0) { 4672 lgr_if_needed(Rdst, Rsrc); 4673 } else { 4674 z_sllg(Rdst, Rsrc, oop_shift); 4675 } 4676 BLOCK_COMMENT("} cOop decoder zeroBase"); 4677 } 4678 } 4679 4680 // ((OopHandle)result).resolve(); 4681 void MacroAssembler::resolve_oop_handle(Register result) { 4682 // OopHandle::resolve is an indirection. 4683 z_lg(result, 0, result); 4684 } 4685 4686 void MacroAssembler::load_mirror_from_const_method(Register mirror, Register const_method) { 4687 mem2reg_opt(mirror, Address(const_method, ConstMethod::constants_offset())); 4688 mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset())); 4689 mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset())); 4690 resolve_oop_handle(mirror); 4691 } 4692 4693 void MacroAssembler::load_method_holder(Register holder, Register method) { 4694 mem2reg_opt(holder, Address(method, Method::const_offset())); 4695 mem2reg_opt(holder, Address(holder, ConstMethod::constants_offset())); 4696 mem2reg_opt(holder, Address(holder, ConstantPool::pool_holder_offset())); 4697 } 4698 4699 //--------------------------------------------------------------- 4700 //--- Operations on arrays. 4701 //--------------------------------------------------------------- 4702 4703 // Compiler ensures base is doubleword aligned and cnt is #doublewords. 4704 // Emitter does not KILL cnt and base arguments, since they need to be copied to 4705 // work registers anyway. 4706 // Actually, only r0, r1, and r5 are killed. 4707 unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register odd_tmp_reg) { 4708 4709 int block_start = offset(); 4710 Register dst_len = Z_R1; // Holds dst len for MVCLE. 4711 Register dst_addr = Z_R0; // Holds dst addr for MVCLE. 4712 4713 Label doXC, doMVCLE, done; 4714 4715 BLOCK_COMMENT("Clear_Array {"); 4716 4717 // Check for zero len and convert to long. 4718 z_ltgfr(odd_tmp_reg, cnt_arg); 4719 z_bre(done); // Nothing to do if len == 0. 4720 4721 // Prefetch data to be cleared. 4722 if (VM_Version::has_Prefetch()) { 4723 z_pfd(0x02, 0, Z_R0, base_pointer_arg); 4724 z_pfd(0x02, 256, Z_R0, base_pointer_arg); 4725 } 4726 4727 z_sllg(dst_len, odd_tmp_reg, 3); // #bytes to clear. 4728 z_cghi(odd_tmp_reg, 32); // Check for len <= 256 bytes (<=32 DW). 4729 z_brnh(doXC); // If so, use executed XC to clear. 4730 4731 // MVCLE: initialize long arrays (general case). 4732 bind(doMVCLE); 4733 z_lgr(dst_addr, base_pointer_arg); 4734 // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0. 4735 // The even register of the register pair is not killed. 4736 clear_reg(odd_tmp_reg, true, false); 4737 MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding()-1), 0); 4738 z_bru(done); 4739 4740 // XC: initialize short arrays. 4741 Label XC_template; // Instr template, never exec directly! 
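// Note on the EXECUTE trick below: the XC at XC_template is only a template and
// is never reached in the normal instruction stream. EX or EXRL executes a copy
// of it whose length field is ORed with the low byte of dst_len (holding
// #bytes-1, which is exactly how the SS-format length field encodes a length).
// XC of a storage operand with itself zeroes that storage.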
4742 bind(XC_template); 4743 z_xc(0,0,base_pointer_arg,0,base_pointer_arg); 4744 4745 bind(doXC); 4746 add2reg(dst_len, -1); // Get #bytes-1 for EXECUTE. 4747 if (VM_Version::has_ExecuteExtensions()) { 4748 z_exrl(dst_len, XC_template); // Execute XC with var. len. 4749 } else { 4750 z_larl(odd_tmp_reg, XC_template); 4751 z_ex(dst_len,0,Z_R0,odd_tmp_reg); // Execute XC with var. len. 4752 } 4753 // z_bru(done); // fallthru 4754 4755 bind(done); 4756 4757 BLOCK_COMMENT("} Clear_Array"); 4758 4759 int block_end = offset(); 4760 return block_end - block_start; 4761 } 4762 4763 // Compiler ensures base is doubleword aligned and cnt is count of doublewords. 4764 // Emitter does not KILL any arguments nor work registers. 4765 // Emitter generates up to 16 XC instructions, depending on the array length. 4766 unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) { 4767 int block_start = offset(); 4768 int off; 4769 int lineSize_Bytes = AllocatePrefetchStepSize; 4770 int lineSize_DW = AllocatePrefetchStepSize>>LogBytesPerWord; 4771 bool doPrefetch = VM_Version::has_Prefetch(); 4772 int XC_maxlen = 256; 4773 int numXCInstr = cnt > 0 ? (cnt*BytesPerWord-1)/XC_maxlen+1 : 0; 4774 4775 BLOCK_COMMENT("Clear_Array_Const {"); 4776 assert(cnt*BytesPerWord <= 4096, "ClearArrayConst can handle 4k only"); 4777 4778 // Do less prefetching for very short arrays. 4779 if (numXCInstr > 0) { 4780 // Prefetch only some cache lines, then begin clearing. 4781 if (doPrefetch) { 4782 if (cnt*BytesPerWord <= lineSize_Bytes/4) { // If less than 1/4 of a cache line to clear, 4783 z_pfd(0x02, 0, Z_R0, base); // prefetch just the first cache line. 4784 } else { 4785 assert(XC_maxlen == lineSize_Bytes, "ClearArrayConst needs 256B cache lines"); 4786 for (off = 0; (off < AllocatePrefetchLines) && (off <= numXCInstr); off ++) { 4787 z_pfd(0x02, off*lineSize_Bytes, Z_R0, base); 4788 } 4789 } 4790 } 4791 4792 for (off=0; off<(numXCInstr-1); off++) { 4793 z_xc(off*XC_maxlen, XC_maxlen-1, base, off*XC_maxlen, base); 4794 4795 // Prefetch some cache lines in advance. 4796 if (doPrefetch && (off <= numXCInstr-AllocatePrefetchLines)) { 4797 z_pfd(0x02, (off+AllocatePrefetchLines)*lineSize_Bytes, Z_R0, base); 4798 } 4799 } 4800 if (off*XC_maxlen < cnt*BytesPerWord) { 4801 z_xc(off*XC_maxlen, (cnt*BytesPerWord-off*XC_maxlen)-1, base, off*XC_maxlen, base); 4802 } 4803 } 4804 BLOCK_COMMENT("} Clear_Array_Const"); 4805 4806 int block_end = offset(); 4807 return block_end - block_start; 4808 } 4809 4810 // Compiler ensures base is doubleword aligned and cnt is #doublewords. 4811 // Emitter does not KILL cnt and base arguments, since they need to be copied to 4812 // work registers anyway. 4813 // Actually, only r0, r1, (which are work registers) and odd_tmp_reg are killed. 4814 // 4815 // For very large arrays, exploit MVCLE H/W support. 4816 // MVCLE instruction automatically exploits H/W-optimized page mover. 4817 // - Bytes up to next page boundary are cleared with a series of XC to self. 4818 // - All full pages are cleared with the page mover H/W assist. 4819 // - Remaining bytes are again cleared by a series of XC to self. 4820 // 4821 unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register odd_tmp_reg) { 4822 4823 int block_start = offset(); 4824 Register dst_len = Z_R1; // Holds dst len for MVCLE. 4825 Register dst_addr = Z_R0; // Holds dst addr for MVCLE. 4826 4827 BLOCK_COMMENT("Clear_Array_Const_Big {"); 4828 4829 // Get len to clear. 
4830 load_const_optimized(dst_len, (long)cnt*8L); // in Bytes = #DW*8 4831 4832 // Prepare other args to MVCLE. 4833 z_lgr(dst_addr, base_pointer_arg); 4834 // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0. 4835 // The even register of the register pair is not killed. 4836 (void) clear_reg(odd_tmp_reg, true, false); // Src len of MVCLE is zero. 4837 MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding() - 1), 0); 4838 BLOCK_COMMENT("} Clear_Array_Const_Big"); 4839 4840 int block_end = offset(); 4841 return block_end - block_start; 4842 } 4843 4844 // Allocator. 4845 unsigned int MacroAssembler::CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg, 4846 Register cnt_reg, 4847 Register tmp1_reg, Register tmp2_reg) { 4848 // Tmp1 is oddReg. 4849 // Tmp2 is evenReg. 4850 4851 int block_start = offset(); 4852 Label doMVC, doMVCLE, done, MVC_template; 4853 4854 BLOCK_COMMENT("CopyRawMemory_AlignedDisjoint {"); 4855 4856 // Check for zero len and convert to long. 4857 z_ltgfr(cnt_reg, cnt_reg); // Remember casted value for doSTG case. 4858 z_bre(done); // Nothing to do if len == 0. 4859 4860 z_sllg(Z_R1, cnt_reg, 3); // Dst len in bytes. calc early to have the result ready. 4861 4862 z_cghi(cnt_reg, 32); // Check for len <= 256 bytes (<=32 DW). 4863 z_brnh(doMVC); // If so, use executed MVC to clear. 4864 4865 bind(doMVCLE); // A lot of data (more than 256 bytes). 4866 // Prep dest reg pair. 4867 z_lgr(Z_R0, dst_reg); // dst addr 4868 // Dst len already in Z_R1. 4869 // Prep src reg pair. 4870 z_lgr(tmp2_reg, src_reg); // src addr 4871 z_lgr(tmp1_reg, Z_R1); // Src len same as dst len. 4872 4873 // Do the copy. 4874 move_long_ext(Z_R0, tmp2_reg, 0xb0); // Bypass cache. 4875 z_bru(done); // All done. 4876 4877 bind(MVC_template); // Just some data (not more than 256 bytes). 4878 z_mvc(0, 0, dst_reg, 0, src_reg); 4879 4880 bind(doMVC); 4881 4882 if (VM_Version::has_ExecuteExtensions()) { 4883 add2reg(Z_R1, -1); 4884 } else { 4885 add2reg(tmp1_reg, -1, Z_R1); 4886 z_larl(Z_R1, MVC_template); 4887 } 4888 4889 if (VM_Version::has_Prefetch()) { 4890 z_pfd(1, 0,Z_R0,src_reg); 4891 z_pfd(2, 0,Z_R0,dst_reg); 4892 // z_pfd(1,256,Z_R0,src_reg); // Assume very short copy. 4893 // z_pfd(2,256,Z_R0,dst_reg); 4894 } 4895 4896 if (VM_Version::has_ExecuteExtensions()) { 4897 z_exrl(Z_R1, MVC_template); 4898 } else { 4899 z_ex(tmp1_reg, 0, Z_R0, Z_R1); 4900 } 4901 4902 bind(done); 4903 4904 BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint"); 4905 4906 int block_end = offset(); 4907 return block_end - block_start; 4908 } 4909 4910 //------------------------------------------------- 4911 // Constants (scalar and oop) in constant pool 4912 //------------------------------------------------- 4913 4914 // Add a non-relocated constant to the CP. 4915 int MacroAssembler::store_const_in_toc(AddressLiteral& val) { 4916 long value = val.value(); 4917 address tocPos = long_constant(value); 4918 4919 if (tocPos != nullptr) { 4920 int tocOffset = (int)(tocPos - code()->consts()->start()); 4921 return tocOffset; 4922 } 4923 // Address_constant returned null, so no constant entry has been created. 4924 // In that case, we return a "fatal" offset, just in case that subsequently 4925 // generated access code is executed. 4926 return -1; 4927 } 4928 4929 // Returns the TOC offset where the address is stored. 4930 // Add a relocated constant to the CP. 
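// Note: as with store_const_in_toc() above, the returned offset is relative to
// code()->consts()->start(), and -1 signals that no constant pool entry could be
// created; callers (see load_oop_from_toc() below) must check for that and bail out.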
4931 int MacroAssembler::store_oop_in_toc(AddressLiteral& oop) { 4932 // Use RelocationHolder::none for the constant pool entry. 4933 // Otherwise we will end up with a failing NativeCall::verify(x), 4934 // where x is the address of the constant pool entry. 4935 address tocPos = address_constant((address)oop.value(), RelocationHolder::none); 4936 4937 if (tocPos != nullptr) { 4938 int tocOffset = (int)(tocPos - code()->consts()->start()); 4939 RelocationHolder rsp = oop.rspec(); 4940 Relocation *rel = rsp.reloc(); 4941 4942 // Store toc_offset in relocation, used by call_far_patchable. 4943 if ((relocInfo::relocType)rel->type() == relocInfo::runtime_call_w_cp_type) { 4944 ((runtime_call_w_cp_Relocation *)(rel))->set_constant_pool_offset(tocOffset); 4945 } 4946 // Relocate at the load's pc. 4947 relocate(rsp); 4948 4949 return tocOffset; 4950 } 4951 // Address_constant returned null, so no constant entry has been created 4952 // in that case, we return a "fatal" offset, just in case that subsequently 4953 // generated access code is executed. 4954 return -1; 4955 } 4956 4957 bool MacroAssembler::load_const_from_toc(Register dst, AddressLiteral& a, Register Rtoc) { 4958 int tocOffset = store_const_in_toc(a); 4959 if (tocOffset == -1) return false; 4960 address tocPos = tocOffset + code()->consts()->start(); 4961 assert((address)code()->consts()->start() != nullptr, "Please add CP address"); 4962 relocate(a.rspec()); 4963 load_long_pcrelative(dst, tocPos); 4964 return true; 4965 } 4966 4967 bool MacroAssembler::load_oop_from_toc(Register dst, AddressLiteral& a, Register Rtoc) { 4968 int tocOffset = store_oop_in_toc(a); 4969 if (tocOffset == -1) return false; 4970 address tocPos = tocOffset + code()->consts()->start(); 4971 assert((address)code()->consts()->start() != nullptr, "Please add CP address"); 4972 4973 load_addr_pcrelative(dst, tocPos); 4974 return true; 4975 } 4976 4977 // If the instruction sequence at the given pc is a load_const_from_toc 4978 // sequence, return the value currently stored at the referenced position 4979 // in the TOC. 4980 intptr_t MacroAssembler::get_const_from_toc(address pc) { 4981 4982 assert(is_load_const_from_toc(pc), "must be load_const_from_pool"); 4983 4984 long offset = get_load_const_from_toc_offset(pc); 4985 address dataLoc = nullptr; 4986 if (is_load_const_from_toc_pcrelative(pc)) { 4987 dataLoc = pc + offset; 4988 } else { 4989 CodeBlob* cb = CodeCache::find_blob(pc); 4990 assert(cb && cb->is_nmethod(), "sanity"); 4991 nmethod* nm = (nmethod*)cb; 4992 dataLoc = nm->ctable_begin() + offset; 4993 } 4994 return *(intptr_t *)dataLoc; 4995 } 4996 4997 // If the instruction sequence at the given pc is a load_const_from_toc 4998 // sequence, copy the passed-in new_data value into the referenced 4999 // position in the TOC. 5000 void MacroAssembler::set_const_in_toc(address pc, unsigned long new_data, CodeBlob *cb) { 5001 assert(is_load_const_from_toc(pc), "must be load_const_from_pool"); 5002 5003 long offset = MacroAssembler::get_load_const_from_toc_offset(pc); 5004 address dataLoc = nullptr; 5005 if (is_load_const_from_toc_pcrelative(pc)) { 5006 dataLoc = pc+offset; 5007 } else { 5008 nmethod* nm = CodeCache::find_nmethod(pc); 5009 assert((cb == nullptr) || (nm == (nmethod*)cb), "instruction address should be in CodeBlob"); 5010 dataLoc = nm->ctable_begin() + offset; 5011 } 5012 if (*(unsigned long *)dataLoc != new_data) { // Prevent cache invalidation: update only if necessary. 
5013 *(unsigned long *)dataLoc = new_data;
5014 }
5015 }
5016
5017 // Dynamic TOC. Getter must only be called if "a" is a load_const_from_toc
5018 // site. Verify by calling is_load_const_from_toc() before!!
5019 // Offset is +/- 2**32 -> use long.
5020 long MacroAssembler::get_load_const_from_toc_offset(address a) {
5021 assert(is_load_const_from_toc_pcrelative(a), "expected pc relative load");
5022 // expected code sequence:
5023 // z_lgrl(t, simm32); len = 6
5024 unsigned long inst;
5025 unsigned int len = get_instruction(a, &inst);
5026 return get_pcrel_offset(inst);
5027 }
5028
5029 //**********************************************************************************
5030 // inspection of generated instruction sequences for a particular pattern
5031 //**********************************************************************************
5032
5033 bool MacroAssembler::is_load_const_from_toc_pcrelative(address a) {
5034 #ifdef ASSERT
5035 unsigned long inst;
5036 unsigned int len = get_instruction(a+2, &inst);
5037 if ((len == 6) && is_load_pcrelative_long(a) && is_call_pcrelative_long(inst)) {
5038 const int range = 128;
5039 Assembler::dump_code_range(tty, a, range, "instr(a) == z_lgrl && instr(a+2) == z_brasl");
5040 VM_Version::z_SIGSEGV();
5041 }
5042 #endif
5043 // expected code sequence:
5044 // z_lgrl(t, relAddr32); len = 6
5045 //TODO: verify accessed data is in CP, if possible.
5046 return is_load_pcrelative_long(a); // TODO: might be too general. Currently, only lgrl is used.
5047 }
5048
5049 bool MacroAssembler::is_load_const_from_toc_call(address a) {
5050 return is_load_const_from_toc(a) && is_call_byregister(a + load_const_from_toc_size());
5051 }
5052
5053 bool MacroAssembler::is_load_const_call(address a) {
5054 return is_load_const(a) && is_call_byregister(a + load_const_size());
5055 }
5056
5057 //-------------------------------------------------
5058 // Emitters for some really CISC instructions
5059 //-------------------------------------------------
5060
5061 void MacroAssembler::move_long_ext(Register dst, Register src, unsigned int pad) {
5062 assert(dst->encoding()%2==0, "must be an even/odd register pair");
5063 assert(src->encoding()%2==0, "must be an even/odd register pair");
5064 assert(pad<256, "must be a padding BYTE");
5065
5066 Label retry;
5067 bind(retry);
5068 Assembler::z_mvcle(dst, src, pad);
5069 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5070 }
5071
5072 void MacroAssembler::compare_long_ext(Register left, Register right, unsigned int pad) {
5073 assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
5074 assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
5075 assert(pad<256, "must be a padding BYTE");
5076
5077 Label retry;
5078 bind(retry);
5079 Assembler::z_clcle(left, right, pad, Z_R0);
5080 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5081 }
5082
5083 void MacroAssembler::compare_long_uni(Register left, Register right, unsigned int pad) {
5084 assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
5085 assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
5086 assert(pad<=0xfff, "must be a padding HALFWORD");
5087 assert(VM_Version::has_ETF2(), "instruction must be available");
5088
5089 Label retry;
5090 bind(retry);
5091 Assembler::z_clclu(left, right, pad, Z_R0);
5092 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
5093 }
5094
5095 void MacroAssembler::search_string(Register end, Register
start) { 5096 assert(end->encoding() != 0, "end address must not be in R0"); 5097 assert(start->encoding() != 0, "start address must not be in R0"); 5098 5099 Label retry; 5100 bind(retry); 5101 Assembler::z_srst(end, start); 5102 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 5103 } 5104 5105 void MacroAssembler::search_string_uni(Register end, Register start) { 5106 assert(end->encoding() != 0, "end address must not be in R0"); 5107 assert(start->encoding() != 0, "start address must not be in R0"); 5108 assert(VM_Version::has_ETF3(), "instruction must be available"); 5109 5110 Label retry; 5111 bind(retry); 5112 Assembler::z_srstu(end, start); 5113 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 5114 } 5115 5116 void MacroAssembler::kmac(Register srcBuff) { 5117 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 5118 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 5119 5120 Label retry; 5121 bind(retry); 5122 Assembler::z_kmac(Z_R0, srcBuff); 5123 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 5124 } 5125 5126 void MacroAssembler::kimd(Register srcBuff) { 5127 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 5128 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 5129 5130 Label retry; 5131 bind(retry); 5132 Assembler::z_kimd(Z_R0, srcBuff); 5133 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 5134 } 5135 5136 void MacroAssembler::klmd(Register srcBuff) { 5137 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 5138 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 5139 5140 Label retry; 5141 bind(retry); 5142 Assembler::z_klmd(Z_R0, srcBuff); 5143 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 5144 } 5145 5146 void MacroAssembler::km(Register dstBuff, Register srcBuff) { 5147 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 5148 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 5149 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 5150 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 5151 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 5152 5153 Label retry; 5154 bind(retry); 5155 Assembler::z_km(dstBuff, srcBuff); 5156 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 5157 } 5158 5159 void MacroAssembler::kmc(Register dstBuff, Register srcBuff) { 5160 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 5161 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 5162 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 5163 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 5164 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 5165 5166 Label retry; 5167 bind(retry); 5168 Assembler::z_kmc(dstBuff, srcBuff); 5169 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 5170 } 5171 5172 void MacroAssembler::kmctr(Register dstBuff, Register ctrBuff, Register srcBuff) { 5173 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 
5174 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 5175 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 5176 assert(dstBuff->encoding() != 0, "dst buffer address can't be in Z_R0"); 5177 assert(ctrBuff->encoding() != 0, "ctr buffer address can't be in Z_R0"); 5178 assert(ctrBuff->encoding() % 2 == 0, "ctr buffer addr must be an even register"); 5179 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 5180 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 5181 5182 Label retry; 5183 bind(retry); 5184 Assembler::z_kmctr(dstBuff, ctrBuff, srcBuff); 5185 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 5186 } 5187 5188 void MacroAssembler::cksm(Register crcBuff, Register srcBuff) { 5189 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 5190 5191 Label retry; 5192 bind(retry); 5193 Assembler::z_cksm(crcBuff, srcBuff); 5194 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 5195 } 5196 5197 void MacroAssembler::translate_oo(Register r1, Register r2, uint m3) { 5198 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 5199 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 5200 5201 Label retry; 5202 bind(retry); 5203 Assembler::z_troo(r1, r2, m3); 5204 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 5205 } 5206 5207 void MacroAssembler::translate_ot(Register r1, Register r2, uint m3) { 5208 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 5209 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 5210 5211 Label retry; 5212 bind(retry); 5213 Assembler::z_trot(r1, r2, m3); 5214 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 5215 } 5216 5217 void MacroAssembler::translate_to(Register r1, Register r2, uint m3) { 5218 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 5219 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 5220 5221 Label retry; 5222 bind(retry); 5223 Assembler::z_trto(r1, r2, m3); 5224 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 5225 } 5226 5227 void MacroAssembler::translate_tt(Register r1, Register r2, uint m3) { 5228 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 5229 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 5230 5231 Label retry; 5232 bind(retry); 5233 Assembler::z_trtt(r1, r2, m3); 5234 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 5235 } 5236 5237 //--------------------------------------- 5238 // Helpers for Intrinsic Emitters 5239 //--------------------------------------- 5240 5241 /** 5242 * uint32_t crc; 5243 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); 5244 */ 5245 void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, Register tmp) { 5246 assert_different_registers(crc, table, tmp); 5247 assert_different_registers(val, table); 5248 if (crc == val) { // Must rotate first to use the unmodified value. 5249 rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. 5250 z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits. 5251 } else { 5252 z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits. 
5253 rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. 5254 } 5255 z_x(crc, Address(table, tmp, 0)); 5256 } 5257 5258 /** 5259 * uint32_t crc; 5260 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); 5261 */ 5262 void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) { 5263 fold_byte_crc32(crc, crc, table, tmp); 5264 } 5265 5266 /** 5267 * Emits code to update CRC-32 with a byte value according to constants in table. 5268 * 5269 * @param [in,out]crc Register containing the crc. 5270 * @param [in]val Register containing the byte to fold into the CRC. 5271 * @param [in]table Register containing the table of crc constants. 5272 * 5273 * uint32_t crc; 5274 * val = crc_table[(val ^ crc) & 0xFF]; 5275 * crc = val ^ (crc >> 8); 5276 */ 5277 void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { 5278 z_xr(val, crc); 5279 fold_byte_crc32(crc, val, table, val); 5280 } 5281 5282 5283 /** 5284 * @param crc register containing existing CRC (32-bit) 5285 * @param buf register pointing to input byte buffer (byte*) 5286 * @param len register containing number of bytes 5287 * @param table register pointing to CRC table 5288 */ 5289 void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, Register data) { 5290 assert_different_registers(crc, buf, len, table, data); 5291 5292 Label L_mainLoop, L_done; 5293 const int mainLoop_stepping = 1; 5294 5295 // Process all bytes in a single-byte loop. 5296 z_ltr(len, len); 5297 z_brnh(L_done); 5298 5299 bind(L_mainLoop); 5300 z_llgc(data, Address(buf, (intptr_t)0));// Current byte of input buffer (zero extended). Avoids garbage in upper half of register. 5301 add2reg(buf, mainLoop_stepping); // Advance buffer position. 5302 update_byte_crc32(crc, data, table); 5303 z_brct(len, L_mainLoop); // Iterate. 5304 5305 bind(L_done); 5306 } 5307 5308 /** 5309 * Emits code to update CRC-32 with a 4-byte value according to constants in table. 5310 * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c. 5311 * 5312 */ 5313 void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc, 5314 Register t0, Register t1, Register t2, Register t3) { 5315 // This is what we implement (the DOBIG4 part): 5316 // 5317 // #define DOBIG4 c ^= *++buf4; \ 5318 // c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ 5319 // crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] 5320 // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 5321 // Pre-calculate (constant) column offsets, use columns 4..7 for big-endian. 5322 const int ix0 = 4*(4*CRC32_COLUMN_SIZE); 5323 const int ix1 = 5*(4*CRC32_COLUMN_SIZE); 5324 const int ix2 = 6*(4*CRC32_COLUMN_SIZE); 5325 const int ix3 = 7*(4*CRC32_COLUMN_SIZE); 5326 5327 // XOR crc with next four bytes of buffer. 5328 lgr_if_needed(t0, crc); 5329 z_x(t0, Address(buf, bufDisp)); 5330 if (bufInc != 0) { 5331 add2reg(buf, bufInc); 5332 } 5333 5334 // Chop crc into 4 single-byte pieces, shifted left 2 bits, to form the table indices. 
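// (Each table column holds 256 four-byte entries; shifting a byte value left by 2
// turns it into a byte offset within its column. ix0..ix3 select columns 4..7 of
// the big-endian table layout, assuming CRC32_COLUMN_SIZE is the per-column entry
// count of 256.)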
5335 rotate_then_insert(t3, t0, 56-2, 63-2, 2, true); // ((c >> 0) & 0xff) << 2
5336 rotate_then_insert(t2, t0, 56-2, 63-2, 2-8, true); // ((c >> 8) & 0xff) << 2
5337 rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true); // ((c >> 16) & 0xff) << 2
5338 rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true); // ((c >> 24) & 0xff) << 2
5339
5340 // XOR indexed table values to calculate updated crc.
5341 z_ly(t2, Address(table, t2, (intptr_t)ix1));
5342 z_ly(t0, Address(table, t0, (intptr_t)ix3));
5343 z_xy(t2, Address(table, t3, (intptr_t)ix0));
5344 z_xy(t0, Address(table, t1, (intptr_t)ix2));
5345 z_xr(t0, t2); // Now t0 contains the updated CRC value.
5346 lgr_if_needed(crc, t0);
5347 }
5348
5349 /**
5350 * @param crc register containing existing CRC (32-bit)
5351 * @param buf register pointing to input byte buffer (byte*)
5352 * @param len register containing number of bytes
5353 * @param table register pointing to CRC table
5354 *
5355 * uses Z_R10..Z_R13 as work registers. Must be saved/restored by caller!
5356 */
5357 void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
5358 Register t0, Register t1, Register t2, Register t3,
5359 bool invertCRC) {
5360 assert_different_registers(crc, buf, len, table);
5361
5362 Label L_mainLoop, L_tail;
5363 Register data = t0;
5364 Register ctr = Z_R0;
5365 const int mainLoop_stepping = 4;
5366 const int log_stepping = exact_log2(mainLoop_stepping);
5367
5368 // Don't test for len <= 0 here. This pathological case should not occur anyway.
5369 // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
5370 // The situation itself is detected and handled correctly by the conditional branches
5371 // following aghi(len, -stepping) and aghi(len, +stepping).
5372
5373 if (invertCRC) {
5374 not_(crc, noreg, false); // 1s complement of crc
5375 }
5376
5377 // Check for short (<4 bytes) buffer.
5378 z_srag(ctr, len, log_stepping);
5379 z_brnh(L_tail);
5380
5381 z_lrvr(crc, crc); // Reverse byte order because we are dealing with big-endian data.
5382 rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop
5383
5384 BIND(L_mainLoop);
5385 update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3);
5386 z_brct(ctr, L_mainLoop); // Iterate.
5387
5388 z_lrvr(crc, crc); // Reverse byte order back to original.
5389
5390 // Process last few (<4) bytes of buffer.
5391 BIND(L_tail); 5392 update_byteLoop_crc32(crc, buf, len, table, data); 5393 5394 if (invertCRC) { 5395 not_(crc, noreg, false); // 1s complement of crc 5396 } 5397 } 5398 5399 /** 5400 * @param crc register containing existing CRC (32-bit) 5401 * @param buf register pointing to input byte buffer (byte*) 5402 * @param len register containing number of bytes 5403 * @param table register pointing to CRC table 5404 */ 5405 void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table, 5406 Register t0, Register t1, Register t2, Register t3, 5407 bool invertCRC) { 5408 assert_different_registers(crc, buf, len, table); 5409 Register data = t0; 5410 5411 if (invertCRC) { 5412 not_(crc, noreg, false); // 1s complement of crc 5413 } 5414 5415 update_byteLoop_crc32(crc, buf, len, table, data); 5416 5417 if (invertCRC) { 5418 not_(crc, noreg, false); // 1s complement of crc 5419 } 5420 } 5421 5422 void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp, 5423 bool invertCRC) { 5424 assert_different_registers(crc, buf, len, table, tmp); 5425 5426 if (invertCRC) { 5427 not_(crc, noreg, false); // 1s complement of crc 5428 } 5429 5430 z_llgc(tmp, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register. 5431 update_byte_crc32(crc, tmp, table); 5432 5433 if (invertCRC) { 5434 not_(crc, noreg, false); // 1s complement of crc 5435 } 5436 } 5437 5438 void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table, 5439 bool invertCRC) { 5440 assert_different_registers(crc, val, table); 5441 5442 if (invertCRC) { 5443 not_(crc, noreg, false); // 1s complement of crc 5444 } 5445 5446 update_byte_crc32(crc, val, table); 5447 5448 if (invertCRC) { 5449 not_(crc, noreg, false); // 1s complement of crc 5450 } 5451 } 5452 5453 // 5454 // Code for BigInteger::multiplyToLen() intrinsic. 5455 // 5456 5457 // dest_lo += src1 + src2 5458 // dest_hi += carry1 + carry2 5459 // Z_R7 is destroyed ! 5460 void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo, 5461 Register src1, Register src2) { 5462 clear_reg(Z_R7); 5463 z_algr(dest_lo, src1); 5464 z_alcgr(dest_hi, Z_R7); 5465 z_algr(dest_lo, src2); 5466 z_alcgr(dest_hi, Z_R7); 5467 } 5468 5469 // Multiply 64 bit by 64 bit first loop. 5470 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, 5471 Register x_xstart, 5472 Register y, Register y_idx, 5473 Register z, 5474 Register carry, 5475 Register product, 5476 Register idx, Register kdx) { 5477 // jlong carry, x[], y[], z[]; 5478 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 5479 // huge_128 product = y[idx] * x[xstart] + carry; 5480 // z[kdx] = (jlong)product; 5481 // carry = (jlong)(product >>> 64); 5482 // } 5483 // z[xstart] = carry; 5484 5485 Label L_first_loop, L_first_loop_exit; 5486 Label L_one_x, L_one_y, L_multiply; 5487 5488 z_aghi(xstart, -1); 5489 z_brl(L_one_x); // Special case: length of x is 1. 5490 5491 // Load next two integers of x. 5492 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5493 mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0)); 5494 5495 5496 bind(L_first_loop); 5497 5498 z_aghi(idx, -1); 5499 z_brl(L_first_loop_exit); 5500 z_aghi(idx, -1); 5501 z_brl(L_one_y); 5502 5503 // Load next two integers of y. 
5504 z_sllg(Z_R1_scratch, idx, LogBytesPerInt); 5505 mem2reg_opt(y_idx, Address(y, Z_R1_scratch, 0)); 5506 5507 5508 bind(L_multiply); 5509 5510 Register multiplicand = product->successor(); 5511 Register product_low = multiplicand; 5512 5513 lgr_if_needed(multiplicand, x_xstart); 5514 z_mlgr(product, y_idx); // multiplicand * y_idx -> product::multiplicand 5515 clear_reg(Z_R7); 5516 z_algr(product_low, carry); // Add carry to result. 5517 z_alcgr(product, Z_R7); // Add carry of the last addition. 5518 add2reg(kdx, -2); 5519 5520 // Store result. 5521 z_sllg(Z_R7, kdx, LogBytesPerInt); 5522 reg2mem_opt(product_low, Address(z, Z_R7, 0)); 5523 lgr_if_needed(carry, product); 5524 z_bru(L_first_loop); 5525 5526 5527 bind(L_one_y); // Load one 32 bit portion of y as (0,value). 5528 5529 clear_reg(y_idx); 5530 mem2reg_opt(y_idx, Address(y, (intptr_t) 0), false); 5531 z_bru(L_multiply); 5532 5533 5534 bind(L_one_x); // Load one 32 bit portion of x as (0,value). 5535 5536 clear_reg(x_xstart); 5537 mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false); 5538 z_bru(L_first_loop); 5539 5540 bind(L_first_loop_exit); 5541 } 5542 5543 // Multiply 64 bit by 64 bit and add 128 bit. 5544 void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y, 5545 Register z, 5546 Register yz_idx, Register idx, 5547 Register carry, Register product, 5548 int offset) { 5549 // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry; 5550 // z[kdx] = (jlong)product; 5551 5552 Register multiplicand = product->successor(); 5553 Register product_low = multiplicand; 5554 5555 z_sllg(Z_R7, idx, LogBytesPerInt); 5556 mem2reg_opt(yz_idx, Address(y, Z_R7, offset)); 5557 5558 lgr_if_needed(multiplicand, x_xstart); 5559 z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand 5560 mem2reg_opt(yz_idx, Address(z, Z_R7, offset)); 5561 5562 add2_with_carry(product, product_low, carry, yz_idx); 5563 5564 z_sllg(Z_R7, idx, LogBytesPerInt); 5565 reg2mem_opt(product_low, Address(z, Z_R7, offset)); 5566 5567 } 5568 5569 // Multiply 128 bit by 128 bit. Unrolled inner loop. 
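// Note on the multiply helpers above and below: z_mlgr() operates on an even/odd
// register pair. 'product' must be the even register; its successor (aliased as
// multiplicand/product_low) is the odd one. MLGR takes the multiplicand from the
// odd register and leaves the 128-bit result with the high half in the even and
// the low half in the odd register.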
5570 void MacroAssembler::multiply_128_x_128_loop(Register x_xstart, 5571 Register y, Register z, 5572 Register yz_idx, Register idx, 5573 Register jdx, 5574 Register carry, Register product, 5575 Register carry2) { 5576 // jlong carry, x[], y[], z[]; 5577 // int kdx = ystart+1; 5578 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop 5579 // huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry; 5580 // z[kdx+idx+1] = (jlong)product; 5581 // jlong carry2 = (jlong)(product >>> 64); 5582 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry2; 5583 // z[kdx+idx] = (jlong)product; 5584 // carry = (jlong)(product >>> 64); 5585 // } 5586 // idx += 2; 5587 // if (idx > 0) { 5588 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry; 5589 // z[kdx+idx] = (jlong)product; 5590 // carry = (jlong)(product >>> 64); 5591 // } 5592 5593 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; 5594 5595 // scale the index 5596 lgr_if_needed(jdx, idx); 5597 and_imm(jdx, 0xfffffffffffffffcL); 5598 rshift(jdx, 2); 5599 5600 5601 bind(L_third_loop); 5602 5603 z_aghi(jdx, -1); 5604 z_brl(L_third_loop_exit); 5605 add2reg(idx, -4); 5606 5607 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8); 5608 lgr_if_needed(carry2, product); 5609 5610 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0); 5611 lgr_if_needed(carry, product); 5612 z_bru(L_third_loop); 5613 5614 5615 bind(L_third_loop_exit); // Handle any left-over operand parts. 5616 5617 and_imm(idx, 0x3); 5618 z_brz(L_post_third_loop_done); 5619 5620 Label L_check_1; 5621 5622 z_aghi(idx, -2); 5623 z_brl(L_check_1); 5624 5625 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0); 5626 lgr_if_needed(carry, product); 5627 5628 5629 bind(L_check_1); 5630 5631 add2reg(idx, 0x2); 5632 and_imm(idx, 0x1); 5633 z_aghi(idx, -1); 5634 z_brl(L_post_third_loop_done); 5635 5636 Register multiplicand = product->successor(); 5637 Register product_low = multiplicand; 5638 5639 z_sllg(Z_R7, idx, LogBytesPerInt); 5640 clear_reg(yz_idx); 5641 mem2reg_opt(yz_idx, Address(y, Z_R7, 0), false); 5642 lgr_if_needed(multiplicand, x_xstart); 5643 z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand 5644 clear_reg(yz_idx); 5645 mem2reg_opt(yz_idx, Address(z, Z_R7, 0), false); 5646 5647 add2_with_carry(product, product_low, yz_idx, carry); 5648 5649 z_sllg(Z_R7, idx, LogBytesPerInt); 5650 reg2mem_opt(product_low, Address(z, Z_R7, 0), false); 5651 rshift(product_low, 32); 5652 5653 lshift(product, 32); 5654 z_ogr(product_low, product); 5655 lgr_if_needed(carry, product_low); 5656 5657 bind(L_post_third_loop_done); 5658 } 5659 5660 void MacroAssembler::multiply_to_len(Register x, Register xlen, 5661 Register y, Register ylen, 5662 Register z, 5663 Register tmp1, Register tmp2, 5664 Register tmp3, Register tmp4, 5665 Register tmp5) { 5666 ShortBranchVerifier sbv(this); 5667 5668 assert_different_registers(x, xlen, y, ylen, z, 5669 tmp1, tmp2, tmp3, tmp4, tmp5, Z_R1_scratch, Z_R7); 5670 assert_different_registers(x, xlen, y, ylen, z, 5671 tmp1, tmp2, tmp3, tmp4, tmp5, Z_R8); 5672 5673 z_stmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP); 5674 5675 const Register idx = tmp1; 5676 const Register kdx = tmp2; 5677 const Register xstart = tmp3; 5678 5679 const Register y_idx = tmp4; 5680 const Register carry = tmp5; 5681 const Register product = Z_R0_scratch; 5682 const Register x_xstart = Z_R8; 5683 5684 // First Loop. 
5685 // 5686 // final static long LONG_MASK = 0xffffffffL; 5687 // int xstart = xlen - 1; 5688 // int ystart = ylen - 1; 5689 // long carry = 0; 5690 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx-, kdx--) { 5691 // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry; 5692 // z[kdx] = (int)product; 5693 // carry = product >>> 32; 5694 // } 5695 // z[xstart] = (int)carry; 5696 // 5697 5698 lgr_if_needed(idx, ylen); // idx = ylen 5699 z_agrk(kdx, xlen, ylen); // kdx = xlen + ylen 5700 clear_reg(carry); // carry = 0 5701 5702 Label L_done; 5703 5704 lgr_if_needed(xstart, xlen); 5705 z_aghi(xstart, -1); 5706 z_brl(L_done); 5707 5708 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 5709 5710 NearLabel L_second_loop; 5711 compare64_and_branch(kdx, RegisterOrConstant((intptr_t) 0), bcondEqual, L_second_loop); 5712 5713 NearLabel L_carry; 5714 z_aghi(kdx, -1); 5715 z_brz(L_carry); 5716 5717 // Store lower 32 bits of carry. 5718 z_sllg(Z_R1_scratch, kdx, LogBytesPerInt); 5719 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5720 rshift(carry, 32); 5721 z_aghi(kdx, -1); 5722 5723 5724 bind(L_carry); 5725 5726 // Store upper 32 bits of carry. 5727 z_sllg(Z_R1_scratch, kdx, LogBytesPerInt); 5728 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5729 5730 // Second and third (nested) loops. 5731 // 5732 // for (int i = xstart-1; i >= 0; i--) { // Second loop 5733 // carry = 0; 5734 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop 5735 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + 5736 // (z[k] & LONG_MASK) + carry; 5737 // z[k] = (int)product; 5738 // carry = product >>> 32; 5739 // } 5740 // z[i] = (int)carry; 5741 // } 5742 // 5743 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx 5744 5745 const Register jdx = tmp1; 5746 5747 bind(L_second_loop); 5748 5749 clear_reg(carry); // carry = 0; 5750 lgr_if_needed(jdx, ylen); // j = ystart+1 5751 5752 z_aghi(xstart, -1); // i = xstart-1; 5753 z_brl(L_done); 5754 5755 // Use free slots in the current stackframe instead of push/pop. 5756 Address zsave(Z_SP, _z_abi(carg_1)); 5757 reg2mem_opt(z, zsave); 5758 5759 5760 Label L_last_x; 5761 5762 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5763 load_address(z, Address(z, Z_R1_scratch, 4)); // z = z + k - j 5764 z_aghi(xstart, -1); // i = xstart-1; 5765 z_brl(L_last_x); 5766 5767 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5768 mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0)); 5769 5770 5771 Label L_third_loop_prologue; 5772 5773 bind(L_third_loop_prologue); 5774 5775 Address xsave(Z_SP, _z_abi(carg_2)); 5776 Address xlensave(Z_SP, _z_abi(carg_3)); 5777 Address ylensave(Z_SP, _z_abi(carg_4)); 5778 5779 reg2mem_opt(x, xsave); 5780 reg2mem_opt(xstart, xlensave); 5781 reg2mem_opt(ylen, ylensave); 5782 5783 5784 multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x); 5785 5786 mem2reg_opt(z, zsave); 5787 mem2reg_opt(x, xsave); 5788 mem2reg_opt(xlen, xlensave); // This is the decrement of the loop counter! 5789 mem2reg_opt(ylen, ylensave); 5790 5791 add2reg(tmp3, 1, xlen); 5792 z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt); 5793 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5794 z_aghi(tmp3, -1); 5795 z_brl(L_done); 5796 5797 rshift(carry, 32); 5798 z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt); 5799 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5800 z_bru(L_second_loop); 5801 5802 // Next infrequent code is moved outside loops. 
5803 bind(L_last_x); 5804 5805 clear_reg(x_xstart); 5806 mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false); 5807 z_bru(L_third_loop_prologue); 5808 5809 bind(L_done); 5810 5811 z_lmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP); 5812 } 5813 5814 void MacroAssembler::asm_assert(branch_condition cond, const char* msg, int id, bool is_static) { 5815 #ifdef ASSERT 5816 Label ok; 5817 z_brc(cond, ok); 5818 is_static ? stop_static(msg, id) : stop(msg, id); 5819 bind(ok); 5820 #endif // ASSERT 5821 } 5822 5823 // Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false). 5824 void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) { 5825 #ifdef ASSERT 5826 asm_assert(check_equal ? bcondEqual : bcondNotEqual, msg, id); 5827 #endif // ASSERT 5828 } 5829 5830 void MacroAssembler::asm_assert_mems_zero(bool check_equal, bool allow_relocation, int size, int64_t mem_offset, 5831 Register mem_base, const char* msg, int id) { 5832 #ifdef ASSERT 5833 switch (size) { 5834 case 4: 5835 load_and_test_int(Z_R0, Address(mem_base, mem_offset)); 5836 break; 5837 case 8: 5838 load_and_test_long(Z_R0, Address(mem_base, mem_offset)); 5839 break; 5840 default: 5841 ShouldNotReachHere(); 5842 } 5843 // if relocation is not allowed then stop_static() will be called otherwise call stop() 5844 asm_assert(check_equal ? bcondEqual : bcondNotEqual, msg, id, !allow_relocation); 5845 #endif // ASSERT 5846 } 5847 5848 // Check the condition 5849 // expected_size == FP - SP 5850 // after transformation: 5851 // expected_size - FP + SP == 0 5852 // Destroys Register expected_size if no tmp register is passed. 5853 void MacroAssembler::asm_assert_frame_size(Register expected_size, Register tmp, const char* msg, int id) { 5854 #ifdef ASSERT 5855 lgr_if_needed(tmp, expected_size); 5856 z_algr(tmp, Z_SP); 5857 z_slg(tmp, 0, Z_R0, Z_SP); 5858 asm_assert(bcondEqual, msg, id); 5859 #endif // ASSERT 5860 } 5861 5862 // Save and restore functions: Exclude Z_R0. 
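// Layout written by save_volatile_regs() and read back by restore_volatile_regs():
// 5 GPRs (Z_R1..Z_R5), optionally followed by 8 FPRs (Z_F0..Z_F7), optionally
// followed by one more word holding an encoding of the condition code
// (2 = equal, 4 = higher, 1 = lower). Callers size their save area accordingly,
// e.g. verify_oop() below reserves (5 + 8 + 1) * BytesPerWord.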
5863 void MacroAssembler::save_volatile_regs(Register dst, int offset, bool include_fp, bool include_flags) { 5864 z_stmg(Z_R1, Z_R5, offset, dst); offset += 5 * BytesPerWord; 5865 if (include_fp) { 5866 z_std(Z_F0, Address(dst, offset)); offset += BytesPerWord; 5867 z_std(Z_F1, Address(dst, offset)); offset += BytesPerWord; 5868 z_std(Z_F2, Address(dst, offset)); offset += BytesPerWord; 5869 z_std(Z_F3, Address(dst, offset)); offset += BytesPerWord; 5870 z_std(Z_F4, Address(dst, offset)); offset += BytesPerWord; 5871 z_std(Z_F5, Address(dst, offset)); offset += BytesPerWord; 5872 z_std(Z_F6, Address(dst, offset)); offset += BytesPerWord; 5873 z_std(Z_F7, Address(dst, offset)); offset += BytesPerWord; 5874 } 5875 if (include_flags) { 5876 Label done; 5877 z_mvi(Address(dst, offset), 2); // encoding: equal 5878 z_bre(done); 5879 z_mvi(Address(dst, offset), 4); // encoding: higher 5880 z_brh(done); 5881 z_mvi(Address(dst, offset), 1); // encoding: lower 5882 bind(done); 5883 } 5884 } 5885 void MacroAssembler::restore_volatile_regs(Register src, int offset, bool include_fp, bool include_flags) { 5886 z_lmg(Z_R1, Z_R5, offset, src); offset += 5 * BytesPerWord; 5887 if (include_fp) { 5888 z_ld(Z_F0, Address(src, offset)); offset += BytesPerWord; 5889 z_ld(Z_F1, Address(src, offset)); offset += BytesPerWord; 5890 z_ld(Z_F2, Address(src, offset)); offset += BytesPerWord; 5891 z_ld(Z_F3, Address(src, offset)); offset += BytesPerWord; 5892 z_ld(Z_F4, Address(src, offset)); offset += BytesPerWord; 5893 z_ld(Z_F5, Address(src, offset)); offset += BytesPerWord; 5894 z_ld(Z_F6, Address(src, offset)); offset += BytesPerWord; 5895 z_ld(Z_F7, Address(src, offset)); offset += BytesPerWord; 5896 } 5897 if (include_flags) { 5898 z_cli(Address(src, offset), 2); // see encoding above 5899 } 5900 } 5901 5902 // Plausibility check for oops. 
5903 void MacroAssembler::verify_oop(Register oop, const char* msg) {
5904 if (!VerifyOops) return;
5905
5906 BLOCK_COMMENT("verify_oop {");
5907 unsigned int nbytes_save = (5 + 8 + 1) * BytesPerWord;
5908 address entry_addr = StubRoutines::verify_oop_subroutine_entry_address();
5909
5910 save_return_pc();
5911
5912 // Push frame, but preserve flags
5913 z_lgr(Z_R0, Z_SP);
5914 z_lay(Z_SP, -((int64_t)nbytes_save + frame::z_abi_160_size), Z_SP);
5915 z_stg(Z_R0, _z_abi(callers_sp), Z_SP);
5916
5917 save_volatile_regs(Z_SP, frame::z_abi_160_size, true, true);
5918
5919 lgr_if_needed(Z_ARG2, oop);
5920 load_const_optimized(Z_ARG1, (address)msg);
5921 load_const_optimized(Z_R1, entry_addr);
5922 z_lg(Z_R1, 0, Z_R1);
5923 call_c(Z_R1);
5924
5925 restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, true);
5926 pop_frame();
5927 restore_return_pc();
5928
5929 BLOCK_COMMENT("} verify_oop ");
5930 }
5931
5932 void MacroAssembler::verify_oop_addr(Address addr, const char* msg) {
5933 if (!VerifyOops) return;
5934
5935 BLOCK_COMMENT("verify_oop {");
5936 unsigned int nbytes_save = (5 + 8) * BytesPerWord;
5937 address entry_addr = StubRoutines::verify_oop_subroutine_entry_address();
5938
5939 save_return_pc();
5940 unsigned int frame_size = push_frame_abi160(nbytes_save); // kills Z_R0
5941 save_volatile_regs(Z_SP, frame::z_abi_160_size, true, false);
5942
5943 z_lg(Z_ARG2, addr.plus_disp(frame_size));
5944 load_const_optimized(Z_ARG1, (address)msg);
5945 load_const_optimized(Z_R1, entry_addr);
5946 z_lg(Z_R1, 0, Z_R1);
5947 call_c(Z_R1);
5948
5949 restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, false);
5950 pop_frame();
5951 restore_return_pc();
5952
5953 BLOCK_COMMENT("} verify_oop ");
5954 }
5955
5956 const char* MacroAssembler::stop_types[] = {
5957 "stop",
5958 "untested",
5959 "unimplemented",
5960 "shouldnotreachhere"
5961 };
5962
5963 static void stop_on_request(const char* tp, const char* msg) {
5964 tty->print("Z assembly code requires stop: (%s) %s\n", tp, msg);
5965 guarantee(false, "Z assembly code requires stop: %s", msg);
5966 }
5967
5968 void MacroAssembler::stop(int type, const char* msg, int id) {
5969 BLOCK_COMMENT(err_msg("stop: %s {", msg));
5970
5971 // Setup arguments.
5972 load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
5973 load_const(Z_ARG2, (void*) msg);
5974 get_PC(Z_R14); // Following code pushes a frame without entering a new function. Use current pc as return address.
5975 save_return_pc(); // Saves return pc Z_R14.
5976 push_frame_abi160(0);
5977 call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
5978 // The plain disassembler does not recognize illtrap. It instead displays
5979 // a 32-bit value. Issuing two illtraps ensures the disassembler finds
5980 // the proper beginning of the next instruction.
5981 z_illtrap(id); // Illegal instruction.
5982 z_illtrap(id); // Illegal instruction.
5983
5984 BLOCK_COMMENT(" } stop");
5985 }
5986
5987 // Special version of stop() for code size reduction.
5988 // Reuses the previously generated call sequence, if any.
5989 // Generates the call sequence on its own, if necessary.
5990 // Note: This code will work only in non-relocatable code!
5991 // The relative address of the data elements (arg1, arg2) must not change.
5992 // The reentry point must not move relative to its users. This prerequisite
5993 // should hold for "hand-written" code, if all chain calls are in the same code blob.
5994 // Generated code must not undergo any transformation, e.g. ShortenBranches, to be safe.
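// Illustrative use (sketch only, not code from this file; 'type' and the ids are
// placeholders): within a MacroAssembler emitter, several static stops can share
// one call sequence by threading the returned reentry point through later calls:
//   address reentry = nullptr;
//   reentry = stop_chain(reentry, type, "first message", 0x01, false);
//   reentry = stop_chain(reentry, type, "second message", 0x02, false); // sets its msg, then branches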
5995 address MacroAssembler::stop_chain(address reentry, int type, const char* msg, int id, bool allow_relocation) { 5996 BLOCK_COMMENT(err_msg("stop_chain(%s,%s): %s {", reentry==nullptr?"init":"cont", allow_relocation?"reloc ":"static", msg)); 5997 5998 // Setup arguments. 5999 if (allow_relocation) { 6000 // Relocatable version (for comparison purposes). Remove after some time. 6001 load_const(Z_ARG1, (void*) stop_types[type%stop_end]); 6002 load_const(Z_ARG2, (void*) msg); 6003 } else { 6004 load_absolute_address(Z_ARG1, (address)stop_types[type%stop_end]); 6005 load_absolute_address(Z_ARG2, (address)msg); 6006 } 6007 if ((reentry != nullptr) && RelAddr::is_in_range_of_RelAddr16(reentry, pc())) { 6008 BLOCK_COMMENT("branch to reentry point:"); 6009 z_brc(bcondAlways, reentry); 6010 } else { 6011 BLOCK_COMMENT("reentry point:"); 6012 reentry = pc(); // Re-entry point for subsequent stop calls. 6013 save_return_pc(); // Saves return pc Z_R14. 6014 push_frame_abi160(0); 6015 if (allow_relocation) { 6016 reentry = nullptr; // Prevent reentry if code relocation is allowed. 6017 call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2); 6018 } else { 6019 call_VM_leaf_static(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2); 6020 } 6021 z_illtrap(id); // Illegal instruction as emergency stop, should the above call return. 6022 } 6023 BLOCK_COMMENT(" } stop_chain"); 6024 6025 return reentry; 6026 } 6027 6028 // Special version of stop() for code size reduction. 6029 // Assumes constant relative addresses for data and runtime call. 6030 void MacroAssembler::stop_static(int type, const char* msg, int id) { 6031 stop_chain(nullptr, type, msg, id, false); 6032 } 6033 6034 void MacroAssembler::stop_subroutine() { 6035 unimplemented("stop_subroutine", 710); 6036 } 6037 6038 // Prints msg to stdout from within generated code.. 6039 void MacroAssembler::warn(const char* msg) { 6040 RegisterSaver::save_live_registers(this, RegisterSaver::all_registers, Z_R14); 6041 load_absolute_address(Z_R1, (address) warning); 6042 load_absolute_address(Z_ARG1, (address) msg); 6043 (void) call(Z_R1); 6044 RegisterSaver::restore_live_registers(this, RegisterSaver::all_registers); 6045 } 6046 6047 #ifndef PRODUCT 6048 6049 // Write pattern 0x0101010101010101 in region [low-before, high+after]. 6050 void MacroAssembler::zap_from_to(Register low, Register high, Register val, Register addr, int before, int after) { 6051 if (!ZapEmptyStackFields) return; 6052 BLOCK_COMMENT("zap memory region {"); 6053 load_const_optimized(val, 0x0101010101010101); 6054 int size = before + after; 6055 if (low == high && size < 5 && size > 0) { 6056 int offset = -before*BytesPerWord; 6057 for (int i = 0; i < size; ++i) { 6058 z_stg(val, Address(low, offset)); 6059 offset +=(1*BytesPerWord); 6060 } 6061 } else { 6062 add2reg(addr, -before*BytesPerWord, low); 6063 if (after) { 6064 #ifdef ASSERT 6065 jlong check = after * BytesPerWord; 6066 assert(Immediate::is_simm32(check) && Immediate::is_simm32(-check), "value not encodable !"); 6067 #endif 6068 add2reg(high, after * BytesPerWord); 6069 } 6070 NearLabel loop; 6071 bind(loop); 6072 z_stg(val, Address(addr)); 6073 add2reg(addr, 8); 6074 compare64_and_branch(addr, high, bcondNotHigh, loop); 6075 if (after) { 6076 add2reg(high, -after * BytesPerWord); 6077 } 6078 } 6079 BLOCK_COMMENT("} zap memory region"); 6080 } 6081 #endif // !PRODUCT 6082 6083 // Implements lightweight-locking. 6084 // - obj: the object to be locked, contents preserved. 
//  - temp1, temp2: temporary registers, contents destroyed.
//  Note: make sure Z_R1 is not manipulated here when the C2 compiler is in play.
void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Register temp1, Register temp2, Label& slow) {

  assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
  assert_different_registers(basic_lock, obj, temp1, temp2);

  Label push;
  const Register top           = temp1;
  const Register mark          = temp2;
  const int mark_offset        = oopDesc::mark_offset_in_bytes();
  const ByteSize ls_top_offset = JavaThread::lock_stack_top_offset();

  // Preload the markWord. It is important that this is the first
  // instruction emitted as it is part of C1's null check semantics.
  z_lg(mark, Address(obj, mark_offset));

  if (UseObjectMonitorTable) {
    // Clear cache in case fast locking succeeds.
    const Address om_cache_addr = Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize((BasicLock::object_monitor_cache_offset_in_bytes())));
    z_mvghi(om_cache_addr, 0);
  }

  // First we need to check if the lock-stack has room for pushing the object reference.
  z_lgf(top, Address(Z_thread, ls_top_offset));

  compareU32_and_branch(top, (unsigned)LockStack::end_offset(), bcondNotLow, slow);

  // The underflow check is elided. The recursive check will always fail
  // when the lock stack is empty because of the _bad_oop_sentinel field.

  // Check for recursion:
  z_aghi(top, -oopSize);
  z_cg(obj, Address(Z_thread, top));
  z_bre(push);

  // Check header for monitor (0b10).
  z_tmll(mark, markWord::monitor_value);
  branch_optimized(bcondNotAllZero, slow);

  { // Try to lock. Transition lock bits 0b01 => 0b00
    const Register locked_obj = top;
    z_oill(mark, markWord::unlocked_value);
    z_lgr(locked_obj, mark);
    // Clear lock-bits from locked_obj (locked state)
    z_xilf(locked_obj, markWord::unlocked_value);
    z_csg(mark, locked_obj, mark_offset, obj);
    branch_optimized(Assembler::bcondNotEqual, slow);
  }

  bind(push);

  // After successful lock, push object on lock-stack
  z_lgf(top, Address(Z_thread, ls_top_offset));
  z_stg(obj, Address(Z_thread, top));
  z_alsi(in_bytes(ls_top_offset), Z_thread, oopSize);
}

// Implements lightweight-unlocking.
//  - obj: the object to be unlocked
//  - temp1, temp2: temporary registers, will be destroyed
//  - Z_R1_scratch: will be killed in case of Interpreter & C1 Compiler
void MacroAssembler::lightweight_unlock(Register obj, Register temp1, Register temp2, Label& slow) {

  assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
  assert_different_registers(obj, temp1, temp2);

  Label unlocked, push_and_slow;
  const Register mark          = temp1;
  const Register top           = temp2;
  const int mark_offset        = oopDesc::mark_offset_in_bytes();
  const ByteSize ls_top_offset = JavaThread::lock_stack_top_offset();

#ifdef ASSERT
  {
    // The following checks rely on the fact that LockStack is only ever modified by
    // its owning thread, even if the lock got inflated concurrently; removal of LockStack
    // entries after inflation will happen delayed in that case.

    // Check for lock-stack underflow.
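    // (Underflow here would mean the cached top offset points below LockStack::start_offset(),
    //  i.e. the per-thread lock-stack bookkeeping has been corrupted; the check below traps in
    //  that case.)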
    NearLabel stack_ok;
    z_lgf(top, Address(Z_thread, ls_top_offset));
    compareU32_and_branch(top, (unsigned)LockStack::start_offset(), bcondNotLow, stack_ok);
    stop("Lock-stack underflow");
    bind(stack_ok);
  }
#endif // ASSERT

  // Check if obj is top of lock-stack.
  z_lgf(top, Address(Z_thread, ls_top_offset));
  z_aghi(top, -oopSize);
  z_cg(obj, Address(Z_thread, top));
  branch_optimized(bcondNotEqual, slow);

  // pop object from lock-stack
#ifdef ASSERT
  const Register temp_top = temp1; // mark is not yet loaded, but be careful
  z_agrk(temp_top, top, Z_thread);
  z_xc(0, oopSize-1, temp_top, 0, temp_top); // wipe out lock-stack entry
#endif // ASSERT
  z_alsi(in_bytes(ls_top_offset), Z_thread, -oopSize); // pop object

  // The underflow check is elided. The recursive check will always fail
  // when the lock stack is empty because of the _bad_oop_sentinel field.

  // Check if recursive. (this is a check for the 2nd object on the stack)
  z_aghi(top, -oopSize);
  z_cg(obj, Address(Z_thread, top));
  branch_optimized(bcondEqual, unlocked);

  // Not recursive. Check header for monitor (0b10).
  z_lg(mark, Address(obj, mark_offset));
  z_tmll(mark, markWord::monitor_value);
  z_brnaz(push_and_slow);

#ifdef ASSERT
  // Check header not unlocked (0b01).
  NearLabel not_unlocked;
  z_tmll(mark, markWord::unlocked_value);
  z_braz(not_unlocked);
  stop("lightweight_unlock already unlocked");
  bind(not_unlocked);
#endif // ASSERT

  { // Try to unlock. Transition lock bits 0b00 => 0b01
    Register unlocked_obj = top;
    z_lgr(unlocked_obj, mark);
    z_oill(unlocked_obj, markWord::unlocked_value);
    z_csg(mark, unlocked_obj, mark_offset, obj);
    branch_optimized(Assembler::bcondEqual, unlocked);
  }

  bind(push_and_slow);

  // Restore lock-stack and handle the unlock in runtime.
  z_lgf(top, Address(Z_thread, ls_top_offset));
  DEBUG_ONLY(z_stg(obj, Address(Z_thread, top));)
  z_alsi(in_bytes(ls_top_offset), Z_thread, oopSize);
  // set CC to NE
  z_ltgr(obj, obj); // object shouldn't be null at this point
  branch_optimized(bcondAlways, slow);

  bind(unlocked);
}

void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2) {
  assert_different_registers(obj, box, tmp1, tmp2, Z_R0_scratch);

  // Handle inflated monitor.
  NearLabel inflated;
  // Finish fast lock successfully. MUST be reached with flag == EQ.
  NearLabel locked;
  // Finish fast lock unsuccessfully. MUST be branched to with flag == NE.
  NearLabel slow_path;

  if (UseObjectMonitorTable) {
    // Clear cache in case fast locking succeeds.
    z_mvghi(Address(box, BasicLock::object_monitor_cache_offset_in_bytes()), 0);
  }

  if (DiagnoseSyncOnValueBasedClasses != 0) {
    load_klass(tmp1, obj);
    z_tm(Address(tmp1, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class);
    z_brne(slow_path);
  }

  const Register mark          = tmp1;
  const int mark_offset        = oopDesc::mark_offset_in_bytes();
  const ByteSize ls_top_offset = JavaThread::lock_stack_top_offset();

  BLOCK_COMMENT("compiler_fast_lightweight_locking {");
  { // lightweight locking

    // Push lock to the lock stack and finish successfully. MUST be reached with flag == EQ.
    NearLabel push;

    const Register top = tmp2;

    // Check if lock-stack is full.
    z_lgf(top, Address(Z_thread, ls_top_offset));
    compareU32_and_branch(top, (unsigned) LockStack::end_offset() - 1, bcondHigh, slow_path);

    // The underflow check is elided. The recursive check will always fail
    // when the lock stack is empty because of the _bad_oop_sentinel field.

    // Check if recursive.
    z_aghi(top, -oopSize);
    z_cg(obj, Address(Z_thread, top));
    z_bre(push);

    // Check for monitor (0b10)
    z_lg(mark, Address(obj, mark_offset));
    z_tmll(mark, markWord::monitor_value);
    z_brnaz(inflated);

    // not inflated

    { // Try to lock. Transition lock bits 0b01 => 0b00
      assert(mark_offset == 0, "required to avoid a lea");
      const Register locked_obj = top;
      z_oill(mark, markWord::unlocked_value);
      z_lgr(locked_obj, mark);
      // Clear lock-bits from locked_obj (locked state)
      z_xilf(locked_obj, markWord::unlocked_value);
      z_csg(mark, locked_obj, mark_offset, obj);
      branch_optimized(Assembler::bcondNotEqual, slow_path);
    }

    bind(push);

    // After successful lock, push object on lock-stack.
    z_lgf(top, Address(Z_thread, ls_top_offset));
    z_stg(obj, Address(Z_thread, top));
    z_alsi(in_bytes(ls_top_offset), Z_thread, oopSize);

    z_cgr(obj, obj); // set the CC to EQ, as it could be changed by alsi
    z_bru(locked);
  }
  BLOCK_COMMENT("} compiler_fast_lightweight_locking");

  BLOCK_COMMENT("handle_inflated_monitor_lightweight_locking {");
  { // Handle inflated monitor.
    bind(inflated);

    const Register tmp1_monitor = tmp1;
    if (!UseObjectMonitorTable) {
      assert(tmp1_monitor == mark, "should be the same here");
    } else {
      NearLabel monitor_found;

      // load cache address
      z_la(tmp1, Address(Z_thread, JavaThread::om_cache_oops_offset()));

      const int num_unrolled = 2;
      for (int i = 0; i < num_unrolled; i++) {
        z_cg(obj, Address(tmp1));
        z_bre(monitor_found);
        add2reg(tmp1, in_bytes(OMCache::oop_to_oop_difference()));
      }

      NearLabel loop;
      // Search for obj in cache

      bind(loop);

      // check for match.
      z_cg(obj, Address(tmp1));
      z_bre(monitor_found);

      // search until null encountered, guaranteed _null_sentinel at end.
      add2reg(tmp1, in_bytes(OMCache::oop_to_oop_difference()));
      z_cghsi(0, tmp1, 0);
      z_brne(loop); // if not EQ to 0, go for another loop

      // We reached the end: cache miss.
      z_ltgr(obj, obj); // set CC to NE
      z_bru(slow_path);

      // cache hit
      bind(monitor_found);
      z_lg(tmp1_monitor, Address(tmp1, OMCache::oop_to_monitor_difference()));
    }
    NearLabel monitor_locked;
    // lock the monitor

    // mark contains the tagged ObjectMonitor*.
    const Register tagged_monitor = mark;
    const Register zero           = tmp2;

    const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
    const Address owner_address(tmp1_monitor, ObjectMonitor::owner_offset() - monitor_tag);
    const Address recursions_address(tmp1_monitor, ObjectMonitor::recursions_offset() - monitor_tag);

    // Try to CAS owner (no owner => current thread's _lock_id).
    // If csg succeeds then CC=EQ; otherwise, register zero is filled
    // with the current owner.
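    // (The owner field is CASed from 0 to the value loaded from JavaThread::lock_id_offset(),
    //  so a successful csg registers this thread as the monitor's owner; on failure, the
    //  recursion check below compares the owner loaded into 'zero' against our lock id.)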
    z_lghi(zero, 0);
    z_lg(Z_R0_scratch, Address(Z_thread, JavaThread::lock_id_offset()));
    z_csg(zero, Z_R0_scratch, owner_address);
    z_bre(monitor_locked);

    // Check if recursive.
    z_cgr(Z_R0_scratch, zero); // zero contains the owner from the z_csg instruction
    z_brne(slow_path);

    // Recursive
    z_agsi(recursions_address, 1ll);

    bind(monitor_locked);
    if (UseObjectMonitorTable) {
      // Cache the monitor for unlock
      z_stg(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
    }
    // set the CC now
    z_cgr(obj, obj);
  }
  BLOCK_COMMENT("} handle_inflated_monitor_lightweight_locking");

  bind(locked);

#ifdef ASSERT
  // Check that locked label is reached with flag == EQ.
  NearLabel flag_correct;
  z_bre(flag_correct);
  stop("CC is not set to EQ, it should be - lock");
#endif // ASSERT

  bind(slow_path);

#ifdef ASSERT
  // Check that slow_path label is reached with flag == NE.
  z_brne(flag_correct);
  stop("CC is not set to NE, it should be - lock");
  bind(flag_correct);
#endif // ASSERT

  // C2 uses the value of flag (NE vs EQ) to determine the continuation.
}

void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2) {
  assert_different_registers(obj, box, tmp1, tmp2);

  // Handle inflated monitor.
  NearLabel inflated, inflated_load_mark;
  // Finish fast unlock successfully. MUST be reached with flag == EQ.
  NearLabel unlocked;
  // Finish fast unlock unsuccessfully. MUST be branched to with flag == NE.
  NearLabel slow_path;

  const Register mark          = tmp1;
  const Register top           = tmp2;
  const int mark_offset        = oopDesc::mark_offset_in_bytes();
  const ByteSize ls_top_offset = JavaThread::lock_stack_top_offset();

  BLOCK_COMMENT("compiler_fast_lightweight_unlock {");
  { // Lightweight Unlock
    NearLabel push_and_slow_path;

    // Check if obj is top of lock-stack.
    z_lgf(top, Address(Z_thread, ls_top_offset));

    z_aghi(top, -oopSize);
    z_cg(obj, Address(Z_thread, top));
    branch_optimized(bcondNotEqual, inflated_load_mark);

    // Pop lock-stack.
#ifdef ASSERT
    const Register temp_top = tmp1; // let's not kill top here, we can use it for the recursive check
    z_agrk(temp_top, top, Z_thread);
    z_xc(0, oopSize-1, temp_top, 0, temp_top); // wipe out lock-stack entry
#endif
    z_alsi(in_bytes(ls_top_offset), Z_thread, -oopSize); // pop object

    // The underflow check is elided. The recursive check will always fail
    // when the lock stack is empty because of the _bad_oop_sentinel field.

    // Check if recursive.
    z_aghi(top, -oopSize);
    z_cg(obj, Address(Z_thread, top));
    z_bre(unlocked);

    // Not recursive

    // Check for monitor (0b10).
    // Because we got here by popping (meaning we pushed in locked)
    // there will be no monitor in the box. So we need to push back the obj
    // so that the runtime can fix any potential anonymous owner.
    z_lg(mark, Address(obj, mark_offset));
    z_tmll(mark, markWord::monitor_value);
    if (!UseObjectMonitorTable) {
      z_brnaz(inflated);
    } else {
      z_brnaz(push_and_slow_path);
    }

#ifdef ASSERT
    // Check header not unlocked (0b01).
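    // (If the unlocked bit were still set here, the object would not actually be locked,
    //  so the assert below traps in that case.)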
    NearLabel not_unlocked;
    z_tmll(mark, markWord::unlocked_value);
    z_braz(not_unlocked);
    stop("lightweight_unlock already unlocked");
    bind(not_unlocked);
#endif // ASSERT

    { // Try to unlock. Transition lock bits 0b00 => 0b01
      Register unlocked_obj = top;
      z_lgr(unlocked_obj, mark);
      z_oill(unlocked_obj, markWord::unlocked_value);
      z_csg(mark, unlocked_obj, mark_offset, obj);
      branch_optimized(Assembler::bcondEqual, unlocked);
    }

    bind(push_and_slow_path);
    // Restore lock-stack and handle the unlock in runtime.
    z_lgf(top, Address(Z_thread, ls_top_offset));
    DEBUG_ONLY(z_stg(obj, Address(Z_thread, top));)
    z_alsi(in_bytes(ls_top_offset), Z_thread, oopSize);
    // set CC to NE
    z_ltgr(obj, obj); // object is not null here
    z_bru(slow_path);
  }
  BLOCK_COMMENT("} compiler_fast_lightweight_unlock");

  { // Handle inflated monitor.

    bind(inflated_load_mark);

    z_lg(mark, Address(obj, mark_offset));

#ifdef ASSERT
    z_tmll(mark, markWord::monitor_value);
    z_brnaz(inflated);
    stop("Fast Unlock not monitor");
#endif // ASSERT

    bind(inflated);

#ifdef ASSERT
    NearLabel check_done, loop;
    z_lgf(top, Address(Z_thread, ls_top_offset));
    bind(loop);
    z_aghi(top, -oopSize);
    compareU32_and_branch(top, in_bytes(JavaThread::lock_stack_base_offset()),
                          bcondLow, check_done);
    z_cg(obj, Address(Z_thread, top));
    z_brne(loop);
    stop("Fast Unlock lock on stack");
    bind(check_done);
#endif // ASSERT

    const Register tmp1_monitor = tmp1;

    if (!UseObjectMonitorTable) {
      assert(tmp1_monitor == mark, "should be the same here");
    } else {
      // Uses ObjectMonitorTable. Look for the monitor in our BasicLock on the stack.
      z_lg(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
      // null check with ZF == 0, no valid pointer below alignof(ObjectMonitor*)
      z_cghi(tmp1_monitor, alignof(ObjectMonitor*));

      z_brl(slow_path);
    }

    // mark contains the tagged ObjectMonitor*.
    const Register monitor = mark;

    const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
    const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag};
    const Address cxq_address{monitor, ObjectMonitor::cxq_offset() - monitor_tag};
    const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag};
    const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag};
    const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag};

    NearLabel not_recursive;
    const Register recursions = tmp2;

    // Check if recursive.
    load_and_test_long(recursions, recursions_address);
    z_bre(not_recursive); // if 0 then jump, it's not recursive locking

    // Recursive unlock
    z_agsi(recursions_address, -1ll);
    z_cgr(monitor, monitor); // set the CC to EQUAL
    z_bru(unlocked);

    bind(not_recursive);

    NearLabel check_succ, set_eq_unlocked;

    // Set owner to null.
    // Release to satisfy the JMM
    z_release();
    z_lghi(tmp2, 0);
    z_stg(tmp2 /*=0*/, owner_address);
    // We need a full fence after clearing owner to avoid stranding.
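    // (The fence orders the owner store above before the EntryList/cxq/succ loads below;
    //  without it, this thread could read stale empty lists, skip the slow path, and leave
    //  a newly queued waiter stranded.)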
    z_fence();

    // Check if the entry lists are empty (EntryList first - by convention).
    load_and_test_long(tmp2, EntryList_address);
    z_brne(check_succ);
    load_and_test_long(tmp2, cxq_address);
    z_bre(unlocked); // If so we are done.

    bind(check_succ);

    // Check if there is a successor.
    load_and_test_long(tmp2, succ_address);
    z_brne(set_eq_unlocked); // If so we are done.

    // Save the monitor pointer in the current thread, so we can try to
    // reacquire the lock in SharedRuntime::monitor_exit_helper().
    if (!UseObjectMonitorTable) {
      z_xilf(monitor, markWord::monitor_value);
    }
    z_stg(monitor, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset()));

    z_ltgr(obj, obj); // Set flag = NE
    z_bru(slow_path);

    bind(set_eq_unlocked);
    z_cr(tmp2, tmp2); // Set flag = EQ
  }

  bind(unlocked);

#ifdef ASSERT
  // Check that unlocked label is reached with flag == EQ.
  NearLabel flag_correct;
  z_bre(flag_correct);
  stop("CC is not set to EQ, it should be - unlock");
#endif // ASSERT

  bind(slow_path);

#ifdef ASSERT
  // Check that slow_path label is reached with flag == NE.
  z_brne(flag_correct);
  stop("CC is not set to NE, it should be - unlock");
  bind(flag_correct);
#endif // ASSERT

  // C2 uses the value of flag (NE vs EQ) to determine the continuation.
}

void MacroAssembler::pop_count_int(Register r_dst, Register r_src, Register r_tmp) {
  BLOCK_COMMENT("pop_count_int {");

  assert(r_tmp != noreg, "temp register required for pop_count_int, as code may run on machine older than z15");
  assert_different_registers(r_dst, r_tmp); // if r_src is same as r_tmp, it should be fine

  if (VM_Version::has_MiscInstrExt3()) {
    pop_count_int_with_ext3(r_dst, r_src);
  } else {
    pop_count_int_without_ext3(r_dst, r_src, r_tmp);
  }

  BLOCK_COMMENT("} pop_count_int");
}

void MacroAssembler::pop_count_long(Register r_dst, Register r_src, Register r_tmp) {
  BLOCK_COMMENT("pop_count_long {");

  assert(r_tmp != noreg, "temp register required for pop_count_long, as code may run on machine older than z15");
  assert_different_registers(r_dst, r_tmp); // if r_src is same as r_tmp, it should be fine

  if (VM_Version::has_MiscInstrExt3()) {
    pop_count_long_with_ext3(r_dst, r_src);
  } else {
    pop_count_long_without_ext3(r_dst, r_src, r_tmp);
  }

  BLOCK_COMMENT("} pop_count_long");
}

void MacroAssembler::pop_count_int_without_ext3(Register r_dst, Register r_src, Register r_tmp) {
  BLOCK_COMMENT("pop_count_int_without_ext3 {");

  assert(r_tmp != noreg, "temp register required for popcnt, for machines < z15");
  assert_different_registers(r_dst, r_tmp); // if r_src is same as r_tmp, it should be fine

  z_popcnt(r_dst, r_src, 0);
  z_srlg(r_tmp, r_dst, 16);
  z_alr(r_dst, r_tmp);
  z_srlg(r_tmp, r_dst, 8);
  z_alr(r_dst, r_tmp);
  z_llgcr(r_dst, r_dst);

  BLOCK_COMMENT("} pop_count_int_without_ext3");
}

void MacroAssembler::pop_count_long_without_ext3(Register r_dst, Register r_src, Register r_tmp) {
  BLOCK_COMMENT("pop_count_long_without_ext3 {");

  assert(r_tmp != noreg, "temp register required for popcnt, for machines < z15");
  assert_different_registers(r_dst, r_tmp); // if r_src is same as r_tmp, it should be fine

  z_popcnt(r_dst, r_src, 0);
  z_ahhlr(r_dst, r_dst, r_dst);
  z_sllg(r_tmp, r_dst, 16);
  z_algr(r_dst, r_tmp);
  z_sllg(r_tmp, r_dst, 8);
  z_algr(r_dst, r_tmp);
  z_srlg(r_dst, r_dst, 56);

  BLOCK_COMMENT("} pop_count_long_without_ext3");
}

void MacroAssembler::pop_count_long_with_ext3(Register r_dst, Register r_src) {
  BLOCK_COMMENT("pop_count_long_with_ext3 {");

  guarantee(VM_Version::has_MiscInstrExt3(),
            "this hardware doesn't support miscellaneous-instruction-extensions facility 3, still pop_count_long_with_ext3 is used");
  z_popcnt(r_dst, r_src, 8);

  BLOCK_COMMENT("} pop_count_long_with_ext3");
}

void MacroAssembler::pop_count_int_with_ext3(Register r_dst, Register r_src) {
  BLOCK_COMMENT("pop_count_int_with_ext3 {");

  guarantee(VM_Version::has_MiscInstrExt3(),
            "this hardware doesn't support miscellaneous-instruction-extensions facility 3, still pop_count_int_with_ext3 is used");
  z_llgfr(r_dst, r_src);
  z_popcnt(r_dst, r_dst, 8);

  BLOCK_COMMENT("} pop_count_int_with_ext3");
}
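// Note on the *_without_ext3 variants above: without the miscellaneous-instruction-extensions
// facility 3, z_popcnt leaves an individual population count in each byte of r_dst. The long
// variant folds all eight byte counts into the leftmost byte (add high/low words, then
// shift-and-add by 16 and by 8) and extracts it with the final shift right by 56; the int
// variant folds the low four byte counts and keeps only the lowest byte via z_llgcr, so only
// the low 32 bits of r_src contribute to its result. Illustrative example:
// r_src = 0x00FF00FF00FF00FF yields per-byte counts 0x0008000800080008, which fold to 32 for
// the long variant and 16 for the int variant.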