1 /* 2 * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2016, 2023 SAP SE. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26 #include "precompiled.hpp" 27 #include "asm/codeBuffer.hpp" 28 #include "asm/macroAssembler.inline.hpp" 29 #include "compiler/disassembler.hpp" 30 #include "gc/shared/barrierSet.hpp" 31 #include "gc/shared/barrierSetAssembler.hpp" 32 #include "gc/shared/collectedHeap.inline.hpp" 33 #include "interpreter/interpreter.hpp" 34 #include "gc/shared/cardTableBarrierSet.hpp" 35 #include "memory/resourceArea.hpp" 36 #include "memory/universe.hpp" 37 #include "oops/accessDecorators.hpp" 38 #include "oops/compressedOops.inline.hpp" 39 #include "oops/klass.inline.hpp" 40 #include "prims/methodHandles.hpp" 41 #include "registerSaver_s390.hpp" 42 #include "runtime/icache.hpp" 43 #include "runtime/interfaceSupport.inline.hpp" 44 #include "runtime/objectMonitor.hpp" 45 #include "runtime/os.hpp" 46 #include "runtime/safepoint.hpp" 47 #include "runtime/safepointMechanism.hpp" 48 #include "runtime/sharedRuntime.hpp" 49 #include "runtime/stubRoutines.hpp" 50 #include "utilities/events.hpp" 51 #include "utilities/macros.hpp" 52 #include "utilities/powerOfTwo.hpp" 53 54 #include <ucontext.h> 55 56 #define BLOCK_COMMENT(str) block_comment(str) 57 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 58 59 // Move 32-bit register if destination and source are different. 60 void MacroAssembler::lr_if_needed(Register rd, Register rs) { 61 if (rs != rd) { z_lr(rd, rs); } 62 } 63 64 // Move register if destination and source are different. 65 void MacroAssembler::lgr_if_needed(Register rd, Register rs) { 66 if (rs != rd) { z_lgr(rd, rs); } 67 } 68 69 // Zero-extend 32-bit register into 64-bit register if destination and source are different. 70 void MacroAssembler::llgfr_if_needed(Register rd, Register rs) { 71 if (rs != rd) { z_llgfr(rd, rs); } 72 } 73 74 // Move float register if destination and source are different. 75 void MacroAssembler::ldr_if_needed(FloatRegister rd, FloatRegister rs) { 76 if (rs != rd) { z_ldr(rd, rs); } 77 } 78 79 // Move integer register if destination and source are different. 80 // It is assumed that shorter-than-int types are already 81 // appropriately sign-extended. 
82 void MacroAssembler::move_reg_if_needed(Register dst, BasicType dst_type, Register src, 83 BasicType src_type) { 84 assert((dst_type != T_FLOAT) && (dst_type != T_DOUBLE), "use move_freg for float types"); 85 assert((src_type != T_FLOAT) && (src_type != T_DOUBLE), "use move_freg for float types"); 86 87 if (dst_type == src_type) { 88 lgr_if_needed(dst, src); // Just move all 64 bits. 89 return; 90 } 91 92 switch (dst_type) { 93 // Do not support these types for now. 94 // case T_BOOLEAN: 95 case T_BYTE: // signed byte 96 switch (src_type) { 97 case T_INT: 98 z_lgbr(dst, src); 99 break; 100 default: 101 ShouldNotReachHere(); 102 } 103 return; 104 105 case T_CHAR: 106 case T_SHORT: 107 switch (src_type) { 108 case T_INT: 109 if (dst_type == T_CHAR) { 110 z_llghr(dst, src); 111 } else { 112 z_lghr(dst, src); 113 } 114 break; 115 default: 116 ShouldNotReachHere(); 117 } 118 return; 119 120 case T_INT: 121 switch (src_type) { 122 case T_BOOLEAN: 123 case T_BYTE: 124 case T_CHAR: 125 case T_SHORT: 126 case T_INT: 127 case T_LONG: 128 case T_OBJECT: 129 case T_ARRAY: 130 case T_VOID: 131 case T_ADDRESS: 132 lr_if_needed(dst, src); 133 // llgfr_if_needed(dst, src); // zero-extend (in case we need to find a bug). 134 return; 135 136 default: 137 assert(false, "non-integer src type"); 138 return; 139 } 140 case T_LONG: 141 switch (src_type) { 142 case T_BOOLEAN: 143 case T_BYTE: 144 case T_CHAR: 145 case T_SHORT: 146 case T_INT: 147 z_lgfr(dst, src); // sign extension 148 return; 149 150 case T_LONG: 151 case T_OBJECT: 152 case T_ARRAY: 153 case T_VOID: 154 case T_ADDRESS: 155 lgr_if_needed(dst, src); 156 return; 157 158 default: 159 assert(false, "non-integer src type"); 160 return; 161 } 162 return; 163 case T_OBJECT: 164 case T_ARRAY: 165 case T_VOID: 166 case T_ADDRESS: 167 switch (src_type) { 168 // These types don't make sense to be converted to pointers: 169 // case T_BOOLEAN: 170 // case T_BYTE: 171 // case T_CHAR: 172 // case T_SHORT: 173 174 case T_INT: 175 z_llgfr(dst, src); // zero extension 176 return; 177 178 case T_LONG: 179 case T_OBJECT: 180 case T_ARRAY: 181 case T_VOID: 182 case T_ADDRESS: 183 lgr_if_needed(dst, src); 184 return; 185 186 default: 187 assert(false, "non-integer src type"); 188 return; 189 } 190 return; 191 default: 192 assert(false, "non-integer dst type"); 193 return; 194 } 195 } 196 197 // Move float register if destination and source are different. 198 void MacroAssembler::move_freg_if_needed(FloatRegister dst, BasicType dst_type, 199 FloatRegister src, BasicType src_type) { 200 assert((dst_type == T_FLOAT) || (dst_type == T_DOUBLE), "use move_reg for int types"); 201 assert((src_type == T_FLOAT) || (src_type == T_DOUBLE), "use move_reg for int types"); 202 if (dst_type == src_type) { 203 ldr_if_needed(dst, src); // Just move all 64 bits. 204 } else { 205 switch (dst_type) { 206 case T_FLOAT: 207 assert(src_type == T_DOUBLE, "invalid float type combination"); 208 z_ledbr(dst, src); 209 return; 210 case T_DOUBLE: 211 assert(src_type == T_FLOAT, "invalid float type combination"); 212 z_ldebr(dst, src); 213 return; 214 default: 215 assert(false, "non-float dst type"); 216 return; 217 } 218 } 219 } 220 221 // Optimized emitter for reg to mem operations. 222 // Uses modern instructions if running on modern hardware, classic instructions 223 // otherwise. Prefers (usually shorter) classic instructions if applicable. 224 // Data register (reg) cannot be used as work register. 
225 // 226 // Don't rely on register locking, instead pass a scratch register (Z_R0 by default). 227 // CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs! 228 void MacroAssembler::freg2mem_opt(FloatRegister reg, 229 int64_t disp, 230 Register index, 231 Register base, 232 void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register), 233 void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register), 234 Register scratch) { 235 index = (index == noreg) ? Z_R0 : index; 236 if (Displacement::is_shortDisp(disp)) { 237 (this->*classic)(reg, disp, index, base); 238 } else { 239 if (Displacement::is_validDisp(disp)) { 240 (this->*modern)(reg, disp, index, base); 241 } else { 242 if (scratch != Z_R0 && scratch != Z_R1) { 243 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 244 } else { 245 if (scratch != Z_R0) { // scratch == Z_R1 246 if ((scratch == index) || (index == base)) { 247 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 248 } else { 249 add2reg(scratch, disp, base); 250 (this->*classic)(reg, 0, index, scratch); 251 if (base == scratch) { 252 add2reg(base, -disp); // Restore base. 253 } 254 } 255 } else { // scratch == Z_R0 256 z_lgr(scratch, base); 257 add2reg(base, disp); 258 (this->*classic)(reg, 0, index, base); 259 z_lgr(base, scratch); // Restore base. 260 } 261 } 262 } 263 } 264 } 265 266 void MacroAssembler::freg2mem_opt(FloatRegister reg, const Address &a, bool is_double) { 267 if (is_double) { 268 freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stdy), CLASSIC_FFUN(z_std)); 269 } else { 270 freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stey), CLASSIC_FFUN(z_ste)); 271 } 272 } 273 274 // Optimized emitter for mem to reg operations. 275 // Uses modern instructions if running on modern hardware, classic instructions 276 // otherwise. Prefers (usually shorter) classic instructions if applicable. 277 // data register (reg) cannot be used as work register. 278 // 279 // Don't rely on register locking, instead pass a scratch register (Z_R0 by default). 280 // CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs! 281 void MacroAssembler::mem2freg_opt(FloatRegister reg, 282 int64_t disp, 283 Register index, 284 Register base, 285 void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register), 286 void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register), 287 Register scratch) { 288 index = (index == noreg) ? Z_R0 : index; 289 if (Displacement::is_shortDisp(disp)) { 290 (this->*classic)(reg, disp, index, base); 291 } else { 292 if (Displacement::is_validDisp(disp)) { 293 (this->*modern)(reg, disp, index, base); 294 } else { 295 if (scratch != Z_R0 && scratch != Z_R1) { 296 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 297 } else { 298 if (scratch != Z_R0) { // scratch == Z_R1 299 if ((scratch == index) || (index == base)) { 300 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 301 } else { 302 add2reg(scratch, disp, base); 303 (this->*classic)(reg, 0, index, scratch); 304 if (base == scratch) { 305 add2reg(base, -disp); // Restore base. 306 } 307 } 308 } else { // scratch == Z_R0 309 z_lgr(scratch, base); 310 add2reg(base, disp); 311 (this->*classic)(reg, 0, index, base); 312 z_lgr(base, scratch); // Restore base. 
313 } 314 } 315 } 316 } 317 } 318 319 void MacroAssembler::mem2freg_opt(FloatRegister reg, const Address &a, bool is_double) { 320 if (is_double) { 321 mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ldy), CLASSIC_FFUN(z_ld)); 322 } else { 323 mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ley), CLASSIC_FFUN(z_le)); 324 } 325 } 326 327 // Optimized emitter for reg to mem operations. 328 // Uses modern instructions if running on modern hardware, classic instructions 329 // otherwise. Prefers (usually shorter) classic instructions if applicable. 330 // Data register (reg) cannot be used as work register. 331 // 332 // Don't rely on register locking, instead pass a scratch register 333 // (Z_R0 by default) 334 // CAUTION! passing registers >= Z_R2 may produce bad results on old CPUs! 335 void MacroAssembler::reg2mem_opt(Register reg, 336 int64_t disp, 337 Register index, 338 Register base, 339 void (MacroAssembler::*modern) (Register, int64_t, Register, Register), 340 void (MacroAssembler::*classic)(Register, int64_t, Register, Register), 341 Register scratch) { 342 index = (index == noreg) ? Z_R0 : index; 343 if (Displacement::is_shortDisp(disp)) { 344 (this->*classic)(reg, disp, index, base); 345 } else { 346 if (Displacement::is_validDisp(disp)) { 347 (this->*modern)(reg, disp, index, base); 348 } else { 349 if (scratch != Z_R0 && scratch != Z_R1) { 350 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 351 } else { 352 if (scratch != Z_R0) { // scratch == Z_R1 353 if ((scratch == index) || (index == base)) { 354 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 355 } else { 356 add2reg(scratch, disp, base); 357 (this->*classic)(reg, 0, index, scratch); 358 if (base == scratch) { 359 add2reg(base, -disp); // Restore base. 360 } 361 } 362 } else { // scratch == Z_R0 363 if ((scratch == reg) || (scratch == base) || (reg == base)) { 364 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 365 } else { 366 z_lgr(scratch, base); 367 add2reg(base, disp); 368 (this->*classic)(reg, 0, index, base); 369 z_lgr(base, scratch); // Restore base. 370 } 371 } 372 } 373 } 374 } 375 } 376 377 int MacroAssembler::reg2mem_opt(Register reg, const Address &a, bool is_double) { 378 int store_offset = offset(); 379 if (is_double) { 380 reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_stg), CLASSIC_IFUN(z_stg)); 381 } else { 382 reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_sty), CLASSIC_IFUN(z_st)); 383 } 384 return store_offset; 385 } 386 387 // Optimized emitter for mem to reg operations. 388 // Uses modern instructions if running on modern hardware, classic instructions 389 // otherwise. Prefers (usually shorter) classic instructions if applicable. 390 // Data register (reg) will be used as work register where possible. 391 void MacroAssembler::mem2reg_opt(Register reg, 392 int64_t disp, 393 Register index, 394 Register base, 395 void (MacroAssembler::*modern) (Register, int64_t, Register, Register), 396 void (MacroAssembler::*classic)(Register, int64_t, Register, Register)) { 397 index = (index == noreg) ? 
Z_R0 : index; 398 if (Displacement::is_shortDisp(disp)) { 399 (this->*classic)(reg, disp, index, base); 400 } else { 401 if (Displacement::is_validDisp(disp)) { 402 (this->*modern)(reg, disp, index, base); 403 } else { 404 if ((reg == index) && (reg == base)) { 405 z_sllg(reg, reg, 1); 406 add2reg(reg, disp); 407 (this->*classic)(reg, 0, noreg, reg); 408 } else if ((reg == index) && (reg != Z_R0)) { 409 add2reg(reg, disp); 410 (this->*classic)(reg, 0, reg, base); 411 } else if (reg == base) { 412 add2reg(reg, disp); 413 (this->*classic)(reg, 0, index, reg); 414 } else if (reg != Z_R0) { 415 add2reg(reg, disp, base); 416 (this->*classic)(reg, 0, index, reg); 417 } else { // reg == Z_R0 && reg != base here 418 add2reg(base, disp); 419 (this->*classic)(reg, 0, index, base); 420 add2reg(base, -disp); 421 } 422 } 423 } 424 } 425 426 void MacroAssembler::mem2reg_opt(Register reg, const Address &a, bool is_double) { 427 if (is_double) { 428 z_lg(reg, a); 429 } else { 430 mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_ly), CLASSIC_IFUN(z_l)); 431 } 432 } 433 434 void MacroAssembler::mem2reg_signed_opt(Register reg, const Address &a) { 435 mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_lgf), CLASSIC_IFUN(z_lgf)); 436 } 437 438 void MacroAssembler::and_imm(Register r, long mask, 439 Register tmp /* = Z_R0 */, 440 bool wide /* = false */) { 441 assert(wide || Immediate::is_simm32(mask), "mask value too large"); 442 443 if (!wide) { 444 z_nilf(r, mask); 445 return; 446 } 447 448 assert(r != tmp, " need a different temporary register !"); 449 load_const_optimized(tmp, mask); 450 z_ngr(r, tmp); 451 } 452 453 // Calculate the 1's complement. 454 // Note: The condition code is neither preserved nor correctly set by this code!!! 455 // Note: (wide == false) does not protect the high order half of the target register 456 // from alteration. It only serves as optimization hint for 32-bit results. 457 void MacroAssembler::not_(Register r1, Register r2, bool wide) { 458 459 if ((r2 == noreg) || (r2 == r1)) { // Calc 1's complement in place. 460 z_xilf(r1, -1); 461 if (wide) { 462 z_xihf(r1, -1); 463 } 464 } else { // Distinct src and dst registers. 465 load_const_optimized(r1, -1); 466 z_xgr(r1, r2); 467 } 468 } 469 470 unsigned long MacroAssembler::create_mask(int lBitPos, int rBitPos) { 471 assert(lBitPos >= 0, "zero is leftmost bit position"); 472 assert(rBitPos <= 63, "63 is rightmost bit position"); 473 assert(lBitPos <= rBitPos, "inverted selection interval"); 474 return (lBitPos == 0 ? (unsigned long)(-1L) : ((1UL<<(63-lBitPos+1))-1)) & (~((1UL<<(63-rBitPos))-1)); 475 } 476 477 // Helper function for the "Rotate_then_<logicalOP>" emitters. 478 // Rotate src, then mask register contents such that only bits in range survive. 479 // For oneBits == false, all bits not in range are set to 0. Useful for deleting all bits outside range. 480 // For oneBits == true, all bits not in range are set to 1. Useful for preserving all bits outside range. 481 // The caller must ensure that the selected range only contains bits with defined value. 482 void MacroAssembler::rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos, 483 int nRotate, bool src32bit, bool dst32bit, bool oneBits) { 484 assert(!(dst32bit && lBitPos < 32), "selection interval out of range for int destination"); 485 bool sll4rll = (nRotate >= 0) && (nRotate <= (63-rBitPos)); // Substitute SLL(G) for RLL(G). 
486 bool srl4rll = (nRotate < 0) && (-nRotate <= lBitPos); // Substitute SRL(G) for RLL(G). 487 // Pre-determine which parts of dst will be zero after shift/rotate. 488 bool llZero = sll4rll && (nRotate >= 16); 489 bool lhZero = (sll4rll && (nRotate >= 32)) || (srl4rll && (nRotate <= -48)); 490 bool lfZero = llZero && lhZero; 491 bool hlZero = (sll4rll && (nRotate >= 48)) || (srl4rll && (nRotate <= -32)); 492 bool hhZero = (srl4rll && (nRotate <= -16)); 493 bool hfZero = hlZero && hhZero; 494 495 // rotate then mask src operand. 496 // if oneBits == true, all bits outside selected range are 1s. 497 // if oneBits == false, all bits outside selected range are 0s. 498 if (src32bit) { // There might be garbage in the upper 32 bits which will get masked away. 499 if (dst32bit) { 500 z_rll(dst, src, nRotate); // Copy and rotate, upper half of reg remains undisturbed. 501 } else { 502 if (sll4rll) { z_sllg(dst, src, nRotate); } 503 else if (srl4rll) { z_srlg(dst, src, -nRotate); } 504 else { z_rllg(dst, src, nRotate); } 505 } 506 } else { 507 if (sll4rll) { z_sllg(dst, src, nRotate); } 508 else if (srl4rll) { z_srlg(dst, src, -nRotate); } 509 else { z_rllg(dst, src, nRotate); } 510 } 511 512 unsigned long range_mask = create_mask(lBitPos, rBitPos); 513 unsigned int range_mask_h = (unsigned int)(range_mask >> 32); 514 unsigned int range_mask_l = (unsigned int)range_mask; 515 unsigned short range_mask_hh = (unsigned short)(range_mask >> 48); 516 unsigned short range_mask_hl = (unsigned short)(range_mask >> 32); 517 unsigned short range_mask_lh = (unsigned short)(range_mask >> 16); 518 unsigned short range_mask_ll = (unsigned short)range_mask; 519 // Works for z9 and newer H/W. 520 if (oneBits) { 521 if ((~range_mask_l) != 0) { z_oilf(dst, ~range_mask_l); } // All bits outside range become 1s. 522 if (((~range_mask_h) != 0) && !dst32bit) { z_oihf(dst, ~range_mask_h); } 523 } else { 524 // All bits outside range become 0s 525 if (((~range_mask_l) != 0) && !lfZero) { 526 z_nilf(dst, range_mask_l); 527 } 528 if (((~range_mask_h) != 0) && !dst32bit && !hfZero) { 529 z_nihf(dst, range_mask_h); 530 } 531 } 532 } 533 534 // Rotate src, then insert selected range from rotated src into dst. 535 // Clear dst before, if requested. 536 void MacroAssembler::rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos, 537 int nRotate, bool clear_dst) { 538 // This version does not depend on src being zero-extended int2long. 539 nRotate &= 0x003f; // For risbg, pretend it's an unsigned value. 540 z_risbg(dst, src, lBitPos, rBitPos, nRotate, clear_dst); // Rotate, then insert selected, clear the rest. 541 } 542 543 // Rotate src, then and selected range from rotated src into dst. 544 // Set condition code only if so requested. Otherwise it is unpredictable. 545 // See performance note in macroAssembler_s390.hpp for important information. 546 void MacroAssembler::rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos, 547 int nRotate, bool test_only) { 548 guarantee(!test_only, "Emitter not fit for test_only instruction variant."); 549 // This version does not depend on src being zero-extended int2long. 550 nRotate &= 0x003f; // For risbg, pretend it's an unsigned value. 551 z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected. 552 } 553 554 // Rotate src, then or selected range from rotated src into dst. 555 // Set condition code only if so requested. Otherwise it is unpredictable. 
556 // See performance note in macroAssembler_s390.hpp for important information. 557 void MacroAssembler::rotate_then_or(Register dst, Register src, int lBitPos, int rBitPos, 558 int nRotate, bool test_only) { 559 guarantee(!test_only, "Emitter not fit for test_only instruction variant."); 560 // This version does not depend on src being zero-extended int2long. 561 nRotate &= 0x003f; // For risbg, pretend it's an unsigned value. 562 z_rosbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected. 563 } 564 565 // Rotate src, then xor selected range from rotated src into dst. 566 // Set condition code only if so requested. Otherwise it is unpredictable. 567 // See performance note in macroAssembler_s390.hpp for important information. 568 void MacroAssembler::rotate_then_xor(Register dst, Register src, int lBitPos, int rBitPos, 569 int nRotate, bool test_only) { 570 guarantee(!test_only, "Emitter not fit for test_only instruction variant."); 571 // This version does not depend on src being zero-extended int2long. 572 nRotate &= 0x003f; // For risbg, pretend it's an unsigned value. 573 z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected. 574 } 575 576 void MacroAssembler::add64(Register r1, RegisterOrConstant inc) { 577 if (inc.is_register()) { 578 z_agr(r1, inc.as_register()); 579 } else { // constant 580 intptr_t imm = inc.as_constant(); 581 add2reg(r1, imm); 582 } 583 } 584 // Helper function to multiply the 64bit contents of a register by a 16bit constant. 585 // The optimization tries to avoid the mghi instruction, since it uses the FPU for 586 // calculation and is thus rather slow. 587 // 588 // There is no handling for special cases, e.g. cval==0 or cval==1. 589 // 590 // Returns len of generated code block. 591 unsigned int MacroAssembler::mul_reg64_const16(Register rval, Register work, int cval) { 592 int block_start = offset(); 593 594 bool sign_flip = cval < 0; 595 cval = sign_flip ? -cval : cval; 596 597 BLOCK_COMMENT("Reg64*Con16 {"); 598 599 int bit1 = cval & -cval; 600 if (bit1 == cval) { 601 z_sllg(rval, rval, exact_log2(bit1)); 602 if (sign_flip) { z_lcgr(rval, rval); } 603 } else { 604 int bit2 = (cval-bit1) & -(cval-bit1); 605 if ((bit1+bit2) == cval) { 606 z_sllg(work, rval, exact_log2(bit1)); 607 z_sllg(rval, rval, exact_log2(bit2)); 608 z_agr(rval, work); 609 if (sign_flip) { z_lcgr(rval, rval); } 610 } else { 611 if (sign_flip) { z_mghi(rval, -cval); } 612 else { z_mghi(rval, cval); } 613 } 614 } 615 BLOCK_COMMENT("} Reg64*Con16"); 616 617 int block_end = offset(); 618 return block_end - block_start; 619 } 620 621 // Generic operation r1 := r2 + imm. 622 // 623 // Should produce the best code for each supported CPU version. 624 // r2 == noreg yields r1 := r1 + imm 625 // imm == 0 emits either no instruction or r1 := r2 ! 626 // NOTES: 1) Don't use this function where fixed sized 627 // instruction sequences are required!!! 628 // 2) Don't use this function if condition code 629 // setting is required! 630 // 3) Despite being declared as int64_t, the parameter imm 631 // must be a simm_32 value (= signed 32-bit integer). 632 void MacroAssembler::add2reg(Register r1, int64_t imm, Register r2) { 633 assert(Immediate::is_simm32(imm), "probably an implicit conversion went wrong"); 634 635 if (r2 == noreg) { r2 = r1; } 636 637 // Handle special case imm == 0. 638 if (imm == 0) { 639 lgr_if_needed(r1, r2); 640 // Nothing else to do. 
641 return; 642 } 643 644 if (!PreferLAoverADD || (r2 == Z_R0)) { 645 bool distinctOpnds = VM_Version::has_DistinctOpnds(); 646 647 // Can we encode imm in 16 bits signed? 648 if (Immediate::is_simm16(imm)) { 649 if (r1 == r2) { 650 z_aghi(r1, imm); 651 return; 652 } 653 if (distinctOpnds) { 654 z_aghik(r1, r2, imm); 655 return; 656 } 657 z_lgr(r1, r2); 658 z_aghi(r1, imm); 659 return; 660 } 661 } else { 662 // Can we encode imm in 12 bits unsigned? 663 if (Displacement::is_shortDisp(imm)) { 664 z_la(r1, imm, r2); 665 return; 666 } 667 // Can we encode imm in 20 bits signed? 668 if (Displacement::is_validDisp(imm)) { 669 // Always use LAY instruction, so we don't need the tmp register. 670 z_lay(r1, imm, r2); 671 return; 672 } 673 674 } 675 676 // Can handle it (all possible values) with long immediates. 677 lgr_if_needed(r1, r2); 678 z_agfi(r1, imm); 679 } 680 681 // Generic operation r := b + x + d 682 // 683 // Addition of several operands with address generation semantics - sort of: 684 // - no restriction on the registers. Any register will do for any operand. 685 // - x == noreg: operand will be disregarded. 686 // - b == noreg: will use (contents of) result reg as operand (r := r + d). 687 // - x == Z_R0: just disregard 688 // - b == Z_R0: use as operand. This is not address generation semantics!!! 689 // 690 // The same restrictions as on add2reg() are valid!!! 691 void MacroAssembler::add2reg_with_index(Register r, int64_t d, Register x, Register b) { 692 assert(Immediate::is_simm32(d), "probably an implicit conversion went wrong"); 693 694 if (x == noreg) { x = Z_R0; } 695 if (b == noreg) { b = r; } 696 697 // Handle special case x == R0. 698 if (x == Z_R0) { 699 // Can simply add the immediate value to the base register. 700 add2reg(r, d, b); 701 return; 702 } 703 704 if (!PreferLAoverADD || (b == Z_R0)) { 705 bool distinctOpnds = VM_Version::has_DistinctOpnds(); 706 // Handle special case d == 0. 707 if (d == 0) { 708 if (b == x) { z_sllg(r, b, 1); return; } 709 if (r == x) { z_agr(r, b); return; } 710 if (r == b) { z_agr(r, x); return; } 711 if (distinctOpnds) { z_agrk(r, x, b); return; } 712 z_lgr(r, b); 713 z_agr(r, x); 714 } else { 715 if (x == b) { z_sllg(r, x, 1); } 716 else if (r == x) { z_agr(r, b); } 717 else if (r == b) { z_agr(r, x); } 718 else if (distinctOpnds) { z_agrk(r, x, b); } 719 else { 720 z_lgr(r, b); 721 z_agr(r, x); 722 } 723 add2reg(r, d); 724 } 725 } else { 726 // Can we encode imm in 12 bits unsigned? 727 if (Displacement::is_shortDisp(d)) { 728 z_la(r, d, x, b); 729 return; 730 } 731 // Can we encode imm in 20 bits signed? 732 if (Displacement::is_validDisp(d)) { 733 z_lay(r, d, x, b); 734 return; 735 } 736 z_la(r, 0, x, b); 737 add2reg(r, d); 738 } 739 } 740 741 // Generic emitter (32bit) for direct memory increment. 742 // For optimal code, do not specify Z_R0 as temp register. 
743 void MacroAssembler::add2mem_32(const Address &a, int64_t imm, Register tmp) { 744 if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) { 745 z_asi(a, imm); 746 } else { 747 z_lgf(tmp, a); 748 add2reg(tmp, imm); 749 z_st(tmp, a); 750 } 751 } 752 753 void MacroAssembler::add2mem_64(const Address &a, int64_t imm, Register tmp) { 754 if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) { 755 z_agsi(a, imm); 756 } else { 757 z_lg(tmp, a); 758 add2reg(tmp, imm); 759 z_stg(tmp, a); 760 } 761 } 762 763 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 764 switch (size_in_bytes) { 765 case 8: z_lg(dst, src); break; 766 case 4: is_signed ? z_lgf(dst, src) : z_llgf(dst, src); break; 767 case 2: is_signed ? z_lgh(dst, src) : z_llgh(dst, src); break; 768 case 1: is_signed ? z_lgb(dst, src) : z_llgc(dst, src); break; 769 default: ShouldNotReachHere(); 770 } 771 } 772 773 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) { 774 switch (size_in_bytes) { 775 case 8: z_stg(src, dst); break; 776 case 4: z_st(src, dst); break; 777 case 2: z_sth(src, dst); break; 778 case 1: z_stc(src, dst); break; 779 default: ShouldNotReachHere(); 780 } 781 } 782 783 // Split a si20 offset (20bit, signed) into an ui12 offset (12bit, unsigned) and 784 // a high-order summand in register tmp. 785 // 786 // return value: < 0: No split required, si20 actually has property uimm12. 787 // >= 0: Split performed. Use return value as uimm12 displacement and 788 // tmp as index register. 789 int MacroAssembler::split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate) { 790 assert(Immediate::is_simm20(si20_offset), "sanity"); 791 int lg_off = (int)si20_offset & 0x0fff; // Punch out low-order 12 bits, always positive. 792 int ll_off = (int)si20_offset & ~0x0fff; // Force low-order 12 bits to zero. 793 assert((Displacement::is_shortDisp(si20_offset) && (ll_off == 0)) || 794 !Displacement::is_shortDisp(si20_offset), "unexpected offset values"); 795 assert((lg_off+ll_off) == si20_offset, "offset splitup error"); 796 797 Register work = accumulate? Z_R0 : tmp; 798 799 if (fixed_codelen) { // Len of code = 10 = 4 + 6. 800 z_lghi(work, ll_off>>12); // Implicit sign extension. 801 z_slag(work, work, 12); 802 } else { // Len of code = 0..10. 803 if (ll_off == 0) { return -1; } 804 // ll_off has 8 significant bits (at most) plus sign. 805 if ((ll_off & 0x0000f000) == 0) { // Non-zero bits only in upper halfbyte. 806 z_llilh(work, ll_off >> 16); 807 if (ll_off < 0) { // Sign-extension required. 808 z_lgfr(work, work); 809 } 810 } else { 811 if ((ll_off & 0x000f0000) == 0) { // Non-zero bits only in lower halfbyte. 812 z_llill(work, ll_off); 813 } else { // Non-zero bits in both halfbytes. 814 z_lghi(work, ll_off>>12); // Implicit sign extension. 815 z_slag(work, work, 12); 816 } 817 } 818 } 819 if (accumulate) { z_algr(tmp, work); } // len of code += 4 820 return lg_off; 821 } 822 823 void MacroAssembler::load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) { 824 if (Displacement::is_validDisp(si20)) { 825 z_ley(t, si20, a); 826 } else { 827 // Fixed_codelen = true is a simple way to ensure that the size of load_float_largeoffset 828 // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant 829 // pool loads). 
830 bool accumulate = true; 831 bool fixed_codelen = true; 832 Register work; 833 834 if (fixed_codelen) { 835 z_lgr(tmp, a); // Lgr_if_needed not applicable due to fixed_codelen. 836 } else { 837 accumulate = (a == tmp); 838 } 839 work = tmp; 840 841 int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate); 842 if (disp12 < 0) { 843 z_le(t, si20, work); 844 } else { 845 if (accumulate) { 846 z_le(t, disp12, work); 847 } else { 848 z_le(t, disp12, work, a); 849 } 850 } 851 } 852 } 853 854 void MacroAssembler::load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) { 855 if (Displacement::is_validDisp(si20)) { 856 z_ldy(t, si20, a); 857 } else { 858 // Fixed_codelen = true is a simple way to ensure that the size of load_double_largeoffset 859 // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant 860 // pool loads). 861 bool accumulate = true; 862 bool fixed_codelen = true; 863 Register work; 864 865 if (fixed_codelen) { 866 z_lgr(tmp, a); // Lgr_if_needed not applicable due to fixed_codelen. 867 } else { 868 accumulate = (a == tmp); 869 } 870 work = tmp; 871 872 int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate); 873 if (disp12 < 0) { 874 z_ld(t, si20, work); 875 } else { 876 if (accumulate) { 877 z_ld(t, disp12, work); 878 } else { 879 z_ld(t, disp12, work, a); 880 } 881 } 882 } 883 } 884 885 // PCrelative TOC access. 886 // Returns distance (in bytes) from current position to start of consts section. 887 // Returns 0 (zero) if no consts section exists or if it has size zero. 888 long MacroAssembler::toc_distance() { 889 CodeSection* cs = code()->consts(); 890 return (long)((cs != NULL) ? cs->start()-pc() : 0); 891 } 892 893 // Implementation on x86/sparc assumes that constant and instruction section are 894 // adjacent, but this doesn't hold. Two special situations may occur, that we must 895 // be able to handle: 896 // 1. const section may be located apart from the inst section. 897 // 2. const section may be empty 898 // In both cases, we use the const section's start address to compute the "TOC", 899 // this seems to occur only temporarily; in the final step we always seem to end up 900 // with the pc-relatice variant. 901 // 902 // PC-relative offset could be +/-2**32 -> use long for disp 903 // Furthermore: makes no sense to have special code for 904 // adjacent const and inst sections. 905 void MacroAssembler::load_toc(Register Rtoc) { 906 // Simply use distance from start of const section (should be patched in the end). 907 long disp = toc_distance(); 908 909 RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp); 910 relocate(rspec); 911 z_larl(Rtoc, RelAddr::pcrel_off32(disp)); // Offset is in halfwords. 912 } 913 914 // PCrelative TOC access. 915 // Load from anywhere pcrelative (with relocation of load instr) 916 void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) { 917 address pc = this->pc(); 918 ptrdiff_t total_distance = dataLocation - pc; 919 RelocationHolder rspec = internal_word_Relocation::spec(dataLocation); 920 921 assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory"); 922 assert(total_distance != 0, "sanity"); 923 924 // Some extra safety net. 
925 if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { 926 guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance); 927 } 928 929 (this)->relocate(rspec, relocInfo::pcrel_addr_format); 930 z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance)); 931 } 932 933 934 // PCrelative TOC access. 935 // Load from anywhere pcrelative (with relocation of load instr) 936 // loaded addr has to be relocated when added to constant pool. 937 void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) { 938 address pc = this->pc(); 939 ptrdiff_t total_distance = addrLocation - pc; 940 RelocationHolder rspec = internal_word_Relocation::spec(addrLocation); 941 942 assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory"); 943 944 // Some extra safety net. 945 if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { 946 guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance); 947 } 948 949 (this)->relocate(rspec, relocInfo::pcrel_addr_format); 950 z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance)); 951 } 952 953 // Generic operation: load a value from memory and test. 954 // CondCode indicates the sign (<0, ==0, >0) of the loaded value. 955 void MacroAssembler::load_and_test_byte(Register dst, const Address &a) { 956 z_lb(dst, a); 957 z_ltr(dst, dst); 958 } 959 960 void MacroAssembler::load_and_test_short(Register dst, const Address &a) { 961 int64_t disp = a.disp20(); 962 if (Displacement::is_shortDisp(disp)) { 963 z_lh(dst, a); 964 } else if (Displacement::is_longDisp(disp)) { 965 z_lhy(dst, a); 966 } else { 967 guarantee(false, "displacement out of range"); 968 } 969 z_ltr(dst, dst); 970 } 971 972 void MacroAssembler::load_and_test_int(Register dst, const Address &a) { 973 z_lt(dst, a); 974 } 975 976 void MacroAssembler::load_and_test_int2long(Register dst, const Address &a) { 977 z_ltgf(dst, a); 978 } 979 980 void MacroAssembler::load_and_test_long(Register dst, const Address &a) { 981 z_ltg(dst, a); 982 } 983 984 // Test a bit in memory. 985 void MacroAssembler::testbit(const Address &a, unsigned int bit) { 986 assert(a.index() == noreg, "no index reg allowed in testbit"); 987 if (bit <= 7) { 988 z_tm(a.disp() + 3, a.base(), 1 << bit); 989 } else if (bit <= 15) { 990 z_tm(a.disp() + 2, a.base(), 1 << (bit - 8)); 991 } else if (bit <= 23) { 992 z_tm(a.disp() + 1, a.base(), 1 << (bit - 16)); 993 } else if (bit <= 31) { 994 z_tm(a.disp() + 0, a.base(), 1 << (bit - 24)); 995 } else { 996 ShouldNotReachHere(); 997 } 998 } 999 1000 // Test a bit in a register. Result is reflected in CC. 1001 void MacroAssembler::testbit(Register r, unsigned int bitPos) { 1002 if (bitPos < 16) { 1003 z_tmll(r, 1U<<bitPos); 1004 } else if (bitPos < 32) { 1005 z_tmlh(r, 1U<<(bitPos-16)); 1006 } else if (bitPos < 48) { 1007 z_tmhl(r, 1U<<(bitPos-32)); 1008 } else if (bitPos < 64) { 1009 z_tmhh(r, 1U<<(bitPos-48)); 1010 } else { 1011 ShouldNotReachHere(); 1012 } 1013 } 1014 1015 void MacroAssembler::prefetch_read(Address a) { 1016 z_pfd(1, a.disp20(), a.indexOrR0(), a.base()); 1017 } 1018 void MacroAssembler::prefetch_update(Address a) { 1019 z_pfd(2, a.disp20(), a.indexOrR0(), a.base()); 1020 } 1021 1022 // Clear a register, i.e. load const zero into reg. 1023 // Return len (in bytes) of generated instruction(s). 1024 // whole_reg: Clear 64 bits if true, 32 bits otherwise. 
1025 // set_cc: Use instruction that sets the condition code, if true. 1026 int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) { 1027 unsigned int start_off = offset(); 1028 if (whole_reg) { 1029 set_cc ? z_xgr(r, r) : z_laz(r, 0, Z_R0); 1030 } else { // Only 32bit register. 1031 set_cc ? z_xr(r, r) : z_lhi(r, 0); 1032 } 1033 return offset() - start_off; 1034 } 1035 1036 #ifdef ASSERT 1037 int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) { 1038 switch (pattern_len) { 1039 case 1: 1040 pattern = (pattern & 0x000000ff) | ((pattern & 0x000000ff)<<8); 1041 case 2: 1042 pattern = (pattern & 0x0000ffff) | ((pattern & 0x0000ffff)<<16); 1043 case 4: 1044 pattern = (pattern & 0xffffffffL) | ((pattern & 0xffffffffL)<<32); 1045 case 8: 1046 return load_const_optimized_rtn_len(r, pattern, true); 1047 break; 1048 default: 1049 guarantee(false, "preset_reg: bad len"); 1050 } 1051 return 0; 1052 } 1053 #endif 1054 1055 // addr: Address descriptor of memory to clear. Index register will not be used! 1056 // size: Number of bytes to clear. 1057 // condition code will not be preserved. 1058 // !!! DO NOT USE THEM FOR ATOMIC MEMORY CLEARING !!! 1059 // !!! Use store_const() instead !!! 1060 void MacroAssembler::clear_mem(const Address& addr, unsigned int size) { 1061 guarantee((addr.disp() + size) <= 4096, "MacroAssembler::clear_mem: size too large"); 1062 1063 switch (size) { 1064 case 0: 1065 return; 1066 case 1: 1067 z_mvi(addr, 0); 1068 return; 1069 case 2: 1070 z_mvhhi(addr, 0); 1071 return; 1072 case 4: 1073 z_mvhi(addr, 0); 1074 return; 1075 case 8: 1076 z_mvghi(addr, 0); 1077 return; 1078 default: ; // Fallthru to xc. 1079 } 1080 1081 // Caution: the emitter with Address operands does implicitly decrement the length 1082 if (size <= 256) { 1083 z_xc(addr, size, addr); 1084 } else { 1085 unsigned int offset = addr.disp(); 1086 unsigned int incr = 256; 1087 for (unsigned int i = 0; i <= size-incr; i += incr) { 1088 z_xc(offset, incr - 1, addr.base(), offset, addr.base()); 1089 offset += incr; 1090 } 1091 unsigned int rest = size - (offset - addr.disp()); 1092 if (size > 0) { 1093 z_xc(offset, rest-1, addr.base(), offset, addr.base()); 1094 } 1095 } 1096 } 1097 1098 void MacroAssembler::align(int modulus) { 1099 while (offset() % modulus != 0) z_nop(); 1100 } 1101 1102 // Special version for non-relocateable code if required alignment 1103 // is larger than CodeEntryAlignment. 1104 void MacroAssembler::align_address(int modulus) { 1105 while ((uintptr_t)pc() % modulus != 0) z_nop(); 1106 } 1107 1108 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 1109 Register temp_reg, 1110 int64_t extra_slot_offset) { 1111 // On Z, we can have index and disp in an Address. So don't call argument_offset, 1112 // which issues an unnecessary add instruction. 
1113 int stackElementSize = Interpreter::stackElementSize; 1114 int64_t offset = extra_slot_offset * stackElementSize; 1115 const Register argbase = Z_esp; 1116 if (arg_slot.is_constant()) { 1117 offset += arg_slot.as_constant() * stackElementSize; 1118 return Address(argbase, offset); 1119 } 1120 // else 1121 assert(temp_reg != noreg, "must specify"); 1122 assert(temp_reg != Z_ARG1, "base and index are conflicting"); 1123 z_sllg(temp_reg, arg_slot.as_register(), exact_log2(stackElementSize)); // tempreg = arg_slot << 3 1124 return Address(argbase, temp_reg, offset); 1125 } 1126 1127 1128 //=================================================================== 1129 //=== START C O N S T A N T S I N C O D E S T R E A M === 1130 //=================================================================== 1131 //=== P A T CH A B L E C O N S T A N T S === 1132 //=================================================================== 1133 1134 1135 //--------------------------------------------------- 1136 // Load (patchable) constant into register 1137 //--------------------------------------------------- 1138 1139 1140 // Load absolute address (and try to optimize). 1141 // Note: This method is usable only for position-fixed code, 1142 // referring to a position-fixed target location. 1143 // If not so, relocations and patching must be used. 1144 void MacroAssembler::load_absolute_address(Register d, address addr) { 1145 assert(addr != NULL, "should not happen"); 1146 BLOCK_COMMENT("load_absolute_address:"); 1147 if (addr == NULL) { 1148 z_larl(d, pc()); // Dummy emit for size calc. 1149 return; 1150 } 1151 1152 if (RelAddr::is_in_range_of_RelAddr32(addr, pc())) { 1153 z_larl(d, addr); 1154 return; 1155 } 1156 1157 load_const_optimized(d, (long)addr); 1158 } 1159 1160 // Load a 64bit constant. 1161 // Patchable code sequence, but not atomically patchable. 1162 // Make sure to keep code size constant -> no value-dependent optimizations. 1163 // Do not kill condition code. 1164 void MacroAssembler::load_const(Register t, long x) { 1165 // Note: Right shift is only cleanly defined for unsigned types 1166 // or for signed types with nonnegative values. 1167 Assembler::z_iihf(t, (long)((unsigned long)x >> 32)); 1168 Assembler::z_iilf(t, (long)((unsigned long)x & 0xffffffffUL)); 1169 } 1170 1171 // Load a 32bit constant into a 64bit register, sign-extend or zero-extend. 1172 // Patchable code sequence, but not atomically patchable. 1173 // Make sure to keep code size constant -> no value-dependent optimizations. 1174 // Do not kill condition code. 1175 void MacroAssembler::load_const_32to64(Register t, int64_t x, bool sign_extend) { 1176 if (sign_extend) { Assembler::z_lgfi(t, x); } 1177 else { Assembler::z_llilf(t, x); } 1178 } 1179 1180 // Load narrow oop constant, no decompression. 1181 void MacroAssembler::load_narrow_oop(Register t, narrowOop a) { 1182 assert(UseCompressedOops, "must be on to call this method"); 1183 load_const_32to64(t, CompressedOops::narrow_oop_value(a), false /*sign_extend*/); 1184 } 1185 1186 // Load narrow klass constant, compression required. 1187 void MacroAssembler::load_narrow_klass(Register t, Klass* k) { 1188 assert(UseCompressedClassPointers, "must be on to call this method"); 1189 narrowKlass encoded_k = CompressedKlassPointers::encode(k); 1190 load_const_32to64(t, encoded_k, false /*sign_extend*/); 1191 } 1192 1193 //------------------------------------------------------ 1194 // Compare (patchable) constant with register. 
1195 //------------------------------------------------------ 1196 1197 // Compare narrow oop in reg with narrow oop constant, no decompression. 1198 void MacroAssembler::compare_immediate_narrow_oop(Register oop1, narrowOop oop2) { 1199 assert(UseCompressedOops, "must be on to call this method"); 1200 1201 Assembler::z_clfi(oop1, CompressedOops::narrow_oop_value(oop2)); 1202 } 1203 1204 // Compare narrow oop in reg with narrow oop constant, no decompression. 1205 void MacroAssembler::compare_immediate_narrow_klass(Register klass1, Klass* klass2) { 1206 assert(UseCompressedClassPointers, "must be on to call this method"); 1207 narrowKlass encoded_k = CompressedKlassPointers::encode(klass2); 1208 1209 Assembler::z_clfi(klass1, encoded_k); 1210 } 1211 1212 //---------------------------------------------------------- 1213 // Check which kind of load_constant we have here. 1214 //---------------------------------------------------------- 1215 1216 // Detection of CPU version dependent load_const sequence. 1217 // The detection is valid only for code sequences generated by load_const, 1218 // not load_const_optimized. 1219 bool MacroAssembler::is_load_const(address a) { 1220 unsigned long inst1, inst2; 1221 unsigned int len1, len2; 1222 1223 len1 = get_instruction(a, &inst1); 1224 len2 = get_instruction(a + len1, &inst2); 1225 1226 return is_z_iihf(inst1) && is_z_iilf(inst2); 1227 } 1228 1229 // Detection of CPU version dependent load_const_32to64 sequence. 1230 // Mostly used for narrow oops and narrow Klass pointers. 1231 // The detection is valid only for code sequences generated by load_const_32to64. 1232 bool MacroAssembler::is_load_const_32to64(address pos) { 1233 unsigned long inst1, inst2; 1234 unsigned int len1; 1235 1236 len1 = get_instruction(pos, &inst1); 1237 return is_z_llilf(inst1); 1238 } 1239 1240 // Detection of compare_immediate_narrow sequence. 1241 // The detection is valid only for code sequences generated by compare_immediate_narrow_oop. 1242 bool MacroAssembler::is_compare_immediate32(address pos) { 1243 return is_equal(pos, CLFI_ZOPC, RIL_MASK); 1244 } 1245 1246 // Detection of compare_immediate_narrow sequence. 1247 // The detection is valid only for code sequences generated by compare_immediate_narrow_oop. 1248 bool MacroAssembler::is_compare_immediate_narrow_oop(address pos) { 1249 return is_compare_immediate32(pos); 1250 } 1251 1252 // Detection of compare_immediate_narrow sequence. 1253 // The detection is valid only for code sequences generated by compare_immediate_narrow_klass. 1254 bool MacroAssembler::is_compare_immediate_narrow_klass(address pos) { 1255 return is_compare_immediate32(pos); 1256 } 1257 1258 //----------------------------------- 1259 // patch the load_constant 1260 //----------------------------------- 1261 1262 // CPU-version dependent patching of load_const. 1263 void MacroAssembler::patch_const(address a, long x) { 1264 assert(is_load_const(a), "not a load of a constant"); 1265 // Note: Right shift is only cleanly defined for unsigned types 1266 // or for signed types with nonnegative values. 1267 set_imm32((address)a, (long)((unsigned long)x >> 32)); 1268 set_imm32((address)(a + 6), (long)((unsigned long)x & 0xffffffffUL)); 1269 } 1270 1271 // Patching the value of CPU version dependent load_const_32to64 sequence. 1272 // The passed ptr MUST be in compressed format! 
1273 int MacroAssembler::patch_load_const_32to64(address pos, int64_t np) { 1274 assert(is_load_const_32to64(pos), "not a load of a narrow ptr (oop or klass)"); 1275 1276 set_imm32(pos, np); 1277 return 6; 1278 } 1279 1280 // Patching the value of CPU version dependent compare_immediate_narrow sequence. 1281 // The passed ptr MUST be in compressed format! 1282 int MacroAssembler::patch_compare_immediate_32(address pos, int64_t np) { 1283 assert(is_compare_immediate32(pos), "not a compressed ptr compare"); 1284 1285 set_imm32(pos, np); 1286 return 6; 1287 } 1288 1289 // Patching the immediate value of CPU version dependent load_narrow_oop sequence. 1290 // The passed ptr must NOT be in compressed format! 1291 int MacroAssembler::patch_load_narrow_oop(address pos, oop o) { 1292 assert(UseCompressedOops, "Can only patch compressed oops"); 1293 return patch_load_const_32to64(pos, CompressedOops::narrow_oop_value(o)); 1294 } 1295 1296 // Patching the immediate value of CPU version dependent load_narrow_klass sequence. 1297 // The passed ptr must NOT be in compressed format! 1298 int MacroAssembler::patch_load_narrow_klass(address pos, Klass* k) { 1299 assert(UseCompressedClassPointers, "Can only patch compressed klass pointers"); 1300 1301 narrowKlass nk = CompressedKlassPointers::encode(k); 1302 return patch_load_const_32to64(pos, nk); 1303 } 1304 1305 // Patching the immediate value of CPU version dependent compare_immediate_narrow_oop sequence. 1306 // The passed ptr must NOT be in compressed format! 1307 int MacroAssembler::patch_compare_immediate_narrow_oop(address pos, oop o) { 1308 assert(UseCompressedOops, "Can only patch compressed oops"); 1309 return patch_compare_immediate_32(pos, CompressedOops::narrow_oop_value(o)); 1310 } 1311 1312 // Patching the immediate value of CPU version dependent compare_immediate_narrow_klass sequence. 1313 // The passed ptr must NOT be in compressed format! 1314 int MacroAssembler::patch_compare_immediate_narrow_klass(address pos, Klass* k) { 1315 assert(UseCompressedClassPointers, "Can only patch compressed klass pointers"); 1316 1317 narrowKlass nk = CompressedKlassPointers::encode(k); 1318 return patch_compare_immediate_32(pos, nk); 1319 } 1320 1321 //------------------------------------------------------------------------ 1322 // Extract the constant from a load_constant instruction stream. 1323 //------------------------------------------------------------------------ 1324 1325 // Get constant from a load_const sequence. 1326 long MacroAssembler::get_const(address a) { 1327 assert(is_load_const(a), "not a load of a constant"); 1328 unsigned long x; 1329 x = (((unsigned long) (get_imm32(a,0) & 0xffffffff)) << 32); 1330 x |= (((unsigned long) (get_imm32(a,1) & 0xffffffff))); 1331 return (long) x; 1332 } 1333 1334 //-------------------------------------- 1335 // Store a constant in memory. 1336 //-------------------------------------- 1337 1338 // General emitter to move a constant to memory. 1339 // The store is atomic. 1340 // o Address must be given in RS format (no index register) 1341 // o Displacement should be 12bit unsigned for efficiency. 20bit signed also supported. 1342 // o Constant can be 1, 2, 4, or 8 bytes, signed or unsigned. 1343 // o Memory slot can be 1, 2, 4, or 8 bytes, signed or unsigned. 1344 // o Memory slot must be at least as wide as constant, will assert otherwise. 1345 // o Signed constants will sign-extend, unsigned constants will zero-extend to slot width. 
1346 int MacroAssembler::store_const(const Address &dest, long imm, 1347 unsigned int lm, unsigned int lc, 1348 Register scratch) { 1349 int64_t disp = dest.disp(); 1350 Register base = dest.base(); 1351 assert(!dest.has_index(), "not supported"); 1352 assert((lm==1)||(lm==2)||(lm==4)||(lm==8), "memory length not supported"); 1353 assert((lc==1)||(lc==2)||(lc==4)||(lc==8), "constant length not supported"); 1354 assert(lm>=lc, "memory slot too small"); 1355 assert(lc==8 || Immediate::is_simm(imm, lc*8), "const out of range"); 1356 assert(Displacement::is_validDisp(disp), "displacement out of range"); 1357 1358 bool is_shortDisp = Displacement::is_shortDisp(disp); 1359 int store_offset = -1; 1360 1361 // For target len == 1 it's easy. 1362 if (lm == 1) { 1363 store_offset = offset(); 1364 if (is_shortDisp) { 1365 z_mvi(disp, base, imm); 1366 return store_offset; 1367 } else { 1368 z_mviy(disp, base, imm); 1369 return store_offset; 1370 } 1371 } 1372 1373 // All the "good stuff" takes an unsigned displacement. 1374 if (is_shortDisp) { 1375 // NOTE: Cannot use clear_mem for imm==0, because it is not atomic. 1376 1377 store_offset = offset(); 1378 switch (lm) { 1379 case 2: // Lc == 1 handled correctly here, even for unsigned. Instruction does no widening. 1380 z_mvhhi(disp, base, imm); 1381 return store_offset; 1382 case 4: 1383 if (Immediate::is_simm16(imm)) { 1384 z_mvhi(disp, base, imm); 1385 return store_offset; 1386 } 1387 break; 1388 case 8: 1389 if (Immediate::is_simm16(imm)) { 1390 z_mvghi(disp, base, imm); 1391 return store_offset; 1392 } 1393 break; 1394 default: 1395 ShouldNotReachHere(); 1396 break; 1397 } 1398 } 1399 1400 // Can't optimize, so load value and store it. 1401 guarantee(scratch != noreg, " need a scratch register here !"); 1402 if (imm != 0) { 1403 load_const_optimized(scratch, imm); // Preserves CC anyway. 1404 } else { 1405 // Leave CC alone!! 1406 (void) clear_reg(scratch, true, false); // Indicate unused result. 1407 } 1408 1409 store_offset = offset(); 1410 if (is_shortDisp) { 1411 switch (lm) { 1412 case 2: 1413 z_sth(scratch, disp, Z_R0, base); 1414 return store_offset; 1415 case 4: 1416 z_st(scratch, disp, Z_R0, base); 1417 return store_offset; 1418 case 8: 1419 z_stg(scratch, disp, Z_R0, base); 1420 return store_offset; 1421 default: 1422 ShouldNotReachHere(); 1423 break; 1424 } 1425 } else { 1426 switch (lm) { 1427 case 2: 1428 z_sthy(scratch, disp, Z_R0, base); 1429 return store_offset; 1430 case 4: 1431 z_sty(scratch, disp, Z_R0, base); 1432 return store_offset; 1433 case 8: 1434 z_stg(scratch, disp, Z_R0, base); 1435 return store_offset; 1436 default: 1437 ShouldNotReachHere(); 1438 break; 1439 } 1440 } 1441 return -1; // should not reach here 1442 } 1443 1444 //=================================================================== 1445 //=== N O T P A T CH A B L E C O N S T A N T S === 1446 //=================================================================== 1447 1448 // Load constant x into register t with a fast instruction sequence 1449 // depending on the bits in x. Preserves CC under all circumstances. 1450 int MacroAssembler::load_const_optimized_rtn_len(Register t, long x, bool emit) { 1451 if (x == 0) { 1452 int len; 1453 if (emit) { 1454 len = clear_reg(t, true, false); 1455 } else { 1456 len = 4; 1457 } 1458 return len; 1459 } 1460 1461 if (Immediate::is_simm16(x)) { 1462 if (emit) { z_lghi(t, x); } 1463 return 4; 1464 } 1465 1466 // 64 bit value: | part1 | part2 | part3 | part4 | 1467 // At least one part is not zero! 
1468 // Note: Right shift is only cleanly defined for unsigned types 1469 // or for signed types with nonnegative values. 1470 int part1 = (int)((unsigned long)x >> 48) & 0x0000ffff; 1471 int part2 = (int)((unsigned long)x >> 32) & 0x0000ffff; 1472 int part3 = (int)((unsigned long)x >> 16) & 0x0000ffff; 1473 int part4 = (int)x & 0x0000ffff; 1474 int part12 = (int)((unsigned long)x >> 32); 1475 int part34 = (int)x; 1476 1477 // Lower word only (unsigned). 1478 if (part12 == 0) { 1479 if (part3 == 0) { 1480 if (emit) z_llill(t, part4); 1481 return 4; 1482 } 1483 if (part4 == 0) { 1484 if (emit) z_llilh(t, part3); 1485 return 4; 1486 } 1487 if (emit) z_llilf(t, part34); 1488 return 6; 1489 } 1490 1491 // Upper word only. 1492 if (part34 == 0) { 1493 if (part1 == 0) { 1494 if (emit) z_llihl(t, part2); 1495 return 4; 1496 } 1497 if (part2 == 0) { 1498 if (emit) z_llihh(t, part1); 1499 return 4; 1500 } 1501 if (emit) z_llihf(t, part12); 1502 return 6; 1503 } 1504 1505 // Lower word only (signed). 1506 if ((part1 == 0x0000ffff) && (part2 == 0x0000ffff) && ((part3 & 0x00008000) != 0)) { 1507 if (emit) z_lgfi(t, part34); 1508 return 6; 1509 } 1510 1511 int len = 0; 1512 1513 if ((part1 == 0) || (part2 == 0)) { 1514 if (part1 == 0) { 1515 if (emit) z_llihl(t, part2); 1516 len += 4; 1517 } else { 1518 if (emit) z_llihh(t, part1); 1519 len += 4; 1520 } 1521 } else { 1522 if (emit) z_llihf(t, part12); 1523 len += 6; 1524 } 1525 1526 if ((part3 == 0) || (part4 == 0)) { 1527 if (part3 == 0) { 1528 if (emit) z_iill(t, part4); 1529 len += 4; 1530 } else { 1531 if (emit) z_iilh(t, part3); 1532 len += 4; 1533 } 1534 } else { 1535 if (emit) z_iilf(t, part34); 1536 len += 6; 1537 } 1538 return len; 1539 } 1540 1541 //===================================================================== 1542 //=== H I G H E R L E V E L B R A N C H E M I T T E R S === 1543 //===================================================================== 1544 1545 // Note: In the worst case, one of the scratch registers is destroyed!!! 1546 void MacroAssembler::compare32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1547 // Right operand is constant. 1548 if (x2.is_constant()) { 1549 jlong value = x2.as_constant(); 1550 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/true); 1551 return; 1552 } 1553 1554 // Right operand is in register. 1555 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/true); 1556 } 1557 1558 // Note: In the worst case, one of the scratch registers is destroyed!!! 1559 void MacroAssembler::compareU32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1560 // Right operand is constant. 1561 if (x2.is_constant()) { 1562 jlong value = x2.as_constant(); 1563 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/false); 1564 return; 1565 } 1566 1567 // Right operand is in register. 1568 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/false); 1569 } 1570 1571 // Note: In the worst case, one of the scratch registers is destroyed!!! 1572 void MacroAssembler::compare64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1573 // Right operand is constant. 1574 if (x2.is_constant()) { 1575 jlong value = x2.as_constant(); 1576 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/true); 1577 return; 1578 } 1579 1580 // Right operand is in register. 
1581 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/true); 1582 } 1583 1584 void MacroAssembler::compareU64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1585 // Right operand is constant. 1586 if (x2.is_constant()) { 1587 jlong value = x2.as_constant(); 1588 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/false); 1589 return; 1590 } 1591 1592 // Right operand is in register. 1593 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/false); 1594 } 1595 1596 // Generate an optimal branch to the branch target. 1597 // Optimal means that a relative branch (brc or brcl) is used if the 1598 // branch distance is short enough. Loading the target address into a 1599 // register and branching via reg is used as fallback only. 1600 // 1601 // Used registers: 1602 // Z_R1 - work reg. Holds branch target address. 1603 // Used in fallback case only. 1604 // 1605 // This version of branch_optimized is good for cases where the target address is known 1606 // and constant, i.e. is never changed (no relocation, no patching). 1607 void MacroAssembler::branch_optimized(Assembler::branch_condition cond, address branch_addr) { 1608 address branch_origin = pc(); 1609 1610 if (RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) { 1611 z_brc(cond, branch_addr); 1612 } else if (RelAddr::is_in_range_of_RelAddr32(branch_addr, branch_origin)) { 1613 z_brcl(cond, branch_addr); 1614 } else { 1615 load_const_optimized(Z_R1, branch_addr); // CC must not get killed by load_const_optimized. 1616 z_bcr(cond, Z_R1); 1617 } 1618 } 1619 1620 // This version of branch_optimized is good for cases where the target address 1621 // is potentially not yet known at the time the code is emitted. 1622 // 1623 // One very common case is a branch to an unbound label which is handled here. 1624 // The caller might know (or hope) that the branch distance is short enough 1625 // to be encoded in a 16bit relative address. In this case he will pass a 1626 // NearLabel branch_target. 1627 // Care must be taken with unbound labels. Each call to target(label) creates 1628 // an entry in the patch queue for that label to patch all references of the label 1629 // once it gets bound. Those recorded patch locations must be patchable. Otherwise, 1630 // an assertion fires at patch time. 1631 void MacroAssembler::branch_optimized(Assembler::branch_condition cond, Label& branch_target) { 1632 if (branch_target.is_bound()) { 1633 address branch_addr = target(branch_target); 1634 branch_optimized(cond, branch_addr); 1635 } else if (branch_target.is_near()) { 1636 z_brc(cond, branch_target); // Caller assures that the target will be in range for z_brc. 1637 } else { 1638 z_brcl(cond, branch_target); // Let's hope target is in range. Otherwise, we will abort at patch time. 1639 } 1640 } 1641 1642 // Generate an optimal compare and branch to the branch target. 1643 // Optimal means that a relative branch (clgrj, brc or brcl) is used if the 1644 // branch distance is short enough. Loading the target address into a 1645 // register and branching via reg is used as fallback only. 
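// Note: in the fallback case the branch is emitted via branch_optimized(),
// which may use Z_R1 as work register to hold the branch target address (see above).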
1646 // 1647 // Input: 1648 // r1 - left compare operand 1649 // r2 - right compare operand 1650 void MacroAssembler::compare_and_branch_optimized(Register r1, 1651 Register r2, 1652 Assembler::branch_condition cond, 1653 address branch_addr, 1654 bool len64, 1655 bool has_sign) { 1656 unsigned int casenum = (len64?2:0)+(has_sign?0:1); 1657 1658 address branch_origin = pc(); 1659 if (VM_Version::has_CompareBranch() && RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) { 1660 switch (casenum) { 1661 case 0: z_crj( r1, r2, cond, branch_addr); break; 1662 case 1: z_clrj (r1, r2, cond, branch_addr); break; 1663 case 2: z_cgrj(r1, r2, cond, branch_addr); break; 1664 case 3: z_clgrj(r1, r2, cond, branch_addr); break; 1665 default: ShouldNotReachHere(); break; 1666 } 1667 } else { 1668 switch (casenum) { 1669 case 0: z_cr( r1, r2); break; 1670 case 1: z_clr(r1, r2); break; 1671 case 2: z_cgr(r1, r2); break; 1672 case 3: z_clgr(r1, r2); break; 1673 default: ShouldNotReachHere(); break; 1674 } 1675 branch_optimized(cond, branch_addr); 1676 } 1677 } 1678 1679 // Generate an optimal compare and branch to the branch target. 1680 // Optimal means that a relative branch (clgij, brc or brcl) is used if the 1681 // branch distance is short enough. Loading the target address into a 1682 // register and branching via reg is used as fallback only. 1683 // 1684 // Input: 1685 // r1 - left compare operand (in register) 1686 // x2 - right compare operand (immediate) 1687 void MacroAssembler::compare_and_branch_optimized(Register r1, 1688 jlong x2, 1689 Assembler::branch_condition cond, 1690 Label& branch_target, 1691 bool len64, 1692 bool has_sign) { 1693 address branch_origin = pc(); 1694 bool x2_imm8 = (has_sign && Immediate::is_simm8(x2)) || (!has_sign && Immediate::is_uimm8(x2)); 1695 bool is_RelAddr16 = branch_target.is_near() || 1696 (branch_target.is_bound() && 1697 RelAddr::is_in_range_of_RelAddr16(target(branch_target), branch_origin)); 1698 unsigned int casenum = (len64?2:0)+(has_sign?0:1); 1699 1700 if (VM_Version::has_CompareBranch() && is_RelAddr16 && x2_imm8) { 1701 switch (casenum) { 1702 case 0: z_cij( r1, x2, cond, branch_target); break; 1703 case 1: z_clij(r1, x2, cond, branch_target); break; 1704 case 2: z_cgij(r1, x2, cond, branch_target); break; 1705 case 3: z_clgij(r1, x2, cond, branch_target); break; 1706 default: ShouldNotReachHere(); break; 1707 } 1708 return; 1709 } 1710 1711 if (x2 == 0) { 1712 switch (casenum) { 1713 case 0: z_ltr(r1, r1); break; 1714 case 1: z_ltr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication! 1715 case 2: z_ltgr(r1, r1); break; 1716 case 3: z_ltgr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication! 
1717 default: ShouldNotReachHere(); break; 1718 } 1719 } else { 1720 if ((has_sign && Immediate::is_simm16(x2)) || (!has_sign && Immediate::is_uimm(x2, 15))) { 1721 switch (casenum) { 1722 case 0: z_chi(r1, x2); break; 1723 case 1: z_chi(r1, x2); break; // positive immediate < 2**15 1724 case 2: z_cghi(r1, x2); break; 1725 case 3: z_cghi(r1, x2); break; // positive immediate < 2**15 1726 default: break; 1727 } 1728 } else if ( (has_sign && Immediate::is_simm32(x2)) || (!has_sign && Immediate::is_uimm32(x2)) ) { 1729 switch (casenum) { 1730 case 0: z_cfi( r1, x2); break; 1731 case 1: z_clfi(r1, x2); break; 1732 case 2: z_cgfi(r1, x2); break; 1733 case 3: z_clgfi(r1, x2); break; 1734 default: ShouldNotReachHere(); break; 1735 } 1736 } else { 1737 // No instruction with immediate operand possible, so load into register. 1738 Register scratch = (r1 != Z_R0) ? Z_R0 : Z_R1; 1739 load_const_optimized(scratch, x2); 1740 switch (casenum) { 1741 case 0: z_cr( r1, scratch); break; 1742 case 1: z_clr(r1, scratch); break; 1743 case 2: z_cgr(r1, scratch); break; 1744 case 3: z_clgr(r1, scratch); break; 1745 default: ShouldNotReachHere(); break; 1746 } 1747 } 1748 } 1749 branch_optimized(cond, branch_target); 1750 } 1751 1752 // Generate an optimal compare and branch to the branch target. 1753 // Optimal means that a relative branch (clgrj, brc or brcl) is used if the 1754 // branch distance is short enough. Loading the target address into a 1755 // register and branching via reg is used as fallback only. 1756 // 1757 // Input: 1758 // r1 - left compare operand 1759 // r2 - right compare operand 1760 void MacroAssembler::compare_and_branch_optimized(Register r1, 1761 Register r2, 1762 Assembler::branch_condition cond, 1763 Label& branch_target, 1764 bool len64, 1765 bool has_sign) { 1766 unsigned int casenum = (len64 ? 2 : 0) + (has_sign ? 
0 : 1); 1767 1768 if (branch_target.is_bound()) { 1769 address branch_addr = target(branch_target); 1770 compare_and_branch_optimized(r1, r2, cond, branch_addr, len64, has_sign); 1771 } else { 1772 if (VM_Version::has_CompareBranch() && branch_target.is_near()) { 1773 switch (casenum) { 1774 case 0: z_crj( r1, r2, cond, branch_target); break; 1775 case 1: z_clrj( r1, r2, cond, branch_target); break; 1776 case 2: z_cgrj( r1, r2, cond, branch_target); break; 1777 case 3: z_clgrj(r1, r2, cond, branch_target); break; 1778 default: ShouldNotReachHere(); break; 1779 } 1780 } else { 1781 switch (casenum) { 1782 case 0: z_cr( r1, r2); break; 1783 case 1: z_clr(r1, r2); break; 1784 case 2: z_cgr(r1, r2); break; 1785 case 3: z_clgr(r1, r2); break; 1786 default: ShouldNotReachHere(); break; 1787 } 1788 branch_optimized(cond, branch_target); 1789 } 1790 } 1791 } 1792 1793 //=========================================================================== 1794 //=== END H I G H E R L E V E L B R A N C H E M I T T E R S === 1795 //=========================================================================== 1796 1797 AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) { 1798 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 1799 int index = oop_recorder()->allocate_metadata_index(obj); 1800 RelocationHolder rspec = metadata_Relocation::spec(index); 1801 return AddressLiteral((address)obj, rspec); 1802 } 1803 1804 AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) { 1805 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 1806 int index = oop_recorder()->find_index(obj); 1807 RelocationHolder rspec = metadata_Relocation::spec(index); 1808 return AddressLiteral((address)obj, rspec); 1809 } 1810 1811 AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) { 1812 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 1813 int oop_index = oop_recorder()->allocate_oop_index(obj); 1814 return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); 1815 } 1816 1817 AddressLiteral MacroAssembler::constant_oop_address(jobject obj) { 1818 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 1819 int oop_index = oop_recorder()->find_index(obj); 1820 return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); 1821 } 1822 1823 // NOTE: destroys r 1824 void MacroAssembler::c2bool(Register r, Register t) { 1825 z_lcr(t, r); // t = -r 1826 z_or(r, t); // r = -r OR r 1827 z_srl(r, 31); // Yields 0 if r was 0, 1 otherwise. 1828 } 1829 1830 // Patch instruction `inst' at offset `inst_pos' to refer to `dest_pos' 1831 // and return the resulting instruction. 1832 // Dest_pos and inst_pos are 32 bit only. These parms can only designate 1833 // relative positions. 1834 // Use correct argument types. Do not pre-calculate distance. 
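// Sketch of the patching performed below: the pc-relative offset (expressed in
// halfwords) lives either in a 16-bit immediate field starting at instruction
// bit 16 (short branch, compare-and-branch, branch-on-index forms) or in a
// 32-bit immediate field starting at instruction bit 16 (BRASL/BRCL-style long forms).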
1835 unsigned long MacroAssembler::patched_branch(address dest_pos, unsigned long inst, address inst_pos) { 1836 int c = 0; 1837 unsigned long patched_inst = 0; 1838 if (is_call_pcrelative_short(inst) || 1839 is_branch_pcrelative_short(inst) || 1840 is_branchoncount_pcrelative_short(inst) || 1841 is_branchonindex32_pcrelative_short(inst)) { 1842 c = 1; 1843 int m = fmask(15, 0); // simm16(-1, 16, 32); 1844 int v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 32); 1845 patched_inst = (inst & ~m) | v; 1846 } else if (is_compareandbranch_pcrelative_short(inst)) { 1847 c = 2; 1848 long m = fmask(31, 16); // simm16(-1, 16, 48); 1849 long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48); 1850 patched_inst = (inst & ~m) | v; 1851 } else if (is_branchonindex64_pcrelative_short(inst)) { 1852 c = 3; 1853 long m = fmask(31, 16); // simm16(-1, 16, 48); 1854 long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48); 1855 patched_inst = (inst & ~m) | v; 1856 } else if (is_call_pcrelative_long(inst) || is_branch_pcrelative_long(inst)) { 1857 c = 4; 1858 long m = fmask(31, 0); // simm32(-1, 16, 48); 1859 long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48); 1860 patched_inst = (inst & ~m) | v; 1861 } else if (is_pcrelative_long(inst)) { // These are the non-branch pc-relative instructions. 1862 c = 5; 1863 long m = fmask(31, 0); // simm32(-1, 16, 48); 1864 long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48); 1865 patched_inst = (inst & ~m) | v; 1866 } else { 1867 print_dbg_msg(tty, inst, "not a relative branch", 0); 1868 dump_code_range(tty, inst_pos, 32, "not a pcrelative branch"); 1869 ShouldNotReachHere(); 1870 } 1871 1872 long new_off = get_pcrel_offset(patched_inst); 1873 if (new_off != (dest_pos-inst_pos)) { 1874 tty->print_cr("case %d: dest_pos = %p, inst_pos = %p, disp = %ld(%12.12lx)", c, dest_pos, inst_pos, new_off, new_off); 1875 print_dbg_msg(tty, inst, "<- original instruction: branch patching error", 0); 1876 print_dbg_msg(tty, patched_inst, "<- patched instruction: branch patching error", 0); 1877 #ifdef LUCY_DBG 1878 VM_Version::z_SIGSEGV(); 1879 #endif 1880 ShouldNotReachHere(); 1881 } 1882 return patched_inst; 1883 } 1884 1885 // Only called when binding labels (share/vm/asm/assembler.cpp) 1886 // Pass arguments as intended. Do not pre-calculate distance. 1887 void MacroAssembler::pd_patch_instruction(address branch, address target, const char* file, int line) { 1888 unsigned long stub_inst; 1889 int inst_len = get_instruction(branch, &stub_inst); 1890 1891 set_instruction(branch, patched_branch(target, stub_inst, branch), inst_len); 1892 } 1893 1894 1895 // Extract relative address (aka offset). 1896 // inv_simm16 works for 4-byte instructions only. 1897 // compare and branch instructions are 6-byte and have a 16bit offset "in the middle". 
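// The two layouts are told apart below by instruction length: for a 4-byte
// instruction the upper 32 bits of `inst' (held right-aligned) are zero,
// for a 6-byte instruction they are not.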
1898 long MacroAssembler::get_pcrel_offset(unsigned long inst) { 1899 1900 if (MacroAssembler::is_pcrelative_short(inst)) { 1901 if (((inst&0xFFFFffff00000000UL) == 0) && ((inst&0x00000000FFFF0000UL) != 0)) { 1902 return RelAddr::inv_pcrel_off16(inv_simm16(inst)); 1903 } else { 1904 return RelAddr::inv_pcrel_off16(inv_simm16_48(inst)); 1905 } 1906 } 1907 1908 if (MacroAssembler::is_pcrelative_long(inst)) { 1909 return RelAddr::inv_pcrel_off32(inv_simm32(inst)); 1910 } 1911 1912 print_dbg_msg(tty, inst, "not a pcrelative instruction", 6); 1913 #ifdef LUCY_DBG 1914 VM_Version::z_SIGSEGV(); 1915 #else 1916 ShouldNotReachHere(); 1917 #endif 1918 return -1; 1919 } 1920 1921 long MacroAssembler::get_pcrel_offset(address pc) { 1922 unsigned long inst; 1923 unsigned int len = get_instruction(pc, &inst); 1924 1925 #ifdef ASSERT 1926 long offset; 1927 if (MacroAssembler::is_pcrelative_short(inst) || MacroAssembler::is_pcrelative_long(inst)) { 1928 offset = get_pcrel_offset(inst); 1929 } else { 1930 offset = -1; 1931 } 1932 1933 if (offset == -1) { 1934 dump_code_range(tty, pc, 32, "not a pcrelative instruction"); 1935 #ifdef LUCY_DBG 1936 VM_Version::z_SIGSEGV(); 1937 #else 1938 ShouldNotReachHere(); 1939 #endif 1940 } 1941 return offset; 1942 #else 1943 return get_pcrel_offset(inst); 1944 #endif // ASSERT 1945 } 1946 1947 // Get target address from pc-relative instructions. 1948 address MacroAssembler::get_target_addr_pcrel(address pc) { 1949 assert(is_pcrelative_long(pc), "not a pcrelative instruction"); 1950 return pc + get_pcrel_offset(pc); 1951 } 1952 1953 // Patch pc relative load address. 1954 void MacroAssembler::patch_target_addr_pcrel(address pc, address con) { 1955 unsigned long inst; 1956 // Offset is +/- 2**32 -> use long. 1957 ptrdiff_t distance = con - pc; 1958 1959 get_instruction(pc, &inst); 1960 1961 if (is_pcrelative_short(inst)) { 1962 *(short *)(pc+2) = RelAddr::pcrel_off16(con, pc); // Instructions are at least 2-byte aligned, no test required. 1963 1964 // Some extra safety net. 1965 if (!RelAddr::is_in_range_of_RelAddr16(distance)) { 1966 print_dbg_msg(tty, inst, "distance out of range (16bit)", 4); 1967 dump_code_range(tty, pc, 32, "distance out of range (16bit)"); 1968 guarantee(RelAddr::is_in_range_of_RelAddr16(distance), "too far away (more than +/- 2**16"); 1969 } 1970 return; 1971 } 1972 1973 if (is_pcrelative_long(inst)) { 1974 *(int *)(pc+2) = RelAddr::pcrel_off32(con, pc); 1975 1976 // Some Extra safety net. 1977 if (!RelAddr::is_in_range_of_RelAddr32(distance)) { 1978 print_dbg_msg(tty, inst, "distance out of range (32bit)", 6); 1979 dump_code_range(tty, pc, 32, "distance out of range (32bit)"); 1980 guarantee(RelAddr::is_in_range_of_RelAddr32(distance), "too far away (more than +/- 2**32"); 1981 } 1982 return; 1983 } 1984 1985 guarantee(false, "not a pcrelative instruction to patch!"); 1986 } 1987 1988 // "Current PC" here means the address just behind the basr instruction. 1989 address MacroAssembler::get_PC(Register result) { 1990 z_basr(result, Z_R0); // Don't branch, just save next instruction address in result. 1991 return pc(); 1992 } 1993 1994 // Get current PC + offset. 1995 // Offset given in bytes, must be even! 1996 // "Current PC" here means the address of the larl instruction plus the given offset. 1997 address MacroAssembler::get_PC(Register result, int64_t offset) { 1998 address here = pc(); 1999 z_larl(result, offset/2); // Save target instruction address in result. 
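// LARL takes its relative-immediate operand in halfwords, hence the division
// of the byte offset by 2 (which is why the offset must be even).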
2000 return here + offset; 2001 } 2002 2003 void MacroAssembler::instr_size(Register size, Register pc) { 2004 // Extract 2 most significant bits of current instruction. 2005 z_llgc(size, Address(pc)); 2006 z_srl(size, 6); 2007 // Compute (x+3)&6 which translates 0->2, 1->4, 2->4, 3->6. 2008 z_ahi(size, 3); 2009 z_nill(size, 6); 2010 } 2011 2012 // Resize_frame with SP(new) = SP(old) - [offset]. 2013 void MacroAssembler::resize_frame_sub(Register offset, Register fp, bool load_fp) 2014 { 2015 assert_different_registers(offset, fp, Z_SP); 2016 if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); } 2017 2018 z_sgr(Z_SP, offset); 2019 z_stg(fp, _z_abi(callers_sp), Z_SP); 2020 } 2021 2022 // Resize_frame with SP(new) = [newSP] + offset. 2023 // This emitter is useful if we already have calculated a pointer 2024 // into the to-be-allocated stack space, e.g. with special alignment properties, 2025 // but need some additional space, e.g. for spilling. 2026 // newSP is the pre-calculated pointer. It must not be modified. 2027 // fp holds, or is filled with, the frame pointer. 2028 // offset is the additional increment which is added to addr to form the new SP. 2029 // Note: specify a negative value to reserve more space! 2030 // load_fp == true only indicates that fp is not pre-filled with the frame pointer. 2031 // It does not guarantee that fp contains the frame pointer at the end. 2032 void MacroAssembler::resize_frame_abs_with_offset(Register newSP, Register fp, int offset, bool load_fp) { 2033 assert_different_registers(newSP, fp, Z_SP); 2034 2035 if (load_fp) { 2036 z_lg(fp, _z_abi(callers_sp), Z_SP); 2037 } 2038 2039 add2reg(Z_SP, offset, newSP); 2040 z_stg(fp, _z_abi(callers_sp), Z_SP); 2041 } 2042 2043 // Resize_frame with SP(new) = [newSP]. 2044 // load_fp == true only indicates that fp is not pre-filled with the frame pointer. 2045 // It does not guarantee that fp contains the frame pointer at the end. 2046 void MacroAssembler::resize_frame_absolute(Register newSP, Register fp, bool load_fp) { 2047 assert_different_registers(newSP, fp, Z_SP); 2048 2049 if (load_fp) { 2050 z_lg(fp, _z_abi(callers_sp), Z_SP); // need to use load/store. 2051 } 2052 2053 z_lgr(Z_SP, newSP); 2054 if (newSP != Z_R0) { // make sure we generate correct code, no matter what register newSP uses. 2055 z_stg(fp, _z_abi(callers_sp), newSP); 2056 } else { 2057 z_stg(fp, _z_abi(callers_sp), Z_SP); 2058 } 2059 } 2060 2061 // Resize_frame with SP(new) = SP(old) + offset. 2062 void MacroAssembler::resize_frame(RegisterOrConstant offset, Register fp, bool load_fp) { 2063 assert_different_registers(fp, Z_SP); 2064 2065 if (load_fp) { 2066 z_lg(fp, _z_abi(callers_sp), Z_SP); 2067 } 2068 add64(Z_SP, offset); 2069 z_stg(fp, _z_abi(callers_sp), Z_SP); 2070 } 2071 2072 void MacroAssembler::push_frame(Register bytes, Register old_sp, bool copy_sp, bool bytes_with_inverted_sign) { 2073 #ifdef ASSERT 2074 assert_different_registers(bytes, old_sp, Z_SP); 2075 if (!copy_sp) { 2076 z_cgr(old_sp, Z_SP); 2077 asm_assert_eq("[old_sp]!=[Z_SP]", 0x211); 2078 } 2079 #endif 2080 if (copy_sp) { z_lgr(old_sp, Z_SP); } 2081 if (bytes_with_inverted_sign) { 2082 z_agr(Z_SP, bytes); 2083 } else { 2084 z_sgr(Z_SP, bytes); // Z_sgfr sufficient, but probably not faster. 
2085 } 2086 z_stg(old_sp, _z_abi(callers_sp), Z_SP); 2087 } 2088 2089 unsigned int MacroAssembler::push_frame(unsigned int bytes, Register scratch) { 2090 long offset = Assembler::align(bytes, frame::alignment_in_bytes); 2091 assert(offset > 0, "should push a frame with positive size, size = %ld.", offset); 2092 assert(Displacement::is_validDisp(-offset), "frame size out of range, size = %ld", offset); 2093 2094 // We must not write outside the current stack bounds (given by Z_SP). 2095 // Thus, we have to first update Z_SP and then store the previous SP as stack linkage. 2096 // We rely on Z_R0 by default to be available as scratch. 2097 z_lgr(scratch, Z_SP); 2098 add2reg(Z_SP, -offset); 2099 z_stg(scratch, _z_abi(callers_sp), Z_SP); 2100 #ifdef ASSERT 2101 // Just make sure nobody uses the value in the default scratch register. 2102 // When another register is used, the caller might rely on it containing the frame pointer. 2103 if (scratch == Z_R0) { 2104 z_iihf(scratch, 0xbaadbabe); 2105 z_iilf(scratch, 0xdeadbeef); 2106 } 2107 #endif 2108 return offset; 2109 } 2110 2111 // Push a frame of size `bytes' plus abi160 on top. 2112 unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) { 2113 BLOCK_COMMENT("push_frame_abi160 {"); 2114 unsigned int res = push_frame(bytes + frame::z_abi_160_size); 2115 BLOCK_COMMENT("} push_frame_abi160"); 2116 return res; 2117 } 2118 2119 // Pop current C frame. 2120 void MacroAssembler::pop_frame() { 2121 BLOCK_COMMENT("pop_frame:"); 2122 Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP); 2123 } 2124 2125 // Pop current C frame and restore return PC register (Z_R14). 2126 void MacroAssembler::pop_frame_restore_retPC(int frame_size_in_bytes) { 2127 BLOCK_COMMENT("pop_frame_restore_retPC:"); 2128 int retPC_offset = _z_abi16(return_pc) + frame_size_in_bytes; 2129 // If possible, pop frame by add instead of load (a penny saved is a penny got :-). 2130 if (Displacement::is_validDisp(retPC_offset)) { 2131 z_lg(Z_R14, retPC_offset, Z_SP); 2132 add2reg(Z_SP, frame_size_in_bytes); 2133 } else { 2134 add2reg(Z_SP, frame_size_in_bytes); 2135 restore_return_pc(); 2136 } 2137 } 2138 2139 void MacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) { 2140 if (allow_relocation) { 2141 call_c(entry_point); 2142 } else { 2143 call_c_static(entry_point); 2144 } 2145 } 2146 2147 void MacroAssembler::call_VM_leaf_base(address entry_point) { 2148 bool allow_relocation = true; 2149 call_VM_leaf_base(entry_point, allow_relocation); 2150 } 2151 2152 void MacroAssembler::call_VM_base(Register oop_result, 2153 Register last_java_sp, 2154 address entry_point, 2155 bool allow_relocation, 2156 bool check_exceptions) { // Defaults to true. 2157 // Allow_relocation indicates, if true, that the generated code shall 2158 // be fit for code relocation or referenced data relocation. In other 2159 // words: all addresses must be considered variable. PC-relative addressing 2160 // is not possible then. 2161 // On the other hand, if (allow_relocation == false), addresses and offsets 2162 // may be considered stable, enabling us to take advantage of some PC-relative 2163 // addressing tweaks. These might improve performance and reduce code size. 2164 2165 // Determine last_java_sp register. 2166 if (!last_java_sp->is_valid()) { 2167 last_java_sp = Z_SP; // Load Z_SP as SP. 2168 } 2169 2170 set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, Z_R1, allow_relocation); 2171 2172 // ARG1 must hold thread address. 
2173 z_lgr(Z_ARG1, Z_thread); 2174 2175 address return_pc = NULL; 2176 if (allow_relocation) { 2177 return_pc = call_c(entry_point); 2178 } else { 2179 return_pc = call_c_static(entry_point); 2180 } 2181 2182 reset_last_Java_frame(allow_relocation); 2183 2184 // C++ interp handles this in the interpreter. 2185 check_and_handle_popframe(Z_thread); 2186 check_and_handle_earlyret(Z_thread); 2187 2188 // Check for pending exceptions. 2189 if (check_exceptions) { 2190 // Check for pending exceptions (java_thread is set upon return). 2191 load_and_test_long(Z_R0_scratch, Address(Z_thread, Thread::pending_exception_offset())); 2192 2193 // This used to conditionally jump to forward_exception however it is 2194 // possible if we relocate that the branch will not reach. So we must jump 2195 // around so we can always reach. 2196 2197 Label ok; 2198 z_bre(ok); // Bcondequal is the same as bcondZero. 2199 call_stub(StubRoutines::forward_exception_entry()); 2200 bind(ok); 2201 } 2202 2203 // Get oop result if there is one and reset the value in the thread. 2204 if (oop_result->is_valid()) { 2205 get_vm_result(oop_result); 2206 } 2207 2208 _last_calls_return_pc = return_pc; // Wipe out other (error handling) calls. 2209 } 2210 2211 void MacroAssembler::call_VM_base(Register oop_result, 2212 Register last_java_sp, 2213 address entry_point, 2214 bool check_exceptions) { // Defaults to true. 2215 bool allow_relocation = true; 2216 call_VM_base(oop_result, last_java_sp, entry_point, allow_relocation, check_exceptions); 2217 } 2218 2219 // VM calls without explicit last_java_sp. 2220 2221 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) { 2222 // Call takes possible detour via InterpreterMacroAssembler. 2223 call_VM_base(oop_result, noreg, entry_point, true, check_exceptions); 2224 } 2225 2226 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) { 2227 // Z_ARG1 is reserved for the thread. 2228 lgr_if_needed(Z_ARG2, arg_1); 2229 call_VM(oop_result, entry_point, check_exceptions); 2230 } 2231 2232 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { 2233 // Z_ARG1 is reserved for the thread. 2234 lgr_if_needed(Z_ARG2, arg_1); 2235 assert(arg_2 != Z_ARG2, "smashed argument"); 2236 lgr_if_needed(Z_ARG3, arg_2); 2237 call_VM(oop_result, entry_point, check_exceptions); 2238 } 2239 2240 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, 2241 Register arg_3, bool check_exceptions) { 2242 // Z_ARG1 is reserved for the thread. 2243 lgr_if_needed(Z_ARG2, arg_1); 2244 assert(arg_2 != Z_ARG2, "smashed argument"); 2245 lgr_if_needed(Z_ARG3, arg_2); 2246 assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument"); 2247 lgr_if_needed(Z_ARG4, arg_3); 2248 call_VM(oop_result, entry_point, check_exceptions); 2249 } 2250 2251 // VM static calls without explicit last_java_sp. 2252 2253 void MacroAssembler::call_VM_static(Register oop_result, address entry_point, bool check_exceptions) { 2254 // Call takes possible detour via InterpreterMacroAssembler. 2255 call_VM_base(oop_result, noreg, entry_point, false, check_exceptions); 2256 } 2257 2258 void MacroAssembler::call_VM_static(Register oop_result, address entry_point, Register arg_1, Register arg_2, 2259 Register arg_3, bool check_exceptions) { 2260 // Z_ARG1 is reserved for the thread. 
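// The Java-visible arguments are therefore shifted by one register:
// arg_1 -> Z_ARG2, arg_2 -> Z_ARG3, arg_3 -> Z_ARG4. The asserts below guard
// against clobbering a source register that is still needed.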
2261 lgr_if_needed(Z_ARG2, arg_1); 2262 assert(arg_2 != Z_ARG2, "smashed argument"); 2263 lgr_if_needed(Z_ARG3, arg_2); 2264 assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument"); 2265 lgr_if_needed(Z_ARG4, arg_3); 2266 call_VM_static(oop_result, entry_point, check_exceptions); 2267 } 2268 2269 // VM calls with explicit last_java_sp. 2270 2271 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions) { 2272 // Call takes possible detour via InterpreterMacroAssembler. 2273 call_VM_base(oop_result, last_java_sp, entry_point, true, check_exceptions); 2274 } 2275 2276 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) { 2277 // Z_ARG1 is reserved for the thread. 2278 lgr_if_needed(Z_ARG2, arg_1); 2279 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2280 } 2281 2282 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, 2283 Register arg_2, bool check_exceptions) { 2284 // Z_ARG1 is reserved for the thread. 2285 lgr_if_needed(Z_ARG2, arg_1); 2286 assert(arg_2 != Z_ARG2, "smashed argument"); 2287 lgr_if_needed(Z_ARG3, arg_2); 2288 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2289 } 2290 2291 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, 2292 Register arg_2, Register arg_3, bool check_exceptions) { 2293 // Z_ARG1 is reserved for the thread. 2294 lgr_if_needed(Z_ARG2, arg_1); 2295 assert(arg_2 != Z_ARG2, "smashed argument"); 2296 lgr_if_needed(Z_ARG3, arg_2); 2297 assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument"); 2298 lgr_if_needed(Z_ARG4, arg_3); 2299 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2300 } 2301 2302 // VM leaf calls. 2303 2304 void MacroAssembler::call_VM_leaf(address entry_point) { 2305 // Call takes possible detour via InterpreterMacroAssembler. 2306 call_VM_leaf_base(entry_point, true); 2307 } 2308 2309 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) { 2310 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2311 call_VM_leaf(entry_point); 2312 } 2313 2314 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) { 2315 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2316 assert(arg_2 != Z_ARG1, "smashed argument"); 2317 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2318 call_VM_leaf(entry_point); 2319 } 2320 2321 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) { 2322 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2323 assert(arg_2 != Z_ARG1, "smashed argument"); 2324 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2325 assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument"); 2326 if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3); 2327 call_VM_leaf(entry_point); 2328 } 2329 2330 // Static VM leaf calls. 2331 // Really static VM leaf calls are never patched. 2332 2333 void MacroAssembler::call_VM_leaf_static(address entry_point) { 2334 // Call takes possible detour via InterpreterMacroAssembler. 
2335 call_VM_leaf_base(entry_point, false); 2336 } 2337 2338 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1) { 2339 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2340 call_VM_leaf_static(entry_point); 2341 } 2342 2343 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2) { 2344 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2345 assert(arg_2 != Z_ARG1, "smashed argument"); 2346 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2347 call_VM_leaf_static(entry_point); 2348 } 2349 2350 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2, Register arg_3) { 2351 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2352 assert(arg_2 != Z_ARG1, "smashed argument"); 2353 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2354 assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument"); 2355 if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3); 2356 call_VM_leaf_static(entry_point); 2357 } 2358 2359 // Don't use detour via call_c(reg). 2360 address MacroAssembler::call_c(address function_entry) { 2361 load_const(Z_R1, function_entry); 2362 return call(Z_R1); 2363 } 2364 2365 // Variant for really static (non-relocatable) calls which are never patched. 2366 address MacroAssembler::call_c_static(address function_entry) { 2367 load_absolute_address(Z_R1, function_entry); 2368 #if 0 // def ASSERT 2369 // Verify that call site did not move. 2370 load_const_optimized(Z_R0, function_entry); 2371 z_cgr(Z_R1, Z_R0); 2372 z_brc(bcondEqual, 3); 2373 z_illtrap(0xba); 2374 #endif 2375 return call(Z_R1); 2376 } 2377 2378 address MacroAssembler::call_c_opt(address function_entry) { 2379 bool success = call_far_patchable(function_entry, -2 /* emit relocation + constant */); 2380 _last_calls_return_pc = success ? pc() : NULL; 2381 return _last_calls_return_pc; 2382 } 2383 2384 // Identify a call_far_patchable instruction: LARL + LG + BASR 2385 // 2386 // nop ; optionally, if required for alignment 2387 // lgrl rx,A(TOC entry) ; PC-relative access into constant pool 2388 // basr Z_R14,rx ; end of this instruction must be aligned to a word boundary 2389 // 2390 // Code pattern will eventually get patched into variant2 (see below for detection code). 2391 // 2392 bool MacroAssembler::is_call_far_patchable_variant0_at(address instruction_addr) { 2393 address iaddr = instruction_addr; 2394 2395 // Check for the actual load instruction. 2396 if (!is_load_const_from_toc(iaddr)) { return false; } 2397 iaddr += load_const_from_toc_size(); 2398 2399 // Check for the call (BASR) instruction, finally. 2400 assert(iaddr-instruction_addr+call_byregister_size() == call_far_patchable_size(), "size mismatch"); 2401 return is_call_byregister(iaddr); 2402 } 2403 2404 // Identify a call_far_patchable instruction: BRASL 2405 // 2406 // Code pattern to suits atomic patching: 2407 // nop ; Optionally, if required for alignment. 2408 // nop ... ; Multiple filler nops to compensate for size difference (variant0 is longer). 2409 // nop ; For code pattern detection: Prepend each BRASL with a nop. 2410 // brasl Z_R14,<reladdr> ; End of code must be 4-byte aligned ! 2411 bool MacroAssembler::is_call_far_patchable_variant2_at(address instruction_addr) { 2412 const address call_addr = (address)((intptr_t)instruction_addr + call_far_patchable_size() - call_far_pcrelative_size()); 2413 2414 // Check for correct number of leading nops. 
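// Everything between instruction_addr and call_addr must be filler nops;
// the trailing nop + BRASL pair is then matched by is_call_far_pcrelative() below.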
2415 address iaddr; 2416 for (iaddr = instruction_addr; iaddr < call_addr; iaddr += nop_size()) { 2417 if (!is_z_nop(iaddr)) { return false; } 2418 } 2419 assert(iaddr == call_addr, "sanity"); 2420 2421 // --> Check for call instruction. 2422 if (is_call_far_pcrelative(call_addr)) { 2423 assert(call_addr-instruction_addr+call_far_pcrelative_size() == call_far_patchable_size(), "size mismatch"); 2424 return true; 2425 } 2426 2427 return false; 2428 } 2429 2430 // Emit a NOT mt-safely patchable 64 bit absolute call. 2431 // If toc_offset == -2, then the destination of the call (= target) is emitted 2432 // to the constant pool and a runtime_call relocation is added 2433 // to the code buffer. 2434 // If toc_offset != -2, target must already be in the constant pool at 2435 // _ctableStart+toc_offset (a caller can retrieve toc_offset 2436 // from the runtime_call relocation). 2437 // Special handling of emitting to scratch buffer when there is no constant pool. 2438 // Slightly changed code pattern. We emit an additional nop if we would 2439 // not end emitting at a word aligned address. This is to ensure 2440 // an atomically patchable displacement in brasl instructions. 2441 // 2442 // A call_far_patchable comes in different flavors: 2443 // - LARL(CP) / LG(CP) / BR (address in constant pool, access via CP register) 2444 // - LGRL(CP) / BR (address in constant pool, pc-relative access) 2445 // - BRASL (relative address of call target coded in instruction) 2446 // All flavors occupy the same amount of space. Length differences are compensated 2447 // by leading nops, such that the instruction sequence always ends at the same 2448 // byte offset. This is required to keep the return offset constant. 2449 // Furthermore, the return address (the end of the instruction sequence) is forced 2450 // to be on a 4-byte boundary. This is required for atomic patching, should we ever 2451 // need to patch the call target of the BRASL flavor. 2452 // RETURN value: false, if no constant pool entry could be allocated, true otherwise. 2453 bool MacroAssembler::call_far_patchable(address target, int64_t tocOffset) { 2454 // Get current pc and ensure word alignment for end of instr sequence. 2455 const address start_pc = pc(); 2456 const intptr_t start_off = offset(); 2457 assert(!call_far_patchable_requires_alignment_nop(start_pc), "call_far_patchable requires aligned address"); 2458 const ptrdiff_t dist = (ptrdiff_t)(target - (start_pc + 2)); // Prepend each BRASL with a nop. 2459 const bool emit_target_to_pool = (tocOffset == -2) && !code_section()->scratch_emit(); 2460 const bool emit_relative_call = !emit_target_to_pool && 2461 RelAddr::is_in_range_of_RelAddr32(dist) && 2462 ReoptimizeCallSequences && 2463 !code_section()->scratch_emit(); 2464 2465 if (emit_relative_call) { 2466 // Add padding to get the same size as below. 2467 const unsigned int padding = call_far_patchable_size() - call_far_pcrelative_size(); 2468 unsigned int current_padding; 2469 for (current_padding = 0; current_padding < padding; current_padding += nop_size()) { z_nop(); } 2470 assert(current_padding == padding, "sanity"); 2471 2472 // relative call: len = 2(nop) + 6 (brasl) 2473 // CodeBlob resize cannot occur in this case because 2474 // this call is emitted into pre-existing space. 2475 z_nop(); // Prepend each BRASL with a nop. 2476 z_brasl(Z_R14, target); 2477 } else { 2478 // absolute call: Get address from TOC. 
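// This is the constant-pool flavor described in the header comment above:
// the target address is fetched pc-relative from the TOC and called via BASR.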
2479 // len = (load TOC){6|0} + (load from TOC){6} + (basr){2} = {14|8} 2480 if (emit_target_to_pool) { 2481 // When emitting the call for the first time, we do not need to use 2482 // the pc-relative version. It will be patched anyway, when the code 2483 // buffer is copied. 2484 // Relocation is not needed when !ReoptimizeCallSequences. 2485 relocInfo::relocType rt = ReoptimizeCallSequences ? relocInfo::runtime_call_w_cp_type : relocInfo::none; 2486 AddressLiteral dest(target, rt); 2487 // Store_oop_in_toc() adds dest to the constant table. As side effect, this kills 2488 // inst_mark(). Reset if possible. 2489 bool reset_mark = (inst_mark() == pc()); 2490 tocOffset = store_oop_in_toc(dest); 2491 if (reset_mark) { set_inst_mark(); } 2492 if (tocOffset == -1) { 2493 return false; // Couldn't create constant pool entry. 2494 } 2495 } 2496 assert(offset() == start_off, "emit no code before this point!"); 2497 2498 address tocPos = pc() + tocOffset; 2499 if (emit_target_to_pool) { 2500 tocPos = code()->consts()->start() + tocOffset; 2501 } 2502 load_long_pcrelative(Z_R14, tocPos); 2503 z_basr(Z_R14, Z_R14); 2504 } 2505 2506 #ifdef ASSERT 2507 // Assert that we can identify the emitted call. 2508 assert(is_call_far_patchable_at(addr_at(start_off)), "can't identify emitted call"); 2509 assert(offset() == start_off+call_far_patchable_size(), "wrong size"); 2510 2511 if (emit_target_to_pool) { 2512 assert(get_dest_of_call_far_patchable_at(addr_at(start_off), code()->consts()->start()) == target, 2513 "wrong encoding of dest address"); 2514 } 2515 #endif 2516 return true; // success 2517 } 2518 2519 // Identify a call_far_patchable instruction. 2520 // For more detailed information see header comment of call_far_patchable. 2521 bool MacroAssembler::is_call_far_patchable_at(address instruction_addr) { 2522 return is_call_far_patchable_variant2_at(instruction_addr) || // short version: BRASL 2523 is_call_far_patchable_variant0_at(instruction_addr); // long version LARL + LG + BASR 2524 } 2525 2526 // Does the call_far_patchable instruction use a pc-relative encoding 2527 // of the call destination? 2528 bool MacroAssembler::is_call_far_patchable_pcrelative_at(address instruction_addr) { 2529 // Variant 2 is pc-relative. 2530 return is_call_far_patchable_variant2_at(instruction_addr); 2531 } 2532 2533 bool MacroAssembler::is_call_far_pcrelative(address instruction_addr) { 2534 // Prepend each BRASL with a nop. 2535 return is_z_nop(instruction_addr) && is_z_brasl(instruction_addr + nop_size()); // Match at position after one nop required. 2536 } 2537 2538 // Set destination address of a call_far_patchable instruction. 2539 void MacroAssembler::set_dest_of_call_far_patchable_at(address instruction_addr, address dest, int64_t tocOffset) { 2540 ResourceMark rm; 2541 2542 // Now that CP entry is verified, patch call to a pc-relative call (if circumstances permit). 2543 int code_size = MacroAssembler::call_far_patchable_size(); 2544 CodeBuffer buf(instruction_addr, code_size); 2545 MacroAssembler masm(&buf); 2546 masm.call_far_patchable(dest, tocOffset); 2547 ICache::invalidate_range(instruction_addr, code_size); // Empty on z. 2548 } 2549 2550 // Get dest address of a call_far_patchable instruction. 2551 address MacroAssembler::get_dest_of_call_far_patchable_at(address instruction_addr, address ctable) { 2552 // Dynamic TOC: absolute address in constant pool. 2553 // Check variant2 first, it is more frequent. 2554 2555 // Relative address encoded in call instruction. 
2556 if (is_call_far_patchable_variant2_at(instruction_addr)) { 2557 return MacroAssembler::get_target_addr_pcrel(instruction_addr + nop_size()); // Prepend each BRASL with a nop. 2558 2559 // Absolute address in constant pool. 2560 } else if (is_call_far_patchable_variant0_at(instruction_addr)) { 2561 address iaddr = instruction_addr; 2562 2563 long tocOffset = get_load_const_from_toc_offset(iaddr); 2564 address tocLoc = iaddr + tocOffset; 2565 return *(address *)(tocLoc); 2566 } else { 2567 fprintf(stderr, "MacroAssembler::get_dest_of_call_far_patchable_at has a problem at %p:\n", instruction_addr); 2568 fprintf(stderr, "not a call_far_patchable: %16.16lx %16.16lx, len = %d\n", 2569 *(unsigned long*)instruction_addr, 2570 *(unsigned long*)(instruction_addr+8), 2571 call_far_patchable_size()); 2572 Disassembler::decode(instruction_addr, instruction_addr+call_far_patchable_size()); 2573 ShouldNotReachHere(); 2574 return NULL; 2575 } 2576 } 2577 2578 void MacroAssembler::align_call_far_patchable(address pc) { 2579 if (call_far_patchable_requires_alignment_nop(pc)) { z_nop(); } 2580 } 2581 2582 void MacroAssembler::check_and_handle_earlyret(Register java_thread) { 2583 } 2584 2585 void MacroAssembler::check_and_handle_popframe(Register java_thread) { 2586 } 2587 2588 // Read from the polling page. 2589 // Use TM or TMY instruction, depending on read offset. 2590 // offset = 0: Use TM, safepoint polling. 2591 // offset < 0: Use TMY, profiling safepoint polling. 2592 void MacroAssembler::load_from_polling_page(Register polling_page_address, int64_t offset) { 2593 if (Immediate::is_uimm12(offset)) { 2594 z_tm(offset, polling_page_address, mask_safepoint); 2595 } else { 2596 z_tmy(offset, polling_page_address, mask_profiling); 2597 } 2598 } 2599 2600 // Check whether z_instruction is a read access to the polling page 2601 // which was emitted by load_from_polling_page(..). 2602 bool MacroAssembler::is_load_from_polling_page(address instr_loc) { 2603 unsigned long z_instruction; 2604 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2605 2606 if (ilen == 2) { return false; } // It's none of the allowed instructions. 2607 2608 if (ilen == 4) { 2609 if (!is_z_tm(z_instruction)) { return false; } // It's len=4, but not a z_tm. fail. 2610 2611 int ms = inv_mask(z_instruction,8,32); // mask 2612 int ra = inv_reg(z_instruction,16,32); // base register 2613 int ds = inv_uimm12(z_instruction); // displacement 2614 2615 if (!(ds == 0 && ra != 0 && ms == mask_safepoint)) { 2616 return false; // It's not a z_tm(0, ra, mask_safepoint). Fail. 2617 } 2618 2619 } else { /* if (ilen == 6) */ 2620 2621 assert(!is_z_lg(z_instruction), "old form (LG) polling page access. Please fix and use TM(Y)."); 2622 2623 if (!is_z_tmy(z_instruction)) { return false; } // It's len=6, but not a z_tmy. fail. 2624 2625 int ms = inv_mask(z_instruction,8,48); // mask 2626 int ra = inv_reg(z_instruction,16,48); // base register 2627 int ds = inv_simm20(z_instruction); // displacement 2628 } 2629 2630 return true; 2631 } 2632 2633 // Extract poll address from instruction and ucontext. 
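// The base register number is decoded from the TM/TMY instruction, its current
// value is taken from the saved GPRs in the ucontext, and the decoded
// displacement is added to form the effective poll address.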
2634 address MacroAssembler::get_poll_address(address instr_loc, void* ucontext) { 2635 assert(ucontext != NULL, "must have ucontext"); 2636 ucontext_t* uc = (ucontext_t*) ucontext; 2637 unsigned long z_instruction; 2638 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2639 2640 if (ilen == 4 && is_z_tm(z_instruction)) { 2641 int ra = inv_reg(z_instruction, 16, 32); // base register 2642 int ds = inv_uimm12(z_instruction); // displacement 2643 address addr = (address)uc->uc_mcontext.gregs[ra]; 2644 return addr + ds; 2645 } else if (ilen == 6 && is_z_tmy(z_instruction)) { 2646 int ra = inv_reg(z_instruction, 16, 48); // base register 2647 int ds = inv_simm20(z_instruction); // displacement 2648 address addr = (address)uc->uc_mcontext.gregs[ra]; 2649 return addr + ds; 2650 } 2651 2652 ShouldNotReachHere(); 2653 return NULL; 2654 } 2655 2656 // Extract poll register from instruction. 2657 uint MacroAssembler::get_poll_register(address instr_loc) { 2658 unsigned long z_instruction; 2659 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2660 2661 if (ilen == 4 && is_z_tm(z_instruction)) { 2662 return (uint)inv_reg(z_instruction, 16, 32); // base register 2663 } else if (ilen == 6 && is_z_tmy(z_instruction)) { 2664 return (uint)inv_reg(z_instruction, 16, 48); // base register 2665 } 2666 2667 ShouldNotReachHere(); 2668 return 0; 2669 } 2670 2671 void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) { 2672 const Address poll_byte_addr(Z_thread, in_bytes(JavaThread::polling_word_offset()) + 7 /* Big Endian */); 2673 // Armed page has poll_bit set. 2674 z_tm(poll_byte_addr, SafepointMechanism::poll_bit()); 2675 z_brnaz(slow_path); 2676 } 2677 2678 // Don't rely on register locking, always use Z_R1 as scratch register instead. 2679 void MacroAssembler::bang_stack_with_offset(int offset) { 2680 // Stack grows down, caller passes positive offset. 2681 assert(offset > 0, "must bang with positive offset"); 2682 if (Displacement::is_validDisp(-offset)) { 2683 z_tmy(-offset, Z_SP, mask_stackbang); 2684 } else { 2685 add2reg(Z_R1, -offset, Z_SP); // Do not destroy Z_SP!!! 2686 z_tm(0, Z_R1, mask_stackbang); // Just banging. 2687 } 2688 } 2689 2690 void MacroAssembler::reserved_stack_check(Register return_pc) { 2691 // Test if reserved zone needs to be enabled. 2692 Label no_reserved_zone_enabling; 2693 assert(return_pc == Z_R14, "Return pc must be in R14 before z_br() to StackOverflow stub."); 2694 BLOCK_COMMENT("reserved_stack_check {"); 2695 2696 z_clg(Z_SP, Address(Z_thread, JavaThread::reserved_stack_activation_offset())); 2697 z_brl(no_reserved_zone_enabling); 2698 2699 // Enable reserved zone again, throw stack overflow exception. 2700 save_return_pc(); 2701 push_frame_abi160(0); 2702 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), Z_thread); 2703 pop_frame(); 2704 restore_return_pc(); 2705 2706 load_const_optimized(Z_R1, StubRoutines::throw_delayed_StackOverflowError_entry()); 2707 // Don't use call() or z_basr(), they will invalidate Z_R14 which contains the return pc. 2708 z_br(Z_R1); 2709 2710 should_not_reach_here(); 2711 2712 bind(no_reserved_zone_enabling); 2713 BLOCK_COMMENT("} reserved_stack_check"); 2714 } 2715 2716 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
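// Sketch of the fast path emitted below: obj = tlab_top; end = obj + size;
// if (end > tlab_end) goto slow_case; tlab_top = end. The new object starts at
// the old top, and the top pointer is bumped past it.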
2717 void MacroAssembler::tlab_allocate(Register obj, 2718 Register var_size_in_bytes, 2719 int con_size_in_bytes, 2720 Register t1, 2721 Label& slow_case) { 2722 assert_different_registers(obj, var_size_in_bytes, t1); 2723 Register end = t1; 2724 Register thread = Z_thread; 2725 2726 z_lg(obj, Address(thread, JavaThread::tlab_top_offset())); 2727 if (var_size_in_bytes == noreg) { 2728 z_lay(end, Address(obj, con_size_in_bytes)); 2729 } else { 2730 z_lay(end, Address(obj, var_size_in_bytes)); 2731 } 2732 z_cg(end, Address(thread, JavaThread::tlab_end_offset())); 2733 branch_optimized(bcondHigh, slow_case); 2734 2735 // Update the tlab top pointer. 2736 z_stg(end, Address(thread, JavaThread::tlab_top_offset())); 2737 2738 // Recover var_size_in_bytes if necessary. 2739 if (var_size_in_bytes == end) { 2740 z_sgr(var_size_in_bytes, obj); 2741 } 2742 } 2743 2744 // Emitter for interface method lookup. 2745 // input: recv_klass, intf_klass, itable_index 2746 // output: method_result 2747 // kills: itable_index, temp1_reg, Z_R0, Z_R1 2748 // TODO: Temp2_reg is unused. we may use this emitter also in the itable stubs. 2749 // If the register is still not needed then, remove it. 2750 void MacroAssembler::lookup_interface_method(Register recv_klass, 2751 Register intf_klass, 2752 RegisterOrConstant itable_index, 2753 Register method_result, 2754 Register temp1_reg, 2755 Label& no_such_interface, 2756 bool return_method) { 2757 2758 const Register vtable_len = temp1_reg; // Used to compute itable_entry_addr. 2759 const Register itable_entry_addr = Z_R1_scratch; 2760 const Register itable_interface = Z_R0_scratch; 2761 2762 BLOCK_COMMENT("lookup_interface_method {"); 2763 2764 // Load start of itable entries into itable_entry_addr. 2765 z_llgf(vtable_len, Address(recv_klass, Klass::vtable_length_offset())); 2766 z_sllg(vtable_len, vtable_len, exact_log2(vtableEntry::size_in_bytes())); 2767 2768 // Loop over all itable entries until desired interfaceOop(Rinterface) found. 2769 const int vtable_base_offset = in_bytes(Klass::vtable_start_offset()); 2770 2771 add2reg_with_index(itable_entry_addr, 2772 vtable_base_offset + itableOffsetEntry::interface_offset_in_bytes(), 2773 recv_klass, vtable_len); 2774 2775 const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize; 2776 Label search; 2777 2778 bind(search); 2779 2780 // Handle IncompatibleClassChangeError. 2781 // If the entry is NULL then we've reached the end of the table 2782 // without finding the expected interface, so throw an exception. 2783 load_and_test_long(itable_interface, Address(itable_entry_addr)); 2784 z_bre(no_such_interface); 2785 2786 add2reg(itable_entry_addr, itable_offset_search_inc); 2787 z_cgr(itable_interface, intf_klass); 2788 z_brne(search); 2789 2790 // Entry found and itable_entry_addr points to it, get offset of vtable for interface. 2791 if (return_method) { 2792 const int vtable_offset_offset = (itableOffsetEntry::offset_offset_in_bytes() - 2793 itableOffsetEntry::interface_offset_in_bytes()) - 2794 itable_offset_search_inc; 2795 2796 // Compute itableMethodEntry and get method and entry point 2797 // we use addressing with index and displacement, since the formula 2798 // for computing the entry's offset has a fixed and a dynamic part, 2799 // the latter depending on the matched interface entry and on the case, 2800 // that the itable index has been passed as a register, not a constant value. 
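// The final z_lg below loads method_result from
// recv_klass + itable_offset + method_offset, where method_offset is the
// constant displacement assembled here and itable_offset (kept in a register)
// carries the dynamic part of the entry's offset.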
2801 int method_offset = itableMethodEntry::method_offset_in_bytes(); 2802 // Fixed part (displacement), common operand. 2803 Register itable_offset = method_result; // Dynamic part (index register). 2804 2805 if (itable_index.is_register()) { 2806 // Compute the method's offset in that register, for the formula, see the 2807 // else-clause below. 2808 z_sllg(itable_offset, itable_index.as_register(), exact_log2(itableMethodEntry::size() * wordSize)); 2809 z_agf(itable_offset, vtable_offset_offset, itable_entry_addr); 2810 } else { 2811 // Displacement increases. 2812 method_offset += itableMethodEntry::size() * wordSize * itable_index.as_constant(); 2813 2814 // Load index from itable. 2815 z_llgf(itable_offset, vtable_offset_offset, itable_entry_addr); 2816 } 2817 2818 // Finally load the method's oop. 2819 z_lg(method_result, method_offset, itable_offset, recv_klass); 2820 } 2821 BLOCK_COMMENT("} lookup_interface_method"); 2822 } 2823 2824 // Lookup for virtual method invocation. 2825 void MacroAssembler::lookup_virtual_method(Register recv_klass, 2826 RegisterOrConstant vtable_index, 2827 Register method_result) { 2828 assert_different_registers(recv_klass, vtable_index.register_or_noreg()); 2829 assert(vtableEntry::size() * wordSize == wordSize, 2830 "else adjust the scaling in the code below"); 2831 2832 BLOCK_COMMENT("lookup_virtual_method {"); 2833 2834 const int base = in_bytes(Klass::vtable_start_offset()); 2835 2836 if (vtable_index.is_constant()) { 2837 // Load with base + disp. 2838 Address vtable_entry_addr(recv_klass, 2839 vtable_index.as_constant() * wordSize + 2840 base + 2841 vtableEntry::method_offset_in_bytes()); 2842 2843 z_lg(method_result, vtable_entry_addr); 2844 } else { 2845 // Shift index properly and load with base + index + disp. 2846 Register vindex = vtable_index.as_register(); 2847 Address vtable_entry_addr(recv_klass, vindex, 2848 base + vtableEntry::method_offset_in_bytes()); 2849 2850 z_sllg(vindex, vindex, exact_log2(wordSize)); 2851 z_lg(method_result, vtable_entry_addr); 2852 } 2853 BLOCK_COMMENT("} lookup_virtual_method"); 2854 } 2855 2856 // Factor out code to call ic_miss_handler. 2857 // Generate code to call the inline cache miss handler. 2858 // 2859 // In most cases, this code will be generated out-of-line. 2860 // The method parameters are intended to provide some variability. 2861 // ICM - Label which has to be bound to the start of useful code (past any traps). 2862 // trapMarker - Marking byte for the generated illtrap instructions (if any). 2863 // Any value except 0x00 is supported. 2864 // = 0x00 - do not generate illtrap instructions. 2865 // use nops to fill unused space. 2866 // requiredSize - required size of the generated code. If the actually 2867 // generated code is smaller, use padding instructions to fill up. 2868 // = 0 - no size requirement, no padding. 2869 // scratch - scratch register to hold branch target address. 2870 // 2871 // The method returns the code offset of the bound label. 2872 unsigned int MacroAssembler::call_ic_miss_handler(Label& ICM, int trapMarker, int requiredSize, Register scratch) { 2873 intptr_t startOffset = offset(); 2874 2875 // Prevent entry at content_begin(). 2876 if (trapMarker != 0) { 2877 z_illtrap(trapMarker); 2878 } 2879 2880 // Load address of inline cache miss code into scratch register 2881 // and branch to cache miss handler. 
2882 BLOCK_COMMENT("IC miss handler {"); 2883 BIND(ICM); 2884 unsigned int labelOffset = offset(); 2885 AddressLiteral icmiss(SharedRuntime::get_ic_miss_stub()); 2886 2887 load_const_optimized(scratch, icmiss); 2888 z_br(scratch); 2889 2890 // Fill unused space. 2891 if (requiredSize > 0) { 2892 while ((offset() - startOffset) < requiredSize) { 2893 if (trapMarker == 0) { 2894 z_nop(); 2895 } else { 2896 z_illtrap(trapMarker); 2897 } 2898 } 2899 } 2900 BLOCK_COMMENT("} IC miss handler"); 2901 return labelOffset; 2902 } 2903 2904 void MacroAssembler::nmethod_UEP(Label& ic_miss) { 2905 Register ic_reg = Z_inline_cache; 2906 int klass_offset = oopDesc::klass_offset_in_bytes(); 2907 if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) { 2908 if (VM_Version::has_CompareBranch()) { 2909 z_cgij(Z_ARG1, 0, Assembler::bcondEqual, ic_miss); 2910 } else { 2911 z_ltgr(Z_ARG1, Z_ARG1); 2912 z_bre(ic_miss); 2913 } 2914 } 2915 // Compare cached class against klass from receiver. 2916 compare_klass_ptr(ic_reg, klass_offset, Z_ARG1, false); 2917 z_brne(ic_miss); 2918 } 2919 2920 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 2921 Register super_klass, 2922 Register temp1_reg, 2923 Label* L_success, 2924 Label* L_failure, 2925 Label* L_slow_path, 2926 RegisterOrConstant super_check_offset) { 2927 2928 const int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 2929 const int sco_offset = in_bytes(Klass::super_check_offset_offset()); 2930 2931 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); 2932 bool need_slow_path = (must_load_sco || 2933 super_check_offset.constant_or_zero() == sc_offset); 2934 2935 // Input registers must not overlap. 2936 assert_different_registers(sub_klass, super_klass, temp1_reg); 2937 if (super_check_offset.is_register()) { 2938 assert_different_registers(sub_klass, super_klass, 2939 super_check_offset.as_register()); 2940 } else if (must_load_sco) { 2941 assert(temp1_reg != noreg, "supply either a temp or a register offset"); 2942 } 2943 2944 const Register Rsuper_check_offset = temp1_reg; 2945 2946 NearLabel L_fallthrough; 2947 int label_nulls = 0; 2948 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 2949 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 2950 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 2951 assert(label_nulls <= 1 || 2952 (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path), 2953 "at most one NULL in the batch, usually"); 2954 2955 BLOCK_COMMENT("check_klass_subtype_fast_path {"); 2956 // If the pointers are equal, we are done (e.g., String[] elements). 2957 // This self-check enables sharing of secondary supertype arrays among 2958 // non-primary types such as array-of-interface. Otherwise, each such 2959 // type would need its own customized SSA. 2960 // We move this check to the front of the fast path because many 2961 // type checks are in fact trivially successful in this manner, 2962 // so we get a nicely predicted branch right at the start of the check. 2963 compare64_and_branch(sub_klass, super_klass, bcondEqual, *L_success); 2964 2965 // Check the supertype display, which is uint. 
2966 if (must_load_sco) { 2967 z_llgf(Rsuper_check_offset, sco_offset, super_klass); 2968 super_check_offset = RegisterOrConstant(Rsuper_check_offset); 2969 } 2970 Address super_check_addr(sub_klass, super_check_offset, 0); 2971 z_cg(super_klass, super_check_addr); // compare w/ displayed supertype 2972 2973 // This check has worked decisively for primary supers. 2974 // Secondary supers are sought in the super_cache ('super_cache_addr'). 2975 // (Secondary supers are interfaces and very deeply nested subtypes.) 2976 // This works in the same check above because of a tricky aliasing 2977 // between the super_cache and the primary super display elements. 2978 // (The 'super_check_addr' can address either, as the case requires.) 2979 // Note that the cache is updated below if it does not help us find 2980 // what we need immediately. 2981 // So if it was a primary super, we can just fail immediately. 2982 // Otherwise, it's the slow path for us (no success at this point). 2983 2984 // Hacked jmp, which may only be used just before L_fallthrough. 2985 #define final_jmp(label) \ 2986 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 2987 else { branch_optimized(Assembler::bcondAlways, label); } /*omit semicolon*/ 2988 2989 if (super_check_offset.is_register()) { 2990 branch_optimized(Assembler::bcondEqual, *L_success); 2991 z_cfi(super_check_offset.as_register(), sc_offset); 2992 if (L_failure == &L_fallthrough) { 2993 branch_optimized(Assembler::bcondEqual, *L_slow_path); 2994 } else { 2995 branch_optimized(Assembler::bcondNotEqual, *L_failure); 2996 final_jmp(*L_slow_path); 2997 } 2998 } else if (super_check_offset.as_constant() == sc_offset) { 2999 // Need a slow path; fast failure is impossible. 3000 if (L_slow_path == &L_fallthrough) { 3001 branch_optimized(Assembler::bcondEqual, *L_success); 3002 } else { 3003 branch_optimized(Assembler::bcondNotEqual, *L_slow_path); 3004 final_jmp(*L_success); 3005 } 3006 } else { 3007 // No slow path; it's a fast decision. 3008 if (L_failure == &L_fallthrough) { 3009 branch_optimized(Assembler::bcondEqual, *L_success); 3010 } else { 3011 branch_optimized(Assembler::bcondNotEqual, *L_failure); 3012 final_jmp(*L_success); 3013 } 3014 } 3015 3016 bind(L_fallthrough); 3017 #undef local_brc 3018 #undef final_jmp 3019 BLOCK_COMMENT("} check_klass_subtype_fast_path"); 3020 // fallthru (to slow path) 3021 } 3022 3023 void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass, 3024 Register Rsuperklass, 3025 Register Rarray_ptr, // tmp 3026 Register Rlength, // tmp 3027 Label* L_success, 3028 Label* L_failure) { 3029 // Input registers must not overlap. 3030 // Also check for R1 which is explicitly used here. 3031 assert_different_registers(Z_R1, Rsubklass, Rsuperklass, Rarray_ptr, Rlength); 3032 NearLabel L_fallthrough; 3033 int label_nulls = 0; 3034 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 3035 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 3036 assert(label_nulls <= 1, "at most one NULL in the batch"); 3037 3038 const int ss_offset = in_bytes(Klass::secondary_supers_offset()); 3039 const int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3040 3041 const int length_offset = Array<Klass*>::length_offset_in_bytes(); 3042 const int base_offset = Array<Klass*>::base_offset_in_bytes(); 3043 3044 // Hacked jmp, which may only be used just before L_fallthrough. 
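// (final_jmp below expands to an unconditional branch unless the target is the
// local fall-through label, in which case it emits nothing.)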
3045 #define final_jmp(label) \ 3046 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 3047 else branch_optimized(Assembler::bcondAlways, label) /*omit semicolon*/ 3048 3049 NearLabel loop_iterate, loop_count, match; 3050 3051 BLOCK_COMMENT("check_klass_subtype_slow_path {"); 3052 z_lg(Rarray_ptr, ss_offset, Rsubklass); 3053 3054 load_and_test_int(Rlength, Address(Rarray_ptr, length_offset)); 3055 branch_optimized(Assembler::bcondZero, *L_failure); 3056 3057 // Oops in table are NO MORE compressed. 3058 z_cg(Rsuperklass, base_offset, Rarray_ptr); // Check array element for match. 3059 z_bre(match); // Shortcut for array length = 1. 3060 3061 // No match yet, so we must walk the array's elements. 3062 z_lngfr(Rlength, Rlength); 3063 z_sllg(Rlength, Rlength, LogBytesPerWord); // -#bytes of cache array 3064 z_llill(Z_R1, BytesPerWord); // Set increment/end index. 3065 add2reg(Rlength, 2 * BytesPerWord); // start index = -(n-2)*BytesPerWord 3066 z_slgr(Rarray_ptr, Rlength); // start addr: += (n-2)*BytesPerWord 3067 z_bru(loop_count); 3068 3069 BIND(loop_iterate); 3070 z_cg(Rsuperklass, base_offset, Rlength, Rarray_ptr); // Check array element for match. 3071 z_bre(match); 3072 BIND(loop_count); 3073 z_brxlg(Rlength, Z_R1, loop_iterate); 3074 3075 // Rsuperklass not found among secondary super classes -> failure. 3076 branch_optimized(Assembler::bcondAlways, *L_failure); 3077 3078 // Got a hit. Return success (zero result). Set cache. 3079 // Cache load doesn't happen here. For speed it is directly emitted by the compiler. 3080 3081 BIND(match); 3082 3083 z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache. 3084 3085 final_jmp(*L_success); 3086 3087 // Exit to the surrounding code. 3088 BIND(L_fallthrough); 3089 #undef local_brc 3090 #undef final_jmp 3091 BLOCK_COMMENT("} check_klass_subtype_slow_path"); 3092 } 3093 3094 // Emitter for combining fast and slow path. 3095 void MacroAssembler::check_klass_subtype(Register sub_klass, 3096 Register super_klass, 3097 Register temp1_reg, 3098 Register temp2_reg, 3099 Label& L_success) { 3100 NearLabel failure; 3101 BLOCK_COMMENT(err_msg("check_klass_subtype(%s subclass of %s) {", sub_klass->name(), super_klass->name())); 3102 check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg, 3103 &L_success, &failure, NULL); 3104 check_klass_subtype_slow_path(sub_klass, super_klass, 3105 temp1_reg, temp2_reg, &L_success, NULL); 3106 BIND(failure); 3107 BLOCK_COMMENT("} check_klass_subtype"); 3108 } 3109 3110 void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) { 3111 assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); 3112 3113 Label L_fallthrough; 3114 if (L_fast_path == NULL) { 3115 L_fast_path = &L_fallthrough; 3116 } else if (L_slow_path == NULL) { 3117 L_slow_path = &L_fallthrough; 3118 } 3119 3120 // Fast path check: class is fully initialized 3121 z_cli(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized); 3122 z_bre(*L_fast_path); 3123 3124 // Fast path check: current thread is initializer thread 3125 z_cg(thread, Address(klass, InstanceKlass::init_thread_offset())); 3126 if (L_slow_path == &L_fallthrough) { 3127 z_bre(*L_fast_path); 3128 } else if (L_fast_path == &L_fallthrough) { 3129 z_brne(*L_slow_path); 3130 } else { 3131 Unimplemented(); 3132 } 3133 3134 bind(L_fallthrough); 3135 } 3136 3137 // Increment a counter at counter_address when the eq condition code is 3138 // set. 
Kills registers tmp1_reg and tmp2_reg and preserves the condition code. 3139 void MacroAssembler::increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg) { 3140 Label l; 3141 z_brne(l); 3142 load_const(tmp1_reg, counter_address); 3143 add2mem_32(Address(tmp1_reg), 1, tmp2_reg); 3144 z_cr(tmp1_reg, tmp1_reg); // Set cc to eq. 3145 bind(l); 3146 } 3147 3148 void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2) { 3149 Register displacedHeader = temp1; 3150 Register currentHeader = temp1; 3151 Register temp = temp2; 3152 NearLabel done, object_has_monitor; 3153 3154 BLOCK_COMMENT("compiler_fast_lock_object {"); 3155 3156 // Load markWord from oop into mark. 3157 z_lg(displacedHeader, 0, oop); 3158 3159 if (DiagnoseSyncOnValueBasedClasses != 0) { 3160 load_klass(Z_R1_scratch, oop); 3161 z_l(Z_R1_scratch, Address(Z_R1_scratch, Klass::access_flags_offset())); 3162 assert((JVM_ACC_IS_VALUE_BASED_CLASS & 0xFFFF) == 0, "or change following instruction"); 3163 z_nilh(Z_R1_scratch, JVM_ACC_IS_VALUE_BASED_CLASS >> 16); 3164 z_brne(done); 3165 } 3166 3167 // Handle existing monitor. 3168 // The object has an existing monitor iff (mark & monitor_value) != 0. 3169 guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word"); 3170 z_lr(temp, displacedHeader); 3171 z_nill(temp, markWord::monitor_value); 3172 z_brne(object_has_monitor); 3173 3174 // Set mark to markWord | markWord::unlocked_value. 3175 z_oill(displacedHeader, markWord::unlocked_value); 3176 3177 // Load Compare Value application register. 3178 3179 // Initialize the box (must happen before we update the object mark). 3180 z_stg(displacedHeader, BasicLock::displaced_header_offset_in_bytes(), box); 3181 3182 // Memory Fence (in cmpxchgd) 3183 // Compare object markWord with mark and if equal exchange scratch1 with object markWord. 3184 3185 // If the compare-and-swap succeeded, then we found an unlocked object and we 3186 // have now locked it. 3187 z_csg(displacedHeader, box, 0, oop); 3188 assert(currentHeader==displacedHeader, "must be same register"); // Identified two registers from z/Architecture. 3189 z_bre(done); 3190 3191 // We did not see an unlocked object so try the fast recursive case. 3192 3193 z_sgr(currentHeader, Z_SP); 3194 load_const_optimized(temp, (~(os::vm_page_size()-1) | markWord::lock_mask_in_place)); 3195 3196 z_ngr(currentHeader, temp); 3197 // z_brne(done); 3198 // z_release(); 3199 z_stg(currentHeader/*==0 or not 0*/, BasicLock::displaced_header_offset_in_bytes(), box); 3200 3201 z_bru(done); 3202 3203 Register zero = temp; 3204 Register monitor_tagged = displacedHeader; // Tagged with markWord::monitor_value. 3205 bind(object_has_monitor); 3206 // The object's monitor m is unlocked iff m->owner == NULL, 3207 // otherwise m->owner may contain a thread or a stack address. 3208 // 3209 // Try to CAS m->owner from NULL to current thread. 3210 z_lghi(zero, 0); 3211 // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ. 3212 z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged); 3213 // Store a non-null value into the box. 3214 z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box); 3215 #ifdef ASSERT 3216 z_brne(done); 3217 // We've acquired the monitor, check some invariants. 3218 // Invariant 1: _recursions should be 0. 
3219 asm_assert_mem8_is_zero(OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), monitor_tagged, 3220 "monitor->_recursions should be 0", -1); 3221 z_ltgr(zero, zero); // Set CR=EQ. 3222 #endif 3223 bind(done); 3224 3225 BLOCK_COMMENT("} compiler_fast_lock_object"); 3226 // If locking was successful, CR should indicate 'EQ'. 3227 // The compiler or the native wrapper generates a branch to the runtime call 3228 // _complete_monitor_locking_Java. 3229 } 3230 3231 void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2) { 3232 Register displacedHeader = temp1; 3233 Register currentHeader = temp2; 3234 Register temp = temp1; 3235 Register monitor = temp2; 3236 3237 Label done, object_has_monitor; 3238 3239 BLOCK_COMMENT("compiler_fast_unlock_object {"); 3240 3241 // Find the lock address and load the displaced header from the stack. 3242 // if the displaced header is zero, we have a recursive unlock. 3243 load_and_test_long(displacedHeader, Address(box, BasicLock::displaced_header_offset_in_bytes())); 3244 z_bre(done); 3245 3246 // Handle existing monitor. 3247 // The object has an existing monitor iff (mark & monitor_value) != 0. 3248 z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop); 3249 guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word"); 3250 z_nill(currentHeader, markWord::monitor_value); 3251 z_brne(object_has_monitor); 3252 3253 // Check if it is still a light weight lock, this is true if we see 3254 // the stack address of the basicLock in the markWord of the object 3255 // copy box to currentHeader such that csg does not kill it. 3256 z_lgr(currentHeader, box); 3257 z_csg(currentHeader, displacedHeader, 0, oop); 3258 z_bru(done); // Csg sets CR as desired. 3259 3260 // Handle existing monitor. 3261 bind(object_has_monitor); 3262 z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop); // CurrentHeader is tagged with monitor_value set. 3263 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); 3264 z_brne(done); 3265 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 3266 z_brne(done); 3267 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); 3268 z_brne(done); 3269 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); 3270 z_brne(done); 3271 z_release(); 3272 z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader); 3273 3274 bind(done); 3275 3276 BLOCK_COMMENT("} compiler_fast_unlock_object"); 3277 // flag == EQ indicates success 3278 // flag == NE indicates failure 3279 } 3280 3281 void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) { 3282 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 3283 bs->resolve_jobject(this, value, tmp1, tmp2); 3284 } 3285 3286 // Last_Java_sp must comply to the rules in frame_s390.hpp. 3287 void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation) { 3288 BLOCK_COMMENT("set_last_Java_frame {"); 3289 3290 // Always set last_Java_pc and flags first because once last_Java_sp 3291 // is visible has_last_Java_frame is true and users will look at the 3292 // rest of the fields. (Note: flags should always be zero before we 3293 // get here so doesn't need to be set.) 3294 3295 // Verify that last_Java_pc was zeroed on return to Java. 
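// (The '_static' variants of these asserts are, presumably, for code that must
// not contain relocations; 'allow_relocation' selects the form here and in
// reset_last_Java_frame() below.)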
  if (allow_relocation) {
    asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()),
                            Z_thread,
                            "last_Java_pc not zeroed before leaving Java",
                            0x200);
  } else {
    asm_assert_mem8_is_zero_static(in_bytes(JavaThread::last_Java_pc_offset()),
                                   Z_thread,
                                   "last_Java_pc not zeroed before leaving Java",
                                   0x200);
  }

  // When returning from calling out from Java mode, the frame anchor's
  // last_Java_pc will always be set to NULL. It is set here so that,
  // if we are doing a call to native (not VM), we capture the
  // known pc and don't have to rely on the native call having a
  // standard frame linkage where we can find the pc.
  if (last_Java_pc != noreg) {
    z_stg(last_Java_pc, Address(Z_thread, JavaThread::last_Java_pc_offset()));
  }

  // This membar release is not required on z/Architecture, since the sequence of stores
  // is maintained. Nevertheless, we leave it in to document the required ordering.
  // The implementation of z_release() should be empty.
  // z_release();

  z_stg(last_Java_sp, Address(Z_thread, JavaThread::last_Java_sp_offset()));
  BLOCK_COMMENT("} set_last_Java_frame");
}

void MacroAssembler::reset_last_Java_frame(bool allow_relocation) {
  BLOCK_COMMENT("reset_last_Java_frame {");

  if (allow_relocation) {
    asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()),
                               Z_thread,
                               "SP was not set, still zero",
                               0x202);
  } else {
    asm_assert_mem8_isnot_zero_static(in_bytes(JavaThread::last_Java_sp_offset()),
                                      Z_thread,
                                      "SP was not set, still zero",
                                      0x202);
  }

  // _last_Java_sp = 0
  // Clearing storage must be atomic here, so don't use clear_mem()!
  store_const(Address(Z_thread, JavaThread::last_Java_sp_offset()), 0);

  // _last_Java_pc = 0
  store_const(Address(Z_thread, JavaThread::last_Java_pc_offset()), 0);

  BLOCK_COMMENT("} reset_last_Java_frame");
  return;
}

void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, bool allow_relocation) {
  assert_different_registers(sp, tmp1);

  // We cannot trust that code generated by the C++ compiler saves R14
  // to z_abi_160.return_pc, because sometimes it spills R14 using stmg at
  // z_abi_160.gpr14 (e.g. InterpreterRuntime::_new()).
  // Therefore we load the PC into tmp1 and let set_last_Java_frame() save
  // it into the frame anchor.
3360 get_PC(tmp1); 3361 set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1, allow_relocation); 3362 } 3363 3364 void MacroAssembler::set_thread_state(JavaThreadState new_state) { 3365 z_release(); 3366 3367 assert(Immediate::is_uimm16(_thread_max_state), "enum value out of range for instruction"); 3368 assert(sizeof(JavaThreadState) == sizeof(int), "enum value must have base type int"); 3369 store_const(Address(Z_thread, JavaThread::thread_state_offset()), new_state, Z_R0, false); 3370 } 3371 3372 void MacroAssembler::get_vm_result(Register oop_result) { 3373 z_lg(oop_result, Address(Z_thread, JavaThread::vm_result_offset())); 3374 clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(void*)); 3375 3376 verify_oop(oop_result, FILE_AND_LINE); 3377 } 3378 3379 void MacroAssembler::get_vm_result_2(Register result) { 3380 z_lg(result, Address(Z_thread, JavaThread::vm_result_2_offset())); 3381 clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(void*)); 3382 } 3383 3384 // We require that C code which does not return a value in vm_result will 3385 // leave it undisturbed. 3386 void MacroAssembler::set_vm_result(Register oop_result) { 3387 z_stg(oop_result, Address(Z_thread, JavaThread::vm_result_offset())); 3388 } 3389 3390 // Explicit null checks (used for method handle code). 3391 void MacroAssembler::null_check(Register reg, Register tmp, int64_t offset) { 3392 if (!ImplicitNullChecks) { 3393 NearLabel ok; 3394 3395 compare64_and_branch(reg, (intptr_t) 0, Assembler::bcondNotEqual, ok); 3396 3397 // We just put the address into reg if it was 0 (tmp==Z_R0 is allowed so we can't use it for the address). 3398 address exception_entry = Interpreter::throw_NullPointerException_entry(); 3399 load_absolute_address(reg, exception_entry); 3400 z_br(reg); 3401 3402 bind(ok); 3403 } else { 3404 if (needs_explicit_null_check((intptr_t)offset)) { 3405 // Provoke OS NULL exception if reg = NULL by 3406 // accessing M[reg] w/o changing any registers. 3407 z_lg(tmp, 0, reg); 3408 } 3409 // else 3410 // Nothing to do, (later) access of M[reg + offset] 3411 // will provoke OS NULL exception if reg = NULL. 3412 } 3413 } 3414 3415 //------------------------------------- 3416 // Compressed Klass Pointers 3417 //------------------------------------- 3418 3419 // Klass oop manipulations if compressed. 3420 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 3421 Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided. (dst == src) also possible. 3422 address base = CompressedKlassPointers::base(); 3423 int shift = CompressedKlassPointers::shift(); 3424 bool need_zero_extend = base != 0; 3425 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3426 3427 BLOCK_COMMENT("cKlass encoder {"); 3428 3429 #ifdef ASSERT 3430 Label ok; 3431 z_tmll(current, KlassAlignmentInBytes-1); // Check alignment. 3432 z_brc(Assembler::bcondAllZero, ok); 3433 // The plain disassembler does not recognize illtrap. It instead displays 3434 // a 32-bit value. Issuing two illtraps assures the disassembler finds 3435 // the proper beginning of the next instruction. 3436 z_illtrap(0xee); 3437 z_illtrap(0xee); 3438 bind(ok); 3439 #endif 3440 3441 // Scale down the incoming klass pointer first. 3442 // We then can be sure we calculate an offset that fits into 32 bit. 3443 // More generally speaking: all subsequent calculations are purely 32-bit. 
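// Worked example (illustrative numbers only):
//   base  = 0x00007f8000000000, shift = 3
//   klass = 0x00007f8000012340
//   klass >> 3            = 0x00000ff000002468
//   base  >> 3            = 0x00000ff000000000
//   difference (= cKlass) = 0x0000000000002468   -- fits into 32 bits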
  if (shift != 0) {
    assert(LogKlassAlignmentInBytes == shift, "decode alg wrong");
    z_srlg(dst, current, shift);
    current = dst;
  }

  if (base != NULL) {
    // Use scaled-down base address parts to match scaled-down klass pointer.
    unsigned int base_h = ((unsigned long)base)>>(32+shift);
    unsigned int base_l = (unsigned int)(((unsigned long)base)>>shift);

    // General considerations:
    // - When calculating (current_h - base_h), all digits must cancel (become 0).
    //   Otherwise, we would end up with a compressed klass pointer which doesn't
    //   fit into 32 bits.
    // - Only bit#33 of the difference could potentially be non-zero. For that
    //   to happen, (current_l < base_l) must hold. In this case, the subtraction
    //   will create a borrow out of bit#32, nicely killing bit#33.
    // - With the above, we only need to consider current_l and base_l to
    //   calculate the result.
    // - Both values are treated as unsigned. The unsigned subtraction is
    //   replaced by adding (unsigned) the 2's complement of the subtrahend.

    if (base_l == 0) {
      // - In theory, the calculation to be performed here (current_h - base_h) MUST
      //   cancel all high-word bits. Otherwise, we would end up with an offset
      //   (i.e. compressed klass pointer) that does not fit into 32 bits.
      // - current_l remains unchanged.
      // - Therefore, we can replace all calculation with just a
      //   zero-extending load 32 to 64 bit.
      // - Even that can be replaced with a conditional load if dst != current.
      //   (This is a local view; the shift step may have requested zero-extension.)
    } else {
      if ((base_h == 0) && is_uimm(base_l, 31)) {
        // If we happen to find that (base_h == 0), and that base_l is within the range
        // which can be represented by a signed int, then we can use a 64-bit signed add with
        // (-base_l) as a 32-bit signed immediate operand. The add will take care of the
        // upper 32 bits of the result, sparing us the need for an extra zero extension.
        // For base_l to be in the required range, it must not have the most significant
        // bit (aka sign bit) set.
        lgr_if_needed(dst, current); // no zero/sign extension in this case!
        z_agfi(dst, -(int)base_l);   // base_l must be passed as signed.
        need_zero_extend = false;
        current = dst;
      } else {
        // To begin with, we may need to copy and/or zero-extend the register operand.
        // We have to calculate (current_l - base_l). Because there is no unsigned
        // subtract instruction with immediate operand, we add the 2's complement of base_l.
        if (need_zero_extend) {
          z_llgfr(dst, current);
          need_zero_extend = false;
        } else {
          llgfr_if_needed(dst, current);
        }
        current = dst;
        z_alfi(dst, -base_l);
      }
    }
  }

  if (need_zero_extend) {
    // We must zero-extend the calculated result. It may have some leftover bits in
    // the hi-word because we only did optimized calculations.
    z_llgfr(dst, current);
  } else {
    llgfr_if_needed(dst, current); // Zero-extension while copying comes at no extra cost.
  }

  BLOCK_COMMENT("} cKlass encoder");
}

// This function calculates the size of the code generated by
//   decode_klass_not_null(Register dst, Register src)
// when (Universe::heap() != NULL). Hence, if the instructions
// it generates change, then this method needs to be updated.
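// (Summary, inferred from the code below: an optional 6-byte SLLG for the shift,
// plus one base-add variant -- a 6-byte AIH, a 6-byte ALGFI, or load_const plus a
// 4-byte ALGR -- plus the alignment-check bytes when ASSERT is enabled.)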
3519 int MacroAssembler::instr_size_for_decode_klass_not_null() { 3520 address base = CompressedKlassPointers::base(); 3521 int shift_size = CompressedKlassPointers::shift() == 0 ? 0 : 6; /* sllg */ 3522 int addbase_size = 0; 3523 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3524 3525 if (base != NULL) { 3526 unsigned int base_h = ((unsigned long)base)>>32; 3527 unsigned int base_l = (unsigned int)((unsigned long)base); 3528 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3529 addbase_size += 6; /* aih */ 3530 } else if ((base_h == 0) && (base_l != 0)) { 3531 addbase_size += 6; /* algfi */ 3532 } else { 3533 addbase_size += load_const_size(); 3534 addbase_size += 4; /* algr */ 3535 } 3536 } 3537 #ifdef ASSERT 3538 addbase_size += 10; 3539 addbase_size += 2; // Extra sigill. 3540 #endif 3541 return addbase_size + shift_size; 3542 } 3543 3544 // !!! If the instructions that get generated here change 3545 // then function instr_size_for_decode_klass_not_null() 3546 // needs to get updated. 3547 // This variant of decode_klass_not_null() must generate predictable code! 3548 // The code must only depend on globally known parameters. 3549 void MacroAssembler::decode_klass_not_null(Register dst) { 3550 address base = CompressedKlassPointers::base(); 3551 int shift = CompressedKlassPointers::shift(); 3552 int beg_off = offset(); 3553 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3554 3555 BLOCK_COMMENT("cKlass decoder (const size) {"); 3556 3557 if (shift != 0) { // Shift required? 3558 z_sllg(dst, dst, shift); 3559 } 3560 if (base != NULL) { 3561 unsigned int base_h = ((unsigned long)base)>>32; 3562 unsigned int base_l = (unsigned int)((unsigned long)base); 3563 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3564 z_aih(dst, base_h); // Base has no set bits in lower half. 3565 } else if ((base_h == 0) && (base_l != 0)) { 3566 z_algfi(dst, base_l); // Base has no set bits in upper half. 3567 } else { 3568 load_const(Z_R0, base); // Base has set bits everywhere. 3569 z_algr(dst, Z_R0); 3570 } 3571 } 3572 3573 #ifdef ASSERT 3574 Label ok; 3575 z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment. 3576 z_brc(Assembler::bcondAllZero, ok); 3577 // The plain disassembler does not recognize illtrap. It instead displays 3578 // a 32-bit value. Issuing two illtraps assures the disassembler finds 3579 // the proper beginning of the next instruction. 3580 z_illtrap(0xd1); 3581 z_illtrap(0xd1); 3582 bind(ok); 3583 #endif 3584 assert(offset() == beg_off + instr_size_for_decode_klass_not_null(), "Code gen mismatch."); 3585 3586 BLOCK_COMMENT("} cKlass decoder (const size)"); 3587 } 3588 3589 // This variant of decode_klass_not_null() is for cases where 3590 // 1) the size of the generated instructions may vary 3591 // 2) the result is (potentially) stored in a register different from the source. 3592 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { 3593 address base = CompressedKlassPointers::base(); 3594 int shift = CompressedKlassPointers::shift(); 3595 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3596 3597 BLOCK_COMMENT("cKlass decoder {"); 3598 3599 if (src == noreg) src = dst; 3600 3601 if (shift != 0) { // Shift or at least move required? 
3602 z_sllg(dst, src, shift); 3603 } else { 3604 lgr_if_needed(dst, src); 3605 } 3606 3607 if (base != NULL) { 3608 unsigned int base_h = ((unsigned long)base)>>32; 3609 unsigned int base_l = (unsigned int)((unsigned long)base); 3610 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3611 z_aih(dst, base_h); // Base has not set bits in lower half. 3612 } else if ((base_h == 0) && (base_l != 0)) { 3613 z_algfi(dst, base_l); // Base has no set bits in upper half. 3614 } else { 3615 load_const_optimized(Z_R0, base); // Base has set bits everywhere. 3616 z_algr(dst, Z_R0); 3617 } 3618 } 3619 3620 #ifdef ASSERT 3621 Label ok; 3622 z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment. 3623 z_brc(Assembler::bcondAllZero, ok); 3624 // The plain disassembler does not recognize illtrap. It instead displays 3625 // a 32-bit value. Issuing two illtraps assures the disassembler finds 3626 // the proper beginning of the next instruction. 3627 z_illtrap(0xd2); 3628 z_illtrap(0xd2); 3629 bind(ok); 3630 #endif 3631 BLOCK_COMMENT("} cKlass decoder"); 3632 } 3633 3634 void MacroAssembler::load_klass(Register klass, Address mem) { 3635 if (UseCompressedClassPointers) { 3636 z_llgf(klass, mem); 3637 // Attention: no null check here! 3638 decode_klass_not_null(klass); 3639 } else { 3640 z_lg(klass, mem); 3641 } 3642 } 3643 3644 void MacroAssembler::load_klass(Register klass, Register src_oop) { 3645 if (UseCompressedClassPointers) { 3646 z_llgf(klass, oopDesc::klass_offset_in_bytes(), src_oop); 3647 // Attention: no null check here! 3648 decode_klass_not_null(klass); 3649 } else { 3650 z_lg(klass, oopDesc::klass_offset_in_bytes(), src_oop); 3651 } 3652 } 3653 3654 void MacroAssembler::load_klass_check_null(Register klass, Register src_oop, Register tmp) { 3655 null_check(src_oop, tmp, oopDesc::klass_offset_in_bytes()); 3656 load_klass(klass, src_oop); 3657 } 3658 3659 void MacroAssembler::store_klass(Register klass, Register dst_oop, Register ck) { 3660 if (UseCompressedClassPointers) { 3661 assert_different_registers(dst_oop, klass, Z_R0); 3662 if (ck == noreg) ck = klass; 3663 encode_klass_not_null(ck, klass); 3664 z_st(ck, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 3665 } else { 3666 z_stg(klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 3667 } 3668 } 3669 3670 void MacroAssembler::store_klass_gap(Register s, Register d) { 3671 if (UseCompressedClassPointers) { 3672 assert(s != d, "not enough registers"); 3673 // Support s = noreg. 3674 if (s != noreg) { 3675 z_st(s, Address(d, oopDesc::klass_gap_offset_in_bytes())); 3676 } else { 3677 z_mvhi(Address(d, oopDesc::klass_gap_offset_in_bytes()), 0); 3678 } 3679 } 3680 } 3681 3682 // Compare klass ptr in memory against klass ptr in register. 3683 // 3684 // Rop1 - klass in register, always uncompressed. 3685 // disp - Offset of klass in memory, compressed/uncompressed, depending on runtime flag. 3686 // Rbase - Base address of cKlass in memory. 3687 // maybeNULL - True if Rop1 possibly is a NULL. 
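// On exit, the condition code is EQ iff the klass in Rop1 matches the klass
// field at Rbase+disp; nmethod_UEP() above relies on exactly this and branches
// to its ic_miss label on NE.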
3688 void MacroAssembler::compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybeNULL) { 3689 3690 BLOCK_COMMENT("compare klass ptr {"); 3691 3692 if (UseCompressedClassPointers) { 3693 const int shift = CompressedKlassPointers::shift(); 3694 address base = CompressedKlassPointers::base(); 3695 3696 assert((shift == 0) || (shift == LogKlassAlignmentInBytes), "cKlass encoder detected bad shift"); 3697 assert_different_registers(Rop1, Z_R0); 3698 assert_different_registers(Rop1, Rbase, Z_R1); 3699 3700 // First encode register oop and then compare with cOop in memory. 3701 // This sequence saves an unnecessary cOop load and decode. 3702 if (base == NULL) { 3703 if (shift == 0) { 3704 z_cl(Rop1, disp, Rbase); // Unscaled 3705 } else { 3706 z_srlg(Z_R0, Rop1, shift); // ZeroBased 3707 z_cl(Z_R0, disp, Rbase); 3708 } 3709 } else { // HeapBased 3710 #ifdef ASSERT 3711 bool used_R0 = true; 3712 bool used_R1 = true; 3713 #endif 3714 Register current = Rop1; 3715 Label done; 3716 3717 if (maybeNULL) { // NULL ptr must be preserved! 3718 z_ltgr(Z_R0, current); 3719 z_bre(done); 3720 current = Z_R0; 3721 } 3722 3723 unsigned int base_h = ((unsigned long)base)>>32; 3724 unsigned int base_l = (unsigned int)((unsigned long)base); 3725 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3726 lgr_if_needed(Z_R0, current); 3727 z_aih(Z_R0, -((int)base_h)); // Base has no set bits in lower half. 3728 } else if ((base_h == 0) && (base_l != 0)) { 3729 lgr_if_needed(Z_R0, current); 3730 z_agfi(Z_R0, -(int)base_l); 3731 } else { 3732 int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base)); 3733 add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); // Subtract base by adding complement. 3734 } 3735 3736 if (shift != 0) { 3737 z_srlg(Z_R0, Z_R0, shift); 3738 } 3739 bind(done); 3740 z_cl(Z_R0, disp, Rbase); 3741 #ifdef ASSERT 3742 if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2); 3743 if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2); 3744 #endif 3745 } 3746 } else { 3747 z_clg(Rop1, disp, Z_R0, Rbase); 3748 } 3749 BLOCK_COMMENT("} compare klass ptr"); 3750 } 3751 3752 //--------------------------- 3753 // Compressed oops 3754 //--------------------------- 3755 3756 void MacroAssembler::encode_heap_oop(Register oop) { 3757 oop_encoder(oop, oop, true /*maybe null*/); 3758 } 3759 3760 void MacroAssembler::encode_heap_oop_not_null(Register oop) { 3761 oop_encoder(oop, oop, false /*not null*/); 3762 } 3763 3764 // Called with something derived from the oop base. e.g. oop_base>>3. 3765 int MacroAssembler::get_oop_base_pow2_offset(uint64_t oop_base) { 3766 unsigned int oop_base_ll = ((unsigned int)(oop_base >> 0)) & 0xffff; 3767 unsigned int oop_base_lh = ((unsigned int)(oop_base >> 16)) & 0xffff; 3768 unsigned int oop_base_hl = ((unsigned int)(oop_base >> 32)) & 0xffff; 3769 unsigned int oop_base_hh = ((unsigned int)(oop_base >> 48)) & 0xffff; 3770 unsigned int n_notzero_parts = (oop_base_ll == 0 ? 0:1) 3771 + (oop_base_lh == 0 ? 0:1) 3772 + (oop_base_hl == 0 ? 0:1) 3773 + (oop_base_hh == 0 ? 0:1); 3774 3775 assert(oop_base != 0, "This is for HeapBased cOops only"); 3776 3777 if (n_notzero_parts != 1) { // Check if oop_base is just a few pages shy of a power of 2. 3778 uint64_t pow2_offset = 0x10000 - oop_base_ll; 3779 if (pow2_offset < 0x8000) { // This might not be necessary. 
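// Illustrative example (made-up value): oop_base = 0x3FFFFF000 (ll = 0xF000,
// lh = 0xFFFF, hl = 0x0003) gives pow2_offset = 0x1000; oop_base + 0x1000 =
// 0x400000000 has a single non-zero 16-bit part and is cheap to materialize.
// The function then returns -0x1000, which the caller folds back in as a later
// composite add.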
3780 uint64_t oop_base2 = oop_base + pow2_offset; 3781 3782 oop_base_ll = ((unsigned int)(oop_base2 >> 0)) & 0xffff; 3783 oop_base_lh = ((unsigned int)(oop_base2 >> 16)) & 0xffff; 3784 oop_base_hl = ((unsigned int)(oop_base2 >> 32)) & 0xffff; 3785 oop_base_hh = ((unsigned int)(oop_base2 >> 48)) & 0xffff; 3786 n_notzero_parts = (oop_base_ll == 0 ? 0:1) + 3787 (oop_base_lh == 0 ? 0:1) + 3788 (oop_base_hl == 0 ? 0:1) + 3789 (oop_base_hh == 0 ? 0:1); 3790 if (n_notzero_parts == 1) { 3791 assert(-(int64_t)pow2_offset != (int64_t)-1, "We use -1 to signal uninitialized base register"); 3792 return -pow2_offset; 3793 } 3794 } 3795 } 3796 return 0; 3797 } 3798 3799 // If base address is offset from a straight power of two by just a few pages, 3800 // return this offset to the caller for a possible later composite add. 3801 // TODO/FIX: will only work correctly for 4k pages. 3802 int MacroAssembler::get_oop_base(Register Rbase, uint64_t oop_base) { 3803 int pow2_offset = get_oop_base_pow2_offset(oop_base); 3804 3805 load_const_optimized(Rbase, oop_base - pow2_offset); // Best job possible. 3806 3807 return pow2_offset; 3808 } 3809 3810 int MacroAssembler::get_oop_base_complement(Register Rbase, uint64_t oop_base) { 3811 int offset = get_oop_base(Rbase, oop_base); 3812 z_lcgr(Rbase, Rbase); 3813 return -offset; 3814 } 3815 3816 // Compare compressed oop in memory against oop in register. 3817 // Rop1 - Oop in register. 3818 // disp - Offset of cOop in memory. 3819 // Rbase - Base address of cOop in memory. 3820 // maybeNULL - True if Rop1 possibly is a NULL. 3821 // maybeNULLtarget - Branch target for Rop1 == NULL, if flow control shall NOT continue with compare instruction. 3822 void MacroAssembler::compare_heap_oop(Register Rop1, Address mem, bool maybeNULL) { 3823 Register Rbase = mem.baseOrR0(); 3824 Register Rindex = mem.indexOrR0(); 3825 int64_t disp = mem.disp(); 3826 3827 const int shift = CompressedOops::shift(); 3828 address base = CompressedOops::base(); 3829 3830 assert(UseCompressedOops, "must be on to call this method"); 3831 assert(Universe::heap() != NULL, "java heap must be initialized to call this method"); 3832 assert((shift == 0) || (shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift"); 3833 assert_different_registers(Rop1, Z_R0); 3834 assert_different_registers(Rop1, Rbase, Z_R1); 3835 assert_different_registers(Rop1, Rindex, Z_R1); 3836 3837 BLOCK_COMMENT("compare heap oop {"); 3838 3839 // First encode register oop and then compare with cOop in memory. 3840 // This sequence saves an unnecessary cOop load and decode. 3841 if (base == NULL) { 3842 if (shift == 0) { 3843 z_cl(Rop1, disp, Rindex, Rbase); // Unscaled 3844 } else { 3845 z_srlg(Z_R0, Rop1, shift); // ZeroBased 3846 z_cl(Z_R0, disp, Rindex, Rbase); 3847 } 3848 } else { // HeapBased 3849 #ifdef ASSERT 3850 bool used_R0 = true; 3851 bool used_R1 = true; 3852 #endif 3853 Label done; 3854 int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base)); 3855 3856 if (maybeNULL) { // NULL ptr must be preserved! 
3857 z_ltgr(Z_R0, Rop1); 3858 z_bre(done); 3859 } 3860 3861 add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); 3862 z_srlg(Z_R0, Z_R0, shift); 3863 3864 bind(done); 3865 z_cl(Z_R0, disp, Rindex, Rbase); 3866 #ifdef ASSERT 3867 if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2); 3868 if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2); 3869 #endif 3870 } 3871 BLOCK_COMMENT("} compare heap oop"); 3872 } 3873 3874 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, 3875 const Address& addr, Register val, 3876 Register tmp1, Register tmp2, Register tmp3) { 3877 assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL | 3878 ON_UNKNOWN_OOP_REF)) == 0, "unsupported decorator"); 3879 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 3880 decorators = AccessInternal::decorator_fixup(decorators, type); 3881 bool as_raw = (decorators & AS_RAW) != 0; 3882 if (as_raw) { 3883 bs->BarrierSetAssembler::store_at(this, decorators, type, 3884 addr, val, 3885 tmp1, tmp2, tmp3); 3886 } else { 3887 bs->store_at(this, decorators, type, 3888 addr, val, 3889 tmp1, tmp2, tmp3); 3890 } 3891 } 3892 3893 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, 3894 const Address& addr, Register dst, 3895 Register tmp1, Register tmp2, Label *is_null) { 3896 assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL | 3897 ON_PHANTOM_OOP_REF | ON_WEAK_OOP_REF)) == 0, "unsupported decorator"); 3898 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 3899 decorators = AccessInternal::decorator_fixup(decorators, type); 3900 bool as_raw = (decorators & AS_RAW) != 0; 3901 if (as_raw) { 3902 bs->BarrierSetAssembler::load_at(this, decorators, type, 3903 addr, dst, 3904 tmp1, tmp2, is_null); 3905 } else { 3906 bs->load_at(this, decorators, type, 3907 addr, dst, 3908 tmp1, tmp2, is_null); 3909 } 3910 } 3911 3912 void MacroAssembler::load_heap_oop(Register dest, const Address &a, 3913 Register tmp1, Register tmp2, 3914 DecoratorSet decorators, Label *is_null) { 3915 access_load_at(T_OBJECT, IN_HEAP | decorators, a, dest, tmp1, tmp2, is_null); 3916 } 3917 3918 void MacroAssembler::store_heap_oop(Register Roop, const Address &a, 3919 Register tmp1, Register tmp2, Register tmp3, 3920 DecoratorSet decorators) { 3921 access_store_at(T_OBJECT, IN_HEAP | decorators, a, Roop, tmp1, tmp2, tmp3); 3922 } 3923 3924 //------------------------------------------------- 3925 // Encode compressed oop. Generally usable encoder. 3926 //------------------------------------------------- 3927 // Rsrc - contains regular oop on entry. It remains unchanged. 3928 // Rdst - contains compressed oop on exit. 3929 // Rdst and Rsrc may indicate same register, in which case Rsrc does not remain unchanged. 3930 // 3931 // Rdst must not indicate scratch register Z_R1 (Z_R1_scratch) for functionality. 3932 // Rdst should not indicate scratch register Z_R0 (Z_R0_scratch) for performance. 3933 // 3934 // only32bitValid is set, if later code only uses the lower 32 bits. In this 3935 // case we must not fix the upper 32 bits. 
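// (Hedged overview of the paths below: with a zero or disjoint base, a single
// SRLG by the shift -- or just a register move when shift == 0 -- suffices.
// With a heap-based encoding, the base is removed by adding its two's
// complement (see get_oop_base_complement()), so the subtraction and a small
// power-of-two correction can be folded into one add-with-index.)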
3936 void MacroAssembler::oop_encoder(Register Rdst, Register Rsrc, bool maybeNULL, 3937 Register Rbase, int pow2_offset, bool only32bitValid) { 3938 3939 const address oop_base = CompressedOops::base(); 3940 const int oop_shift = CompressedOops::shift(); 3941 const bool disjoint = CompressedOops::base_disjoint(); 3942 3943 assert(UseCompressedOops, "must be on to call this method"); 3944 assert(Universe::heap() != NULL, "java heap must be initialized to call this encoder"); 3945 assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift"); 3946 3947 if (disjoint || (oop_base == NULL)) { 3948 BLOCK_COMMENT("cOop encoder zeroBase {"); 3949 if (oop_shift == 0) { 3950 if (oop_base != NULL && !only32bitValid) { 3951 z_llgfr(Rdst, Rsrc); // Clear upper bits in case the register will be decoded again. 3952 } else { 3953 lgr_if_needed(Rdst, Rsrc); 3954 } 3955 } else { 3956 z_srlg(Rdst, Rsrc, oop_shift); 3957 if (oop_base != NULL && !only32bitValid) { 3958 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 3959 } 3960 } 3961 BLOCK_COMMENT("} cOop encoder zeroBase"); 3962 return; 3963 } 3964 3965 bool used_R0 = false; 3966 bool used_R1 = false; 3967 3968 BLOCK_COMMENT("cOop encoder general {"); 3969 assert_different_registers(Rdst, Z_R1); 3970 assert_different_registers(Rsrc, Rbase); 3971 if (maybeNULL) { 3972 Label done; 3973 // We reorder shifting and subtracting, so that we can compare 3974 // and shift in parallel: 3975 // 3976 // cycle 0: potential LoadN, base = <const> 3977 // cycle 1: base = !base dst = src >> 3, cmp cr = (src != 0) 3978 // cycle 2: if (cr) br, dst = dst + base + offset 3979 3980 // Get oop_base components. 3981 if (pow2_offset == -1) { 3982 if (Rdst == Rbase) { 3983 if (Rdst == Z_R1 || Rsrc == Z_R1) { 3984 Rbase = Z_R0; 3985 used_R0 = true; 3986 } else { 3987 Rdst = Z_R1; 3988 used_R1 = true; 3989 } 3990 } 3991 if (Rbase == Z_R1) { 3992 used_R1 = true; 3993 } 3994 pow2_offset = get_oop_base_complement(Rbase, ((uint64_t)(intptr_t)oop_base) >> oop_shift); 3995 } 3996 assert_different_registers(Rdst, Rbase); 3997 3998 // Check for NULL oop (must be left alone) and shift. 3999 if (oop_shift != 0) { // Shift out alignment bits 4000 if (((intptr_t)oop_base&0xc000000000000000L) == 0L) { // We are sure: no single address will have the leftmost bit set. 4001 z_srag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4002 } else { 4003 z_srlg(Rdst, Rsrc, oop_shift); 4004 z_ltgr(Rsrc, Rsrc); // This is the recommended way of testing for zero. 4005 // This probably is faster, as it does not write a register. No! 4006 // z_cghi(Rsrc, 0); 4007 } 4008 } else { 4009 z_ltgr(Rdst, Rsrc); // Move NULL to result register. 4010 } 4011 z_bre(done); 4012 4013 // Subtract oop_base components. 4014 if ((Rdst == Z_R0) || (Rbase == Z_R0)) { 4015 z_algr(Rdst, Rbase); 4016 if (pow2_offset != 0) { add2reg(Rdst, pow2_offset); } 4017 } else { 4018 add2reg_with_index(Rdst, pow2_offset, Rbase, Rdst); 4019 } 4020 if (!only32bitValid) { 4021 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 4022 } 4023 bind(done); 4024 4025 } else { // not null 4026 // Get oop_base components. 4027 if (pow2_offset == -1) { 4028 pow2_offset = get_oop_base_complement(Rbase, (uint64_t)(intptr_t)oop_base); 4029 } 4030 4031 // Subtract oop_base components and shift. 4032 if (Rdst == Z_R0 || Rsrc == Z_R0 || Rbase == Z_R0) { 4033 // Don't use lay instruction. 
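// (General register 0 in a base or index position of LA/LAY means "no register"
// on z/Architecture, so the composite add must be assembled from ALGR/add2reg
// instead of a single add-with-index.)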
4034 if (Rdst == Rsrc) { 4035 z_algr(Rdst, Rbase); 4036 } else { 4037 lgr_if_needed(Rdst, Rbase); 4038 z_algr(Rdst, Rsrc); 4039 } 4040 if (pow2_offset != 0) add2reg(Rdst, pow2_offset); 4041 } else { 4042 add2reg_with_index(Rdst, pow2_offset, Rbase, Rsrc); 4043 } 4044 if (oop_shift != 0) { // Shift out alignment bits. 4045 z_srlg(Rdst, Rdst, oop_shift); 4046 } 4047 if (!only32bitValid) { 4048 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 4049 } 4050 } 4051 #ifdef ASSERT 4052 if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb01bUL, 2); } 4053 if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb02bUL, 2); } 4054 #endif 4055 BLOCK_COMMENT("} cOop encoder general"); 4056 } 4057 4058 //------------------------------------------------- 4059 // decode compressed oop. Generally usable decoder. 4060 //------------------------------------------------- 4061 // Rsrc - contains compressed oop on entry. 4062 // Rdst - contains regular oop on exit. 4063 // Rdst and Rsrc may indicate same register. 4064 // Rdst must not be the same register as Rbase, if Rbase was preloaded (before call). 4065 // Rdst can be the same register as Rbase. Then, either Z_R0 or Z_R1 must be available as scratch. 4066 // Rbase - register to use for the base 4067 // pow2_offset - offset of base to nice value. If -1, base must be loaded. 4068 // For performance, it is good to 4069 // - avoid Z_R0 for any of the argument registers. 4070 // - keep Rdst and Rsrc distinct from Rbase. Rdst == Rsrc is ok for performance. 4071 // - avoid Z_R1 for Rdst if Rdst == Rbase. 4072 void MacroAssembler::oop_decoder(Register Rdst, Register Rsrc, bool maybeNULL, Register Rbase, int pow2_offset) { 4073 4074 const address oop_base = CompressedOops::base(); 4075 const int oop_shift = CompressedOops::shift(); 4076 const bool disjoint = CompressedOops::base_disjoint(); 4077 4078 assert(UseCompressedOops, "must be on to call this method"); 4079 assert(Universe::heap() != NULL, "java heap must be initialized to call this decoder"); 4080 assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), 4081 "cOop encoder detected bad shift"); 4082 4083 // cOops are always loaded zero-extended from memory. No explicit zero-extension necessary. 4084 4085 if (oop_base != NULL) { 4086 unsigned int oop_base_hl = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xffff; 4087 unsigned int oop_base_hh = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 48)) & 0xffff; 4088 unsigned int oop_base_hf = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xFFFFffff; 4089 if (disjoint && (oop_base_hl == 0 || oop_base_hh == 0)) { 4090 BLOCK_COMMENT("cOop decoder disjointBase {"); 4091 // We do not need to load the base. Instead, we can install the upper bits 4092 // with an OR instead of an ADD. 4093 Label done; 4094 4095 // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set. 4096 if (maybeNULL) { // NULL ptr must be preserved! 4097 z_slag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4098 z_bre(done); 4099 } else { 4100 z_sllg(Rdst, Rsrc, oop_shift); // Logical shift leaves condition code alone. 
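// (In the maybeNULL path above, SLAG is used instead because it sets the
// condition code, letting z_bre(done) skip the OR of the base bits when the
// narrow oop is NULL.)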
4101 } 4102 if ((oop_base_hl != 0) && (oop_base_hh != 0)) { 4103 z_oihf(Rdst, oop_base_hf); 4104 } else if (oop_base_hl != 0) { 4105 z_oihl(Rdst, oop_base_hl); 4106 } else { 4107 assert(oop_base_hh != 0, "not heapbased mode"); 4108 z_oihh(Rdst, oop_base_hh); 4109 } 4110 bind(done); 4111 BLOCK_COMMENT("} cOop decoder disjointBase"); 4112 } else { 4113 BLOCK_COMMENT("cOop decoder general {"); 4114 // There are three decode steps: 4115 // scale oop offset (shift left) 4116 // get base (in reg) and pow2_offset (constant) 4117 // add base, pow2_offset, and oop offset 4118 // The following register overlap situations may exist: 4119 // Rdst == Rsrc, Rbase any other 4120 // not a problem. Scaling in-place leaves Rbase undisturbed. 4121 // Loading Rbase does not impact the scaled offset. 4122 // Rdst == Rbase, Rsrc any other 4123 // scaling would destroy a possibly preloaded Rbase. Loading Rbase 4124 // would destroy the scaled offset. 4125 // Remedy: use Rdst_tmp if Rbase has been preloaded. 4126 // use Rbase_tmp if base has to be loaded. 4127 // Rsrc == Rbase, Rdst any other 4128 // Only possible without preloaded Rbase. 4129 // Loading Rbase does not destroy compressed oop because it was scaled into Rdst before. 4130 // Rsrc == Rbase, Rdst == Rbase 4131 // Only possible without preloaded Rbase. 4132 // Loading Rbase would destroy compressed oop. Scaling in-place is ok. 4133 // Remedy: use Rbase_tmp. 4134 // 4135 Label done; 4136 Register Rdst_tmp = Rdst; 4137 Register Rbase_tmp = Rbase; 4138 bool used_R0 = false; 4139 bool used_R1 = false; 4140 bool base_preloaded = pow2_offset >= 0; 4141 guarantee(!(base_preloaded && (Rsrc == Rbase)), "Register clash, check caller"); 4142 assert(oop_shift != 0, "room for optimization"); 4143 4144 // Check if we need to use scratch registers. 4145 if (Rdst == Rbase) { 4146 assert(!(((Rdst == Z_R0) && (Rsrc == Z_R1)) || ((Rdst == Z_R1) && (Rsrc == Z_R0))), "need a scratch reg"); 4147 if (Rdst != Rsrc) { 4148 if (base_preloaded) { Rdst_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; } 4149 else { Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; } 4150 } else { 4151 Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; 4152 } 4153 } 4154 if (base_preloaded) lgr_if_needed(Rbase_tmp, Rbase); 4155 4156 // Scale oop and check for NULL. 4157 // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set. 4158 if (maybeNULL) { // NULL ptr must be preserved! 4159 z_slag(Rdst_tmp, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4160 z_bre(done); 4161 } else { 4162 z_sllg(Rdst_tmp, Rsrc, oop_shift); // Logical shift leaves condition code alone. 4163 } 4164 4165 // Get oop_base components. 4166 if (!base_preloaded) { 4167 pow2_offset = get_oop_base(Rbase_tmp, (uint64_t)(intptr_t)oop_base); 4168 } 4169 4170 // Add up all components. 
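// That is: Rdst_tmp (the scaled narrow oop) + Rbase_tmp (the oop base, possibly
// rounded to a cheaper-to-load constant) + pow2_offset (the small rounding
// correction).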
4171 if ((Rbase_tmp == Z_R0) || (Rdst_tmp == Z_R0)) { 4172 z_algr(Rdst_tmp, Rbase_tmp); 4173 if (pow2_offset != 0) { add2reg(Rdst_tmp, pow2_offset); } 4174 } else { 4175 add2reg_with_index(Rdst_tmp, pow2_offset, Rbase_tmp, Rdst_tmp); 4176 } 4177 4178 bind(done); 4179 lgr_if_needed(Rdst, Rdst_tmp); 4180 #ifdef ASSERT 4181 if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb03bUL, 2); } 4182 if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb04bUL, 2); } 4183 #endif 4184 BLOCK_COMMENT("} cOop decoder general"); 4185 } 4186 } else { 4187 BLOCK_COMMENT("cOop decoder zeroBase {"); 4188 if (oop_shift == 0) { 4189 lgr_if_needed(Rdst, Rsrc); 4190 } else { 4191 z_sllg(Rdst, Rsrc, oop_shift); 4192 } 4193 BLOCK_COMMENT("} cOop decoder zeroBase"); 4194 } 4195 } 4196 4197 // ((OopHandle)result).resolve(); 4198 void MacroAssembler::resolve_oop_handle(Register result) { 4199 // OopHandle::resolve is an indirection. 4200 z_lg(result, 0, result); 4201 } 4202 4203 void MacroAssembler::load_mirror_from_const_method(Register mirror, Register const_method) { 4204 mem2reg_opt(mirror, Address(const_method, ConstMethod::constants_offset())); 4205 mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes())); 4206 mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset())); 4207 resolve_oop_handle(mirror); 4208 } 4209 4210 void MacroAssembler::load_method_holder(Register holder, Register method) { 4211 mem2reg_opt(holder, Address(method, Method::const_offset())); 4212 mem2reg_opt(holder, Address(holder, ConstMethod::constants_offset())); 4213 mem2reg_opt(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); 4214 } 4215 4216 //--------------------------------------------------------------- 4217 //--- Operations on arrays. 4218 //--------------------------------------------------------------- 4219 4220 // Compiler ensures base is doubleword aligned and cnt is #doublewords. 4221 // Emitter does not KILL cnt and base arguments, since they need to be copied to 4222 // work registers anyway. 4223 // Actually, only r0, r1, and r5 are killed. 4224 unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register odd_tmp_reg) { 4225 4226 int block_start = offset(); 4227 Register dst_len = Z_R1; // Holds dst len for MVCLE. 4228 Register dst_addr = Z_R0; // Holds dst addr for MVCLE. 4229 4230 Label doXC, doMVCLE, done; 4231 4232 BLOCK_COMMENT("Clear_Array {"); 4233 4234 // Check for zero len and convert to long. 4235 z_ltgfr(odd_tmp_reg, cnt_arg); 4236 z_bre(done); // Nothing to do if len == 0. 4237 4238 // Prefetch data to be cleared. 4239 if (VM_Version::has_Prefetch()) { 4240 z_pfd(0x02, 0, Z_R0, base_pointer_arg); 4241 z_pfd(0x02, 256, Z_R0, base_pointer_arg); 4242 } 4243 4244 z_sllg(dst_len, odd_tmp_reg, 3); // #bytes to clear. 4245 z_cghi(odd_tmp_reg, 32); // Check for len <= 256 bytes (<=32 DW). 4246 z_brnh(doXC); // If so, use executed XC to clear. 4247 4248 // MVCLE: initialize long arrays (general case). 4249 bind(doMVCLE); 4250 z_lgr(dst_addr, base_pointer_arg); 4251 // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0. 4252 // The even register of the register pair is not killed. 4253 clear_reg(odd_tmp_reg, true, false); 4254 MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding()-1), 0); 4255 z_bru(done); 4256 4257 // XC: initialize short arrays. 4258 Label XC_template; // Instr template, never exec directly! 
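// (The XC below is never reached by a branch. EX/EXRL executes it out of line
// with its length field supplied from dst_len (= #bytes - 1), so one executed
// XC clears up to 256 bytes in place.)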
4259 bind(XC_template); 4260 z_xc(0,0,base_pointer_arg,0,base_pointer_arg); 4261 4262 bind(doXC); 4263 add2reg(dst_len, -1); // Get #bytes-1 for EXECUTE. 4264 if (VM_Version::has_ExecuteExtensions()) { 4265 z_exrl(dst_len, XC_template); // Execute XC with var. len. 4266 } else { 4267 z_larl(odd_tmp_reg, XC_template); 4268 z_ex(dst_len,0,Z_R0,odd_tmp_reg); // Execute XC with var. len. 4269 } 4270 // z_bru(done); // fallthru 4271 4272 bind(done); 4273 4274 BLOCK_COMMENT("} Clear_Array"); 4275 4276 int block_end = offset(); 4277 return block_end - block_start; 4278 } 4279 4280 // Compiler ensures base is doubleword aligned and cnt is count of doublewords. 4281 // Emitter does not KILL any arguments nor work registers. 4282 // Emitter generates up to 16 XC instructions, depending on the array length. 4283 unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) { 4284 int block_start = offset(); 4285 int off; 4286 int lineSize_Bytes = AllocatePrefetchStepSize; 4287 int lineSize_DW = AllocatePrefetchStepSize>>LogBytesPerWord; 4288 bool doPrefetch = VM_Version::has_Prefetch(); 4289 int XC_maxlen = 256; 4290 int numXCInstr = cnt > 0 ? (cnt*BytesPerWord-1)/XC_maxlen+1 : 0; 4291 4292 BLOCK_COMMENT("Clear_Array_Const {"); 4293 assert(cnt*BytesPerWord <= 4096, "ClearArrayConst can handle 4k only"); 4294 4295 // Do less prefetching for very short arrays. 4296 if (numXCInstr > 0) { 4297 // Prefetch only some cache lines, then begin clearing. 4298 if (doPrefetch) { 4299 if (cnt*BytesPerWord <= lineSize_Bytes/4) { // If less than 1/4 of a cache line to clear, 4300 z_pfd(0x02, 0, Z_R0, base); // prefetch just the first cache line. 4301 } else { 4302 assert(XC_maxlen == lineSize_Bytes, "ClearArrayConst needs 256B cache lines"); 4303 for (off = 0; (off < AllocatePrefetchLines) && (off <= numXCInstr); off ++) { 4304 z_pfd(0x02, off*lineSize_Bytes, Z_R0, base); 4305 } 4306 } 4307 } 4308 4309 for (off=0; off<(numXCInstr-1); off++) { 4310 z_xc(off*XC_maxlen, XC_maxlen-1, base, off*XC_maxlen, base); 4311 4312 // Prefetch some cache lines in advance. 4313 if (doPrefetch && (off <= numXCInstr-AllocatePrefetchLines)) { 4314 z_pfd(0x02, (off+AllocatePrefetchLines)*lineSize_Bytes, Z_R0, base); 4315 } 4316 } 4317 if (off*XC_maxlen < cnt*BytesPerWord) { 4318 z_xc(off*XC_maxlen, (cnt*BytesPerWord-off*XC_maxlen)-1, base, off*XC_maxlen, base); 4319 } 4320 } 4321 BLOCK_COMMENT("} Clear_Array_Const"); 4322 4323 int block_end = offset(); 4324 return block_end - block_start; 4325 } 4326 4327 // Compiler ensures base is doubleword aligned and cnt is #doublewords. 4328 // Emitter does not KILL cnt and base arguments, since they need to be copied to 4329 // work registers anyway. 4330 // Actually, only r0, r1, (which are work registers) and odd_tmp_reg are killed. 4331 // 4332 // For very large arrays, exploit MVCLE H/W support. 4333 // MVCLE instruction automatically exploits H/W-optimized page mover. 4334 // - Bytes up to next page boundary are cleared with a series of XC to self. 4335 // - All full pages are cleared with the page mover H/W assist. 4336 // - Remaining bytes are again cleared by a series of XC to self. 4337 // 4338 unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register odd_tmp_reg) { 4339 4340 int block_start = offset(); 4341 Register dst_len = Z_R1; // Holds dst len for MVCLE. 4342 Register dst_addr = Z_R0; // Holds dst addr for MVCLE. 4343 4344 BLOCK_COMMENT("Clear_Array_Const_Big {"); 4345 4346 // Get len to clear. 
4347 load_const_optimized(dst_len, (long)cnt*8L); // in Bytes = #DW*8 4348 4349 // Prepare other args to MVCLE. 4350 z_lgr(dst_addr, base_pointer_arg); 4351 // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0. 4352 // The even register of the register pair is not killed. 4353 (void) clear_reg(odd_tmp_reg, true, false); // Src len of MVCLE is zero. 4354 MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding() - 1), 0); 4355 BLOCK_COMMENT("} Clear_Array_Const_Big"); 4356 4357 int block_end = offset(); 4358 return block_end - block_start; 4359 } 4360 4361 // Allocator. 4362 unsigned int MacroAssembler::CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg, 4363 Register cnt_reg, 4364 Register tmp1_reg, Register tmp2_reg) { 4365 // Tmp1 is oddReg. 4366 // Tmp2 is evenReg. 4367 4368 int block_start = offset(); 4369 Label doMVC, doMVCLE, done, MVC_template; 4370 4371 BLOCK_COMMENT("CopyRawMemory_AlignedDisjoint {"); 4372 4373 // Check for zero len and convert to long. 4374 z_ltgfr(cnt_reg, cnt_reg); // Remember casted value for doSTG case. 4375 z_bre(done); // Nothing to do if len == 0. 4376 4377 z_sllg(Z_R1, cnt_reg, 3); // Dst len in bytes. calc early to have the result ready. 4378 4379 z_cghi(cnt_reg, 32); // Check for len <= 256 bytes (<=32 DW). 4380 z_brnh(doMVC); // If so, use executed MVC to clear. 4381 4382 bind(doMVCLE); // A lot of data (more than 256 bytes). 4383 // Prep dest reg pair. 4384 z_lgr(Z_R0, dst_reg); // dst addr 4385 // Dst len already in Z_R1. 4386 // Prep src reg pair. 4387 z_lgr(tmp2_reg, src_reg); // src addr 4388 z_lgr(tmp1_reg, Z_R1); // Src len same as dst len. 4389 4390 // Do the copy. 4391 move_long_ext(Z_R0, tmp2_reg, 0xb0); // Bypass cache. 4392 z_bru(done); // All done. 4393 4394 bind(MVC_template); // Just some data (not more than 256 bytes). 4395 z_mvc(0, 0, dst_reg, 0, src_reg); 4396 4397 bind(doMVC); 4398 4399 if (VM_Version::has_ExecuteExtensions()) { 4400 add2reg(Z_R1, -1); 4401 } else { 4402 add2reg(tmp1_reg, -1, Z_R1); 4403 z_larl(Z_R1, MVC_template); 4404 } 4405 4406 if (VM_Version::has_Prefetch()) { 4407 z_pfd(1, 0,Z_R0,src_reg); 4408 z_pfd(2, 0,Z_R0,dst_reg); 4409 // z_pfd(1,256,Z_R0,src_reg); // Assume very short copy. 4410 // z_pfd(2,256,Z_R0,dst_reg); 4411 } 4412 4413 if (VM_Version::has_ExecuteExtensions()) { 4414 z_exrl(Z_R1, MVC_template); 4415 } else { 4416 z_ex(tmp1_reg, 0, Z_R0, Z_R1); 4417 } 4418 4419 bind(done); 4420 4421 BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint"); 4422 4423 int block_end = offset(); 4424 return block_end - block_start; 4425 } 4426 4427 //------------------------------------------------- 4428 // Constants (scalar and oop) in constant pool 4429 //------------------------------------------------- 4430 4431 // Add a non-relocated constant to the CP. 4432 int MacroAssembler::store_const_in_toc(AddressLiteral& val) { 4433 long value = val.value(); 4434 address tocPos = long_constant(value); 4435 4436 if (tocPos != NULL) { 4437 int tocOffset = (int)(tocPos - code()->consts()->start()); 4438 return tocOffset; 4439 } 4440 // Address_constant returned NULL, so no constant entry has been created. 4441 // In that case, we return a "fatal" offset, just in case that subsequently 4442 // generated access code is executed. 4443 return -1; 4444 } 4445 4446 // Returns the TOC offset where the address is stored. 4447 // Add a relocated constant to the CP. 
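// A return value of -1 means no constant pool entry could be created; callers
// such as load_oop_from_toc() below treat that as failure.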
4448 int MacroAssembler::store_oop_in_toc(AddressLiteral& oop) { 4449 // Use RelocationHolder::none for the constant pool entry. 4450 // Otherwise we will end up with a failing NativeCall::verify(x), 4451 // where x is the address of the constant pool entry. 4452 address tocPos = address_constant((address)oop.value(), RelocationHolder::none); 4453 4454 if (tocPos != NULL) { 4455 int tocOffset = (int)(tocPos - code()->consts()->start()); 4456 RelocationHolder rsp = oop.rspec(); 4457 Relocation *rel = rsp.reloc(); 4458 4459 // Store toc_offset in relocation, used by call_far_patchable. 4460 if ((relocInfo::relocType)rel->type() == relocInfo::runtime_call_w_cp_type) { 4461 ((runtime_call_w_cp_Relocation *)(rel))->set_constant_pool_offset(tocOffset); 4462 } 4463 // Relocate at the load's pc. 4464 relocate(rsp); 4465 4466 return tocOffset; 4467 } 4468 // Address_constant returned NULL, so no constant entry has been created 4469 // in that case, we return a "fatal" offset, just in case that subsequently 4470 // generated access code is executed. 4471 return -1; 4472 } 4473 4474 bool MacroAssembler::load_const_from_toc(Register dst, AddressLiteral& a, Register Rtoc) { 4475 int tocOffset = store_const_in_toc(a); 4476 if (tocOffset == -1) return false; 4477 address tocPos = tocOffset + code()->consts()->start(); 4478 assert((address)code()->consts()->start() != NULL, "Please add CP address"); 4479 relocate(a.rspec()); 4480 load_long_pcrelative(dst, tocPos); 4481 return true; 4482 } 4483 4484 bool MacroAssembler::load_oop_from_toc(Register dst, AddressLiteral& a, Register Rtoc) { 4485 int tocOffset = store_oop_in_toc(a); 4486 if (tocOffset == -1) return false; 4487 address tocPos = tocOffset + code()->consts()->start(); 4488 assert((address)code()->consts()->start() != NULL, "Please add CP address"); 4489 4490 load_addr_pcrelative(dst, tocPos); 4491 return true; 4492 } 4493 4494 // If the instruction sequence at the given pc is a load_const_from_toc 4495 // sequence, return the value currently stored at the referenced position 4496 // in the TOC. 4497 intptr_t MacroAssembler::get_const_from_toc(address pc) { 4498 4499 assert(is_load_const_from_toc(pc), "must be load_const_from_pool"); 4500 4501 long offset = get_load_const_from_toc_offset(pc); 4502 address dataLoc = NULL; 4503 if (is_load_const_from_toc_pcrelative(pc)) { 4504 dataLoc = pc + offset; 4505 } else { 4506 CodeBlob* cb = CodeCache::find_blob(pc); 4507 assert(cb && cb->is_nmethod(), "sanity"); 4508 nmethod* nm = (nmethod*)cb; 4509 dataLoc = nm->ctable_begin() + offset; 4510 } 4511 return *(intptr_t *)dataLoc; 4512 } 4513 4514 // If the instruction sequence at the given pc is a load_const_from_toc 4515 // sequence, copy the passed-in new_data value into the referenced 4516 // position in the TOC. 4517 void MacroAssembler::set_const_in_toc(address pc, unsigned long new_data, CodeBlob *cb) { 4518 assert(is_load_const_from_toc(pc), "must be load_const_from_pool"); 4519 4520 long offset = MacroAssembler::get_load_const_from_toc_offset(pc); 4521 address dataLoc = NULL; 4522 if (is_load_const_from_toc_pcrelative(pc)) { 4523 dataLoc = pc+offset; 4524 } else { 4525 nmethod* nm = CodeCache::find_nmethod(pc); 4526 assert((cb == NULL) || (nm == (nmethod*)cb), "instruction address should be in CodeBlob"); 4527 dataLoc = nm->ctable_begin() + offset; 4528 } 4529 if (*(unsigned long *)dataLoc != new_data) { // Prevent cache invalidation: update only if necessary. 4530 *(unsigned long *)dataLoc = new_data; 4531 } 4532 } 4533 4534 // Dynamic TOC. 
Getter must only be called if "a" is a load_const_from_toc 4535 // site. Verify by calling is_load_const_from_toc() before!! 4536 // Offset is +/- 2**32 -> use long. 4537 long MacroAssembler::get_load_const_from_toc_offset(address a) { 4538 assert(is_load_const_from_toc_pcrelative(a), "expected pc relative load"); 4539 // expected code sequence: 4540 // z_lgrl(t, simm32); len = 6 4541 unsigned long inst; 4542 unsigned int len = get_instruction(a, &inst); 4543 return get_pcrel_offset(inst); 4544 } 4545 4546 //********************************************************************************** 4547 // inspection of generated instruction sequences for a particular pattern 4548 //********************************************************************************** 4549 4550 bool MacroAssembler::is_load_const_from_toc_pcrelative(address a) { 4551 #ifdef ASSERT 4552 unsigned long inst; 4553 unsigned int len = get_instruction(a+2, &inst); 4554 if ((len == 6) && is_load_pcrelative_long(a) && is_call_pcrelative_long(inst)) { 4555 const int range = 128; 4556 Assembler::dump_code_range(tty, a, range, "instr(a) == z_lgrl && instr(a+2) == z_brasl"); 4557 VM_Version::z_SIGSEGV(); 4558 } 4559 #endif 4560 // expected code sequence: 4561 // z_lgrl(t, relAddr32); len = 6 4562 //TODO: verify accessed data is in CP, if possible. 4563 return is_load_pcrelative_long(a); // TODO: might be too general. Currently, only lgrl is used. 4564 } 4565 4566 bool MacroAssembler::is_load_const_from_toc_call(address a) { 4567 return is_load_const_from_toc(a) && is_call_byregister(a + load_const_from_toc_size()); 4568 } 4569 4570 bool MacroAssembler::is_load_const_call(address a) { 4571 return is_load_const(a) && is_call_byregister(a + load_const_size()); 4572 } 4573 4574 //------------------------------------------------- 4575 // Emitters for some really CICS instructions 4576 //------------------------------------------------- 4577 4578 void MacroAssembler::move_long_ext(Register dst, Register src, unsigned int pad) { 4579 assert(dst->encoding()%2==0, "must be an even/odd register pair"); 4580 assert(src->encoding()%2==0, "must be an even/odd register pair"); 4581 assert(pad<256, "must be a padding BYTE"); 4582 4583 Label retry; 4584 bind(retry); 4585 Assembler::z_mvcle(dst, src, pad); 4586 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4587 } 4588 4589 void MacroAssembler::compare_long_ext(Register left, Register right, unsigned int pad) { 4590 assert(left->encoding() % 2 == 0, "must be an even/odd register pair"); 4591 assert(right->encoding() % 2 == 0, "must be an even/odd register pair"); 4592 assert(pad<256, "must be a padding BYTE"); 4593 4594 Label retry; 4595 bind(retry); 4596 Assembler::z_clcle(left, right, pad, Z_R0); 4597 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4598 } 4599 4600 void MacroAssembler::compare_long_uni(Register left, Register right, unsigned int pad) { 4601 assert(left->encoding() % 2 == 0, "must be an even/odd register pair"); 4602 assert(right->encoding() % 2 == 0, "must be an even/odd register pair"); 4603 assert(pad<=0xfff, "must be a padding HALFWORD"); 4604 assert(VM_Version::has_ETF2(), "instruction must be available"); 4605 4606 Label retry; 4607 bind(retry); 4608 Assembler::z_clclu(left, right, pad, Z_R0); 4609 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4610 } 4611 4612 void MacroAssembler::search_string(Register end, Register start) { 4613 assert(end->encoding() != 0, "end address must not be in R0"); 4614 
assert(start->encoding() != 0, "start address must not be in R0"); 4615 4616 Label retry; 4617 bind(retry); 4618 Assembler::z_srst(end, start); 4619 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4620 } 4621 4622 void MacroAssembler::search_string_uni(Register end, Register start) { 4623 assert(end->encoding() != 0, "end address must not be in R0"); 4624 assert(start->encoding() != 0, "start address must not be in R0"); 4625 assert(VM_Version::has_ETF3(), "instruction must be available"); 4626 4627 Label retry; 4628 bind(retry); 4629 Assembler::z_srstu(end, start); 4630 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4631 } 4632 4633 void MacroAssembler::kmac(Register srcBuff) { 4634 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4635 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 4636 4637 Label retry; 4638 bind(retry); 4639 Assembler::z_kmac(Z_R0, srcBuff); 4640 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4641 } 4642 4643 void MacroAssembler::kimd(Register srcBuff) { 4644 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4645 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 4646 4647 Label retry; 4648 bind(retry); 4649 Assembler::z_kimd(Z_R0, srcBuff); 4650 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4651 } 4652 4653 void MacroAssembler::klmd(Register srcBuff) { 4654 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4655 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 4656 4657 Label retry; 4658 bind(retry); 4659 Assembler::z_klmd(Z_R0, srcBuff); 4660 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4661 } 4662 4663 void MacroAssembler::km(Register dstBuff, Register srcBuff) { 4664 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 4665 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 4666 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4667 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 4668 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4669 4670 Label retry; 4671 bind(retry); 4672 Assembler::z_km(dstBuff, srcBuff); 4673 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4674 } 4675 4676 void MacroAssembler::kmc(Register dstBuff, Register srcBuff) { 4677 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 4678 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 4679 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4680 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 4681 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4682 4683 Label retry; 4684 bind(retry); 4685 Assembler::z_kmc(dstBuff, srcBuff); 4686 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4687 } 4688 4689 void MacroAssembler::kmctr(Register dstBuff, Register ctrBuff, Register srcBuff) { 4690 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 
4691 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 4692 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4693 assert(dstBuff->encoding() != 0, "dst buffer address can't be in Z_R0"); 4694 assert(ctrBuff->encoding() != 0, "ctr buffer address can't be in Z_R0"); 4695 assert(ctrBuff->encoding() % 2 == 0, "ctr buffer addr must be an even register"); 4696 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 4697 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4698 4699 Label retry; 4700 bind(retry); 4701 Assembler::z_kmctr(dstBuff, ctrBuff, srcBuff); 4702 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4703 } 4704 4705 void MacroAssembler::cksm(Register crcBuff, Register srcBuff) { 4706 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4707 4708 Label retry; 4709 bind(retry); 4710 Assembler::z_cksm(crcBuff, srcBuff); 4711 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4712 } 4713 4714 void MacroAssembler::translate_oo(Register r1, Register r2, uint m3) { 4715 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4716 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4717 4718 Label retry; 4719 bind(retry); 4720 Assembler::z_troo(r1, r2, m3); 4721 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4722 } 4723 4724 void MacroAssembler::translate_ot(Register r1, Register r2, uint m3) { 4725 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4726 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4727 4728 Label retry; 4729 bind(retry); 4730 Assembler::z_trot(r1, r2, m3); 4731 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4732 } 4733 4734 void MacroAssembler::translate_to(Register r1, Register r2, uint m3) { 4735 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4736 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4737 4738 Label retry; 4739 bind(retry); 4740 Assembler::z_trto(r1, r2, m3); 4741 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4742 } 4743 4744 void MacroAssembler::translate_tt(Register r1, Register r2, uint m3) { 4745 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4746 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4747 4748 Label retry; 4749 bind(retry); 4750 Assembler::z_trtt(r1, r2, m3); 4751 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4752 } 4753 4754 //--------------------------------------- 4755 // Helpers for Intrinsic Emitters 4756 //--------------------------------------- 4757 4758 /** 4759 * uint32_t crc; 4760 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); 4761 */ 4762 void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, Register tmp) { 4763 assert_different_registers(crc, table, tmp); 4764 assert_different_registers(val, table); 4765 if (crc == val) { // Must rotate first to use the unmodified value. 4766 rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. 4767 z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits. 4768 } else { 4769 z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits. 
4770 rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. 4771 } 4772 z_x(crc, Address(table, tmp, 0)); 4773 } 4774 4775 /** 4776 * uint32_t crc; 4777 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); 4778 */ 4779 void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) { 4780 fold_byte_crc32(crc, crc, table, tmp); 4781 } 4782 4783 /** 4784 * Emits code to update CRC-32 with a byte value according to constants in table. 4785 * 4786 * @param [in,out]crc Register containing the crc. 4787 * @param [in]val Register containing the byte to fold into the CRC. 4788 * @param [in]table Register containing the table of crc constants. 4789 * 4790 * uint32_t crc; 4791 * val = crc_table[(val ^ crc) & 0xFF]; 4792 * crc = val ^ (crc >> 8); 4793 */ 4794 void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { 4795 z_xr(val, crc); 4796 fold_byte_crc32(crc, val, table, val); 4797 } 4798 4799 4800 /** 4801 * @param crc register containing existing CRC (32-bit) 4802 * @param buf register pointing to input byte buffer (byte*) 4803 * @param len register containing number of bytes 4804 * @param table register pointing to CRC table 4805 */ 4806 void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, Register data) { 4807 assert_different_registers(crc, buf, len, table, data); 4808 4809 Label L_mainLoop, L_done; 4810 const int mainLoop_stepping = 1; 4811 4812 // Process all bytes in a single-byte loop. 4813 z_ltr(len, len); 4814 z_brnh(L_done); 4815 4816 bind(L_mainLoop); 4817 z_llgc(data, Address(buf, (intptr_t)0));// Current byte of input buffer (zero extended). Avoids garbage in upper half of register. 4818 add2reg(buf, mainLoop_stepping); // Advance buffer position. 4819 update_byte_crc32(crc, data, table); 4820 z_brct(len, L_mainLoop); // Iterate. 4821 4822 bind(L_done); 4823 } 4824 4825 /** 4826 * Emits code to update CRC-32 with a 4-byte value according to constants in table. 4827 * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c. 4828 * 4829 */ 4830 void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc, 4831 Register t0, Register t1, Register t2, Register t3) { 4832 // This is what we implement (the DOBIG4 part): 4833 // 4834 // #define DOBIG4 c ^= *++buf4; \ 4835 // c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ 4836 // crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] 4837 // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 4838 // Pre-calculate (constant) column offsets, use columns 4..7 for big-endian. 4839 const int ix0 = 4*(4*CRC32_COLUMN_SIZE); 4840 const int ix1 = 5*(4*CRC32_COLUMN_SIZE); 4841 const int ix2 = 6*(4*CRC32_COLUMN_SIZE); 4842 const int ix3 = 7*(4*CRC32_COLUMN_SIZE); 4843 4844 // XOR crc with next four bytes of buffer. 4845 lgr_if_needed(t0, crc); 4846 z_x(t0, Address(buf, bufDisp)); 4847 if (bufInc != 0) { 4848 add2reg(buf, bufInc); 4849 } 4850 4851 // Chop crc into 4 single-byte pieces, shifted left 2 bits, to form the table indices. 
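// Editorial sketch (comment only): what the four rotate_then_insert calls below
// compute, in plain C. Each CRC table column holds 256 four-byte entries, so a
// byte of the intermediate value is shifted left by 2 to become a byte offset
// into its column (t3 indexes column ix0, t2 -> ix1, t1 -> ix2, t0 -> ix3).
// The names c and i0..i3 are illustrative only.
//
//   unsigned int c  = (unsigned int)t0;         // crc ^ next 4 data bytes
//   unsigned int i3 = ((c >>  0) & 0xff) << 2;  // index into column ix0
//   unsigned int i2 = ((c >>  8) & 0xff) << 2;  // index into column ix1
//   unsigned int i1 = ((c >> 16) & 0xff) << 2;  // index into column ix2
//   unsigned int i0 = ((c >> 24) & 0xff) << 2;  // index into column ix3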
4852 rotate_then_insert(t3, t0, 56-2, 63-2, 2, true); // ((c >> 0) & 0xff) << 2 4853 rotate_then_insert(t2, t0, 56-2, 63-2, 2-8, true); // ((c >> 8) & 0xff) << 2 4854 rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true); // ((c >> 16) & 0xff) << 2 4855 rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true); // ((c >> 24) & 0xff) << 2 4856 4857 // XOR indexed table values to calculate updated crc. 4858 z_ly(t2, Address(table, t2, (intptr_t)ix1)); 4859 z_ly(t0, Address(table, t0, (intptr_t)ix3)); 4860 z_xy(t2, Address(table, t3, (intptr_t)ix0)); 4861 z_xy(t0, Address(table, t1, (intptr_t)ix2)); 4862 z_xr(t0, t2); // Now t0 contains the updated CRC value. 4863 lgr_if_needed(crc, t0); 4864 } 4865 4866 /** 4867 * @param crc register containing existing CRC (32-bit) 4868 * @param buf register pointing to input byte buffer (byte*) 4869 * @param len register containing number of bytes 4870 * @param table register pointing to CRC table 4871 * 4872 * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller! 4873 */ 4874 void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table, 4875 Register t0, Register t1, Register t2, Register t3, 4876 bool invertCRC) { 4877 assert_different_registers(crc, buf, len, table); 4878 4879 Label L_mainLoop, L_tail; 4880 Register data = t0; 4881 Register ctr = Z_R0; 4882 const int mainLoop_stepping = 4; 4883 const int log_stepping = exact_log2(mainLoop_stepping); 4884 4885 // Don't test for len <= 0 here. This pathological case should not occur anyway. 4886 // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles. 4887 // The situation itself is detected and handled correctly by the conditional branches 4888 // following aghi(len, -stepping) and aghi(len, +stepping). 4889 4890 if (invertCRC) { 4891 not_(crc, noreg, false); // 1s complement of crc 4892 } 4893 4894 // Check for short (<4 bytes) buffer. 4895 z_srag(ctr, len, log_stepping); 4896 z_brnh(L_tail); 4897 4898 z_lrvr(crc, crc); // Revert byte order because we are dealing with big-endian data. 4899 rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop 4900 4901 BIND(L_mainLoop); 4902 update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3); 4903 z_brct(ctr, L_mainLoop); // Iterate. 4904 4905 z_lrvr(crc, crc); // Revert byte order back to original. 4906 4907 // Process last few (<8) bytes of buffer. 
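// Editorial sketch (comment only): the main-loop / tail split of this emitter
// as plain C, assuming a hypothetical dobig4() corresponding to
// update_1word_crc32 above and a hypothetical crc32_update_byte() corresponding
// to update_byte_crc32. The emitted code additionally byte-swaps crc around the
// main loop (z_lrvr) because the table columns are laid out for big-endian words.
//
//   while (len >= 4) {                 // L_mainLoop: one 4-byte word per pass
//     crc = dobig4(crc, buf); buf += 4; len -= 4;
//   }
//   while (len-- > 0) {                // L_tail: the len & 3 remaining bytes
//     crc = crc32_update_byte(crc, *buf++, table);
//   }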
4908 BIND(L_tail); 4909 update_byteLoop_crc32(crc, buf, len, table, data); 4910 4911 if (invertCRC) { 4912 not_(crc, noreg, false); // 1s complement of crc 4913 } 4914 } 4915 4916 /** 4917 * @param crc register containing existing CRC (32-bit) 4918 * @param buf register pointing to input byte buffer (byte*) 4919 * @param len register containing number of bytes 4920 * @param table register pointing to CRC table 4921 */ 4922 void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table, 4923 Register t0, Register t1, Register t2, Register t3, 4924 bool invertCRC) { 4925 assert_different_registers(crc, buf, len, table); 4926 Register data = t0; 4927 4928 if (invertCRC) { 4929 not_(crc, noreg, false); // 1s complement of crc 4930 } 4931 4932 update_byteLoop_crc32(crc, buf, len, table, data); 4933 4934 if (invertCRC) { 4935 not_(crc, noreg, false); // 1s complement of crc 4936 } 4937 } 4938 4939 void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp, 4940 bool invertCRC) { 4941 assert_different_registers(crc, buf, len, table, tmp); 4942 4943 if (invertCRC) { 4944 not_(crc, noreg, false); // 1s complement of crc 4945 } 4946 4947 z_llgc(tmp, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register. 4948 update_byte_crc32(crc, tmp, table); 4949 4950 if (invertCRC) { 4951 not_(crc, noreg, false); // 1s complement of crc 4952 } 4953 } 4954 4955 void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table, 4956 bool invertCRC) { 4957 assert_different_registers(crc, val, table); 4958 4959 if (invertCRC) { 4960 not_(crc, noreg, false); // 1s complement of crc 4961 } 4962 4963 update_byte_crc32(crc, val, table); 4964 4965 if (invertCRC) { 4966 not_(crc, noreg, false); // 1s complement of crc 4967 } 4968 } 4969 4970 // 4971 // Code for BigInteger::multiplyToLen() intrinsic. 4972 // 4973 4974 // dest_lo += src1 + src2 4975 // dest_hi += carry1 + carry2 4976 // Z_R7 is destroyed ! 4977 void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo, 4978 Register src1, Register src2) { 4979 clear_reg(Z_R7); 4980 z_algr(dest_lo, src1); 4981 z_alcgr(dest_hi, Z_R7); 4982 z_algr(dest_lo, src2); 4983 z_alcgr(dest_hi, Z_R7); 4984 } 4985 4986 // Multiply 64 bit by 64 bit first loop. 4987 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, 4988 Register x_xstart, 4989 Register y, Register y_idx, 4990 Register z, 4991 Register carry, 4992 Register product, 4993 Register idx, Register kdx) { 4994 // jlong carry, x[], y[], z[]; 4995 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 4996 // huge_128 product = y[idx] * x[xstart] + carry; 4997 // z[kdx] = (jlong)product; 4998 // carry = (jlong)(product >>> 64); 4999 // } 5000 // z[xstart] = carry; 5001 5002 Label L_first_loop, L_first_loop_exit; 5003 Label L_one_x, L_one_y, L_multiply; 5004 5005 z_aghi(xstart, -1); 5006 z_brl(L_one_x); // Special case: length of x is 1. 5007 5008 // Load next two integers of x. 5009 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5010 mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0)); 5011 5012 5013 bind(L_first_loop); 5014 5015 z_aghi(idx, -1); 5016 z_brl(L_first_loop_exit); 5017 z_aghi(idx, -1); 5018 z_brl(L_one_y); 5019 5020 // Load next two integers of y. 
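// Editorial sketch (comment only): the multiply/accumulate step performed at
// L_multiply below, written in plain C with a 128-bit intermediate (compare the
// Java-style pseudo code above). MLGR leaves the 128-bit product in an even/odd
// register pair; the byte addressing mirrors the shift by LogBytesPerInt.
//
//   unsigned __int128 product = (unsigned __int128)y_idx * x_xstart + carry;
//   kdx -= 2;                                          // two 32-bit z slots
//   *(unsigned long*)((char*)z + 4*kdx) = (unsigned long)product;  // low half
//   carry = (unsigned long)(product >> 64);                        // high half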
5021 z_sllg(Z_R1_scratch, idx, LogBytesPerInt); 5022 mem2reg_opt(y_idx, Address(y, Z_R1_scratch, 0)); 5023 5024 5025 bind(L_multiply); 5026 5027 Register multiplicand = product->successor(); 5028 Register product_low = multiplicand; 5029 5030 lgr_if_needed(multiplicand, x_xstart); 5031 z_mlgr(product, y_idx); // multiplicand * y_idx -> product::multiplicand 5032 clear_reg(Z_R7); 5033 z_algr(product_low, carry); // Add carry to result. 5034 z_alcgr(product, Z_R7); // Add carry of the last addition. 5035 add2reg(kdx, -2); 5036 5037 // Store result. 5038 z_sllg(Z_R7, kdx, LogBytesPerInt); 5039 reg2mem_opt(product_low, Address(z, Z_R7, 0)); 5040 lgr_if_needed(carry, product); 5041 z_bru(L_first_loop); 5042 5043 5044 bind(L_one_y); // Load one 32 bit portion of y as (0,value). 5045 5046 clear_reg(y_idx); 5047 mem2reg_opt(y_idx, Address(y, (intptr_t) 0), false); 5048 z_bru(L_multiply); 5049 5050 5051 bind(L_one_x); // Load one 32 bit portion of x as (0,value). 5052 5053 clear_reg(x_xstart); 5054 mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false); 5055 z_bru(L_first_loop); 5056 5057 bind(L_first_loop_exit); 5058 } 5059 5060 // Multiply 64 bit by 64 bit and add 128 bit. 5061 void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y, 5062 Register z, 5063 Register yz_idx, Register idx, 5064 Register carry, Register product, 5065 int offset) { 5066 // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry; 5067 // z[kdx] = (jlong)product; 5068 5069 Register multiplicand = product->successor(); 5070 Register product_low = multiplicand; 5071 5072 z_sllg(Z_R7, idx, LogBytesPerInt); 5073 mem2reg_opt(yz_idx, Address(y, Z_R7, offset)); 5074 5075 lgr_if_needed(multiplicand, x_xstart); 5076 z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand 5077 mem2reg_opt(yz_idx, Address(z, Z_R7, offset)); 5078 5079 add2_with_carry(product, product_low, carry, yz_idx); 5080 5081 z_sllg(Z_R7, idx, LogBytesPerInt); 5082 reg2mem_opt(product_low, Address(z, Z_R7, offset)); 5083 5084 } 5085 5086 // Multiply 128 bit by 128 bit. Unrolled inner loop. 
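// Editorial sketch (comment only): the multiply_add_128_x_128 helper above,
// used twice per iteration of the unrolled loop below, as plain C with a
// 128-bit intermediate. 'offset' is the byte offset parameter (0 or 8).
//
//   unsigned long yv = *(unsigned long*)((char*)y + 4*idx + offset);
//   unsigned long zv = *(unsigned long*)((char*)z + 4*idx + offset);
//   unsigned __int128 product = (unsigned __int128)yv * x_xstart + zv + carry;
//   *(unsigned long*)((char*)z + 4*idx + offset) = (unsigned long)product;
//   // the caller picks up (product >> 64) from the even register as the new carry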
5087 void MacroAssembler::multiply_128_x_128_loop(Register x_xstart, 5088 Register y, Register z, 5089 Register yz_idx, Register idx, 5090 Register jdx, 5091 Register carry, Register product, 5092 Register carry2) { 5093 // jlong carry, x[], y[], z[]; 5094 // int kdx = ystart+1; 5095 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop 5096 // huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry; 5097 // z[kdx+idx+1] = (jlong)product; 5098 // jlong carry2 = (jlong)(product >>> 64); 5099 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry2; 5100 // z[kdx+idx] = (jlong)product; 5101 // carry = (jlong)(product >>> 64); 5102 // } 5103 // idx += 2; 5104 // if (idx > 0) { 5105 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry; 5106 // z[kdx+idx] = (jlong)product; 5107 // carry = (jlong)(product >>> 64); 5108 // } 5109 5110 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; 5111 5112 // scale the index 5113 lgr_if_needed(jdx, idx); 5114 and_imm(jdx, 0xfffffffffffffffcL); 5115 rshift(jdx, 2); 5116 5117 5118 bind(L_third_loop); 5119 5120 z_aghi(jdx, -1); 5121 z_brl(L_third_loop_exit); 5122 add2reg(idx, -4); 5123 5124 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8); 5125 lgr_if_needed(carry2, product); 5126 5127 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0); 5128 lgr_if_needed(carry, product); 5129 z_bru(L_third_loop); 5130 5131 5132 bind(L_third_loop_exit); // Handle any left-over operand parts. 5133 5134 and_imm(idx, 0x3); 5135 z_brz(L_post_third_loop_done); 5136 5137 Label L_check_1; 5138 5139 z_aghi(idx, -2); 5140 z_brl(L_check_1); 5141 5142 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0); 5143 lgr_if_needed(carry, product); 5144 5145 5146 bind(L_check_1); 5147 5148 add2reg(idx, 0x2); 5149 and_imm(idx, 0x1); 5150 z_aghi(idx, -1); 5151 z_brl(L_post_third_loop_done); 5152 5153 Register multiplicand = product->successor(); 5154 Register product_low = multiplicand; 5155 5156 z_sllg(Z_R7, idx, LogBytesPerInt); 5157 clear_reg(yz_idx); 5158 mem2reg_opt(yz_idx, Address(y, Z_R7, 0), false); 5159 lgr_if_needed(multiplicand, x_xstart); 5160 z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand 5161 clear_reg(yz_idx); 5162 mem2reg_opt(yz_idx, Address(z, Z_R7, 0), false); 5163 5164 add2_with_carry(product, product_low, yz_idx, carry); 5165 5166 z_sllg(Z_R7, idx, LogBytesPerInt); 5167 reg2mem_opt(product_low, Address(z, Z_R7, 0), false); 5168 rshift(product_low, 32); 5169 5170 lshift(product, 32); 5171 z_ogr(product_low, product); 5172 lgr_if_needed(carry, product_low); 5173 5174 bind(L_post_third_loop_done); 5175 } 5176 5177 void MacroAssembler::multiply_to_len(Register x, Register xlen, 5178 Register y, Register ylen, 5179 Register z, 5180 Register tmp1, Register tmp2, 5181 Register tmp3, Register tmp4, 5182 Register tmp5) { 5183 ShortBranchVerifier sbv(this); 5184 5185 assert_different_registers(x, xlen, y, ylen, z, 5186 tmp1, tmp2, tmp3, tmp4, tmp5, Z_R1_scratch, Z_R7); 5187 assert_different_registers(x, xlen, y, ylen, z, 5188 tmp1, tmp2, tmp3, tmp4, tmp5, Z_R8); 5189 5190 z_stmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP); 5191 5192 // In openJdk, we store the argument as 32-bit value to slot. 5193 Address zlen(Z_SP, _z_abi(remaining_cargs)); // Int in long on big endian. 
5194 5195 const Register idx = tmp1; 5196 const Register kdx = tmp2; 5197 const Register xstart = tmp3; 5198 5199 const Register y_idx = tmp4; 5200 const Register carry = tmp5; 5201 const Register product = Z_R0_scratch; 5202 const Register x_xstart = Z_R8; 5203 5204 // First Loop. 5205 // 5206 // final static long LONG_MASK = 0xffffffffL; 5207 // int xstart = xlen - 1; 5208 // int ystart = ylen - 1; 5209 // long carry = 0; 5210 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx-, kdx--) { 5211 // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry; 5212 // z[kdx] = (int)product; 5213 // carry = product >>> 32; 5214 // } 5215 // z[xstart] = (int)carry; 5216 // 5217 5218 lgr_if_needed(idx, ylen); // idx = ylen 5219 z_llgf(kdx, zlen); // C2 does not respect int to long conversion for stub calls, thus load zero-extended. 5220 clear_reg(carry); // carry = 0 5221 5222 Label L_done; 5223 5224 lgr_if_needed(xstart, xlen); 5225 z_aghi(xstart, -1); 5226 z_brl(L_done); 5227 5228 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 5229 5230 NearLabel L_second_loop; 5231 compare64_and_branch(kdx, RegisterOrConstant((intptr_t) 0), bcondEqual, L_second_loop); 5232 5233 NearLabel L_carry; 5234 z_aghi(kdx, -1); 5235 z_brz(L_carry); 5236 5237 // Store lower 32 bits of carry. 5238 z_sllg(Z_R1_scratch, kdx, LogBytesPerInt); 5239 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5240 rshift(carry, 32); 5241 z_aghi(kdx, -1); 5242 5243 5244 bind(L_carry); 5245 5246 // Store upper 32 bits of carry. 5247 z_sllg(Z_R1_scratch, kdx, LogBytesPerInt); 5248 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5249 5250 // Second and third (nested) loops. 5251 // 5252 // for (int i = xstart-1; i >= 0; i--) { // Second loop 5253 // carry = 0; 5254 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop 5255 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + 5256 // (z[k] & LONG_MASK) + carry; 5257 // z[k] = (int)product; 5258 // carry = product >>> 32; 5259 // } 5260 // z[i] = (int)carry; 5261 // } 5262 // 5263 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx 5264 5265 const Register jdx = tmp1; 5266 5267 bind(L_second_loop); 5268 5269 clear_reg(carry); // carry = 0; 5270 lgr_if_needed(jdx, ylen); // j = ystart+1 5271 5272 z_aghi(xstart, -1); // i = xstart-1; 5273 z_brl(L_done); 5274 5275 // Use free slots in the current stackframe instead of push/pop. 5276 Address zsave(Z_SP, _z_abi(carg_1)); 5277 reg2mem_opt(z, zsave); 5278 5279 5280 Label L_last_x; 5281 5282 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5283 load_address(z, Address(z, Z_R1_scratch, 4)); // z = z + k - j 5284 z_aghi(xstart, -1); // i = xstart-1; 5285 z_brl(L_last_x); 5286 5287 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5288 mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0)); 5289 5290 5291 Label L_third_loop_prologue; 5292 5293 bind(L_third_loop_prologue); 5294 5295 Address xsave(Z_SP, _z_abi(carg_2)); 5296 Address xlensave(Z_SP, _z_abi(carg_3)); 5297 Address ylensave(Z_SP, _z_abi(carg_4)); 5298 5299 reg2mem_opt(x, xsave); 5300 reg2mem_opt(xstart, xlensave); 5301 reg2mem_opt(ylen, ylensave); 5302 5303 5304 multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x); 5305 5306 mem2reg_opt(z, zsave); 5307 mem2reg_opt(x, xsave); 5308 mem2reg_opt(xlen, xlensave); // This is the decrement of the loop counter! 
5309 mem2reg_opt(ylen, ylensave); 5310 5311 add2reg(tmp3, 1, xlen); 5312 z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt); 5313 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5314 z_aghi(tmp3, -1); 5315 z_brl(L_done); 5316 5317 rshift(carry, 32); 5318 z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt); 5319 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5320 z_bru(L_second_loop); 5321 5322 // Next infrequent code is moved outside loops. 5323 bind(L_last_x); 5324 5325 clear_reg(x_xstart); 5326 mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false); 5327 z_bru(L_third_loop_prologue); 5328 5329 bind(L_done); 5330 5331 z_lmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP); 5332 } 5333 5334 #ifndef PRODUCT 5335 // Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false). 5336 void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) { 5337 Label ok; 5338 if (check_equal) { 5339 z_bre(ok); 5340 } else { 5341 z_brne(ok); 5342 } 5343 stop(msg, id); 5344 bind(ok); 5345 } 5346 5347 // Assert if CC indicates "low". 5348 void MacroAssembler::asm_assert_low(const char *msg, int id) { 5349 Label ok; 5350 z_brnl(ok); 5351 stop(msg, id); 5352 bind(ok); 5353 } 5354 5355 // Assert if CC indicates "high". 5356 void MacroAssembler::asm_assert_high(const char *msg, int id) { 5357 Label ok; 5358 z_brnh(ok); 5359 stop(msg, id); 5360 bind(ok); 5361 } 5362 5363 // Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false) 5364 // generate non-relocatable code. 5365 void MacroAssembler::asm_assert_static(bool check_equal, const char *msg, int id) { 5366 Label ok; 5367 if (check_equal) { z_bre(ok); } 5368 else { z_brne(ok); } 5369 stop_static(msg, id); 5370 bind(ok); 5371 } 5372 5373 void MacroAssembler::asm_assert_mems_zero(bool check_equal, bool allow_relocation, int size, int64_t mem_offset, 5374 Register mem_base, const char* msg, int id) { 5375 switch (size) { 5376 case 4: 5377 load_and_test_int(Z_R0, Address(mem_base, mem_offset)); 5378 break; 5379 case 8: 5380 load_and_test_long(Z_R0, Address(mem_base, mem_offset)); 5381 break; 5382 default: 5383 ShouldNotReachHere(); 5384 } 5385 if (allow_relocation) { asm_assert(check_equal, msg, id); } 5386 else { asm_assert_static(check_equal, msg, id); } 5387 } 5388 5389 // Check the condition 5390 // expected_size == FP - SP 5391 // after transformation: 5392 // expected_size - FP + SP == 0 5393 // Destroys Register expected_size if no tmp register is passed. 5394 void MacroAssembler::asm_assert_frame_size(Register expected_size, Register tmp, const char* msg, int id) { 5395 if (tmp == noreg) { 5396 tmp = expected_size; 5397 } else { 5398 if (tmp != expected_size) { 5399 z_lgr(tmp, expected_size); 5400 } 5401 z_algr(tmp, Z_SP); 5402 z_slg(tmp, 0, Z_R0, Z_SP); 5403 asm_assert_eq(msg, id); 5404 } 5405 } 5406 #endif // !PRODUCT 5407 5408 // Save and restore functions: Exclude Z_R0. 
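// Editorial note on the include_flags handling below (comment only): the
// condition code cannot be stored directly, so save_volatile_regs encodes it
// as a byte via conditional MVIs, and restore_volatile_regs re-creates it with
// CLI against the constant 2:
//
//   stored byte     CLI byte,2     resulting CC
//   2 (equal)       2 == 2         0 (equal)
//   4 (higher)      4 >  2         2 (high)
//   1 (lower)       1 <  2         1 (low)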
5409 void MacroAssembler::save_volatile_regs(Register dst, int offset, bool include_fp, bool include_flags) { 5410 z_stmg(Z_R1, Z_R5, offset, dst); offset += 5 * BytesPerWord; 5411 if (include_fp) { 5412 z_std(Z_F0, Address(dst, offset)); offset += BytesPerWord; 5413 z_std(Z_F1, Address(dst, offset)); offset += BytesPerWord; 5414 z_std(Z_F2, Address(dst, offset)); offset += BytesPerWord; 5415 z_std(Z_F3, Address(dst, offset)); offset += BytesPerWord; 5416 z_std(Z_F4, Address(dst, offset)); offset += BytesPerWord; 5417 z_std(Z_F5, Address(dst, offset)); offset += BytesPerWord; 5418 z_std(Z_F6, Address(dst, offset)); offset += BytesPerWord; 5419 z_std(Z_F7, Address(dst, offset)); offset += BytesPerWord; 5420 } 5421 if (include_flags) { 5422 Label done; 5423 z_mvi(Address(dst, offset), 2); // encoding: equal 5424 z_bre(done); 5425 z_mvi(Address(dst, offset), 4); // encoding: higher 5426 z_brh(done); 5427 z_mvi(Address(dst, offset), 1); // encoding: lower 5428 bind(done); 5429 } 5430 } 5431 void MacroAssembler::restore_volatile_regs(Register src, int offset, bool include_fp, bool include_flags) { 5432 z_lmg(Z_R1, Z_R5, offset, src); offset += 5 * BytesPerWord; 5433 if (include_fp) { 5434 z_ld(Z_F0, Address(src, offset)); offset += BytesPerWord; 5435 z_ld(Z_F1, Address(src, offset)); offset += BytesPerWord; 5436 z_ld(Z_F2, Address(src, offset)); offset += BytesPerWord; 5437 z_ld(Z_F3, Address(src, offset)); offset += BytesPerWord; 5438 z_ld(Z_F4, Address(src, offset)); offset += BytesPerWord; 5439 z_ld(Z_F5, Address(src, offset)); offset += BytesPerWord; 5440 z_ld(Z_F6, Address(src, offset)); offset += BytesPerWord; 5441 z_ld(Z_F7, Address(src, offset)); offset += BytesPerWord; 5442 } 5443 if (include_flags) { 5444 z_cli(Address(src, offset), 2); // see encoding above 5445 } 5446 } 5447 5448 // Plausibility check for oops. 
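// Editorial note (comment only): both verify_oop variants below follow the same
// pattern: save the return pc, push an ABI160 frame with extra room for the
// volatile registers (5 GPRs + 8 FPRs, plus one slot for the condition code in
// verify_oop), load the message into Z_ARG1 and the oop into Z_ARG2, and call
// the verify_oop stub indirectly through its entry address. Everything is
// restored afterwards, so the check is transparent to the surrounding code.
// Roughly:  verify_oop_subroutine(msg /* Z_ARG1 */, oop /* Z_ARG2 */);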
5449 void MacroAssembler::verify_oop(Register oop, const char* msg) { 5450 if (!VerifyOops) return; 5451 5452 BLOCK_COMMENT("verify_oop {"); 5453 unsigned int nbytes_save = (5 + 8 + 1) * BytesPerWord; 5454 address entry_addr = StubRoutines::verify_oop_subroutine_entry_address(); 5455 5456 save_return_pc(); 5457 5458 // Push frame, but preserve flags 5459 z_lgr(Z_R0, Z_SP); 5460 z_lay(Z_SP, -((int64_t)nbytes_save + frame::z_abi_160_size), Z_SP); 5461 z_stg(Z_R0, _z_abi(callers_sp), Z_SP); 5462 5463 save_volatile_regs(Z_SP, frame::z_abi_160_size, true, true); 5464 5465 lgr_if_needed(Z_ARG2, oop); 5466 load_const_optimized(Z_ARG1, (address)msg); 5467 load_const_optimized(Z_R1, entry_addr); 5468 z_lg(Z_R1, 0, Z_R1); 5469 call_c(Z_R1); 5470 5471 restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, true); 5472 pop_frame(); 5473 restore_return_pc(); 5474 5475 BLOCK_COMMENT("} verify_oop "); 5476 } 5477 5478 void MacroAssembler::verify_oop_addr(Address addr, const char* msg) { 5479 if (!VerifyOops) return; 5480 5481 BLOCK_COMMENT("verify_oop {"); 5482 unsigned int nbytes_save = (5 + 8) * BytesPerWord; 5483 address entry_addr = StubRoutines::verify_oop_subroutine_entry_address(); 5484 5485 save_return_pc(); 5486 unsigned int frame_size = push_frame_abi160(nbytes_save); // kills Z_R0 5487 save_volatile_regs(Z_SP, frame::z_abi_160_size, true, false); 5488 5489 z_lg(Z_ARG2, addr.plus_disp(frame_size)); 5490 load_const_optimized(Z_ARG1, (address)msg); 5491 load_const_optimized(Z_R1, entry_addr); 5492 z_lg(Z_R1, 0, Z_R1); 5493 call_c(Z_R1); 5494 5495 restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, false); 5496 pop_frame(); 5497 restore_return_pc(); 5498 5499 BLOCK_COMMENT("} verify_oop "); 5500 } 5501 5502 const char* MacroAssembler::stop_types[] = { 5503 "stop", 5504 "untested", 5505 "unimplemented", 5506 "shouldnotreachhere" 5507 }; 5508 5509 static void stop_on_request(const char* tp, const char* msg) { 5510 tty->print("Z assembly code requires stop: (%s) %s\n", tp, msg); 5511 guarantee(false, "Z assembly code requires stop: %s", msg); 5512 } 5513 5514 void MacroAssembler::stop(int type, const char* msg, int id) { 5515 BLOCK_COMMENT(err_msg("stop: %s {", msg)); 5516 5517 // Setup arguments. 5518 load_const(Z_ARG1, (void*) stop_types[type%stop_end]); 5519 load_const(Z_ARG2, (void*) msg); 5520 get_PC(Z_R14); // Following code pushes a frame without entering a new function. Use current pc as return address. 5521 save_return_pc(); // Saves return pc Z_R14. 5522 push_frame_abi160(0); 5523 call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2); 5524 // The plain disassembler does not recognize illtrap. It instead displays 5525 // a 32-bit value. Issuing two illtraps assures the disassembler finds 5526 // the proper beginning of the next instruction. 5527 z_illtrap(); // Illegal instruction. 5528 z_illtrap(); // Illegal instruction. 5529 5530 BLOCK_COMMENT(" } stop"); 5531 } 5532 5533 // Special version of stop() for code size reduction. 5534 // Reuses the previously generated call sequence, if any. 5535 // Generates the call sequence on its own, if necessary. 5536 // Note: This code will work only in non-relocatable code! 5537 // The relative address of the data elements (arg1, arg2) must not change. 5538 // The reentry point must not move relative to it's users. This prerequisite 5539 // should be given for "hand-written" code, if all chain calls are in the same code blob. 5540 // Generated code must not undergo any transformation, e.g. ShortenBranches, to be safe. 
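// Editorial sketch (comment only): intended use of the chaining, with a
// hypothetical local 'reentry'. The first call emits the full call sequence and
// returns its re-entry address; later calls that are within branch range simply
// branch to it, saving code size.
//
//   address reentry = NULL;
//   reentry = stop_chain(reentry, 0 /* "stop" */, "check 1 failed", 0, false);
//   reentry = stop_chain(reentry, 0 /* "stop" */, "check 2 failed", 0, false);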
5541 address MacroAssembler::stop_chain(address reentry, int type, const char* msg, int id, bool allow_relocation) { 5542 BLOCK_COMMENT(err_msg("stop_chain(%s,%s): %s {", reentry==NULL?"init":"cont", allow_relocation?"reloc ":"static", msg)); 5543 5544 // Setup arguments. 5545 if (allow_relocation) { 5546 // Relocatable version (for comparison purposes). Remove after some time. 5547 load_const(Z_ARG1, (void*) stop_types[type%stop_end]); 5548 load_const(Z_ARG2, (void*) msg); 5549 } else { 5550 load_absolute_address(Z_ARG1, (address)stop_types[type%stop_end]); 5551 load_absolute_address(Z_ARG2, (address)msg); 5552 } 5553 if ((reentry != NULL) && RelAddr::is_in_range_of_RelAddr16(reentry, pc())) { 5554 BLOCK_COMMENT("branch to reentry point:"); 5555 z_brc(bcondAlways, reentry); 5556 } else { 5557 BLOCK_COMMENT("reentry point:"); 5558 reentry = pc(); // Re-entry point for subsequent stop calls. 5559 save_return_pc(); // Saves return pc Z_R14. 5560 push_frame_abi160(0); 5561 if (allow_relocation) { 5562 reentry = NULL; // Prevent reentry if code relocation is allowed. 5563 call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2); 5564 } else { 5565 call_VM_leaf_static(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2); 5566 } 5567 z_illtrap(); // Illegal instruction as emergency stop, should the above call return. 5568 } 5569 BLOCK_COMMENT(" } stop_chain"); 5570 5571 return reentry; 5572 } 5573 5574 // Special version of stop() for code size reduction. 5575 // Assumes constant relative addresses for data and runtime call. 5576 void MacroAssembler::stop_static(int type, const char* msg, int id) { 5577 stop_chain(NULL, type, msg, id, false); 5578 } 5579 5580 void MacroAssembler::stop_subroutine() { 5581 unimplemented("stop_subroutine", 710); 5582 } 5583 5584 // Prints msg to stdout from within generated code.. 5585 void MacroAssembler::warn(const char* msg) { 5586 RegisterSaver::save_live_registers(this, RegisterSaver::all_registers, Z_R14); 5587 load_absolute_address(Z_R1, (address) warning); 5588 load_absolute_address(Z_ARG1, (address) msg); 5589 (void) call(Z_R1); 5590 RegisterSaver::restore_live_registers(this, RegisterSaver::all_registers); 5591 } 5592 5593 #ifndef PRODUCT 5594 5595 // Write pattern 0x0101010101010101 in region [low-before, high+after]. 
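// Editorial sketch (comment only): the region written by zap_from_to below, as
// plain C for the general (loop) path; 'low' and 'high' are treated as
// doubleword pointers and 'before'/'after' are counts of doublewords.
//
//   for (unsigned long* p = low - before; p <= high + after; p++) {
//     *p = 0x0101010101010101UL;
//   }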
5596 void MacroAssembler::zap_from_to(Register low, Register high, Register val, Register addr, int before, int after) { 5597 if (!ZapEmptyStackFields) return; 5598 BLOCK_COMMENT("zap memory region {"); 5599 load_const_optimized(val, 0x0101010101010101); 5600 int size = before + after; 5601 if (low == high && size < 5 && size > 0) { 5602 int offset = -before*BytesPerWord; 5603 for (int i = 0; i < size; ++i) { 5604 z_stg(val, Address(low, offset)); 5605 offset +=(1*BytesPerWord); 5606 } 5607 } else { 5608 add2reg(addr, -before*BytesPerWord, low); 5609 if (after) { 5610 #ifdef ASSERT 5611 jlong check = after * BytesPerWord; 5612 assert(Immediate::is_simm32(check) && Immediate::is_simm32(-check), "value not encodable !"); 5613 #endif 5614 add2reg(high, after * BytesPerWord); 5615 } 5616 NearLabel loop; 5617 bind(loop); 5618 z_stg(val, Address(addr)); 5619 add2reg(addr, 8); 5620 compare64_and_branch(addr, high, bcondNotHigh, loop); 5621 if (after) { 5622 add2reg(high, -after * BytesPerWord); 5623 } 5624 } 5625 BLOCK_COMMENT("} zap memory region"); 5626 } 5627 #endif // !PRODUCT 5628 5629 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value, Register _rscratch) { 5630 _masm = masm; 5631 _masm->load_absolute_address(_rscratch, (address)flag_addr); 5632 _masm->load_and_test_int(_rscratch, Address(_rscratch)); 5633 if (value) { 5634 _masm->z_brne(_label); // Skip if true, i.e. != 0. 5635 } else { 5636 _masm->z_bre(_label); // Skip if false, i.e. == 0. 5637 } 5638 } 5639 5640 SkipIfEqual::~SkipIfEqual() { 5641 _masm->bind(_label); 5642 }
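// Editorial sketch (comment only): typical use of the SkipIfEqual helper above.
// The constructor emits a test of *flag_addr and a branch over the guarded
// region when the flag equals 'value'; the destructor binds the skip label.
// 'UseFooFlag' and the guarded body are hypothetical.
//
//   {
//     SkipIfEqual skip(this, &UseFooFlag, false, Z_R1_scratch);
//     // ...code emitted here is executed only when UseFooFlag is true...
//   }  // ~SkipIfEqual binds the label; execution continues here either way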