1 /* 2 * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2016, 2023 SAP SE. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26 #include "precompiled.hpp" 27 #include "asm/codeBuffer.hpp" 28 #include "asm/macroAssembler.inline.hpp" 29 #include "compiler/disassembler.hpp" 30 #include "gc/shared/barrierSet.hpp" 31 #include "gc/shared/barrierSetAssembler.hpp" 32 #include "gc/shared/collectedHeap.inline.hpp" 33 #include "interpreter/interpreter.hpp" 34 #include "gc/shared/cardTableBarrierSet.hpp" 35 #include "memory/resourceArea.hpp" 36 #include "memory/universe.hpp" 37 #include "oops/accessDecorators.hpp" 38 #include "oops/compressedKlass.inline.hpp" 39 #include "oops/compressedOops.inline.hpp" 40 #include "oops/klass.inline.hpp" 41 #include "prims/methodHandles.hpp" 42 #include "registerSaver_s390.hpp" 43 #include "runtime/icache.hpp" 44 #include "runtime/interfaceSupport.inline.hpp" 45 #include "runtime/objectMonitor.hpp" 46 #include "runtime/os.hpp" 47 #include "runtime/safepoint.hpp" 48 #include "runtime/safepointMechanism.hpp" 49 #include "runtime/sharedRuntime.hpp" 50 #include "runtime/stubRoutines.hpp" 51 #include "utilities/events.hpp" 52 #include "utilities/macros.hpp" 53 #include "utilities/powerOfTwo.hpp" 54 55 #include <ucontext.h> 56 57 #define BLOCK_COMMENT(str) block_comment(str) 58 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 59 60 // Move 32-bit register if destination and source are different. 61 void MacroAssembler::lr_if_needed(Register rd, Register rs) { 62 if (rs != rd) { z_lr(rd, rs); } 63 } 64 65 // Move register if destination and source are different. 66 void MacroAssembler::lgr_if_needed(Register rd, Register rs) { 67 if (rs != rd) { z_lgr(rd, rs); } 68 } 69 70 // Zero-extend 32-bit register into 64-bit register if destination and source are different. 71 void MacroAssembler::llgfr_if_needed(Register rd, Register rs) { 72 if (rs != rd) { z_llgfr(rd, rs); } 73 } 74 75 // Move float register if destination and source are different. 76 void MacroAssembler::ldr_if_needed(FloatRegister rd, FloatRegister rs) { 77 if (rs != rd) { z_ldr(rd, rs); } 78 } 79 80 // Move integer register if destination and source are different. 81 // It is assumed that shorter-than-int types are already 82 // appropriately sign-extended. 
83 void MacroAssembler::move_reg_if_needed(Register dst, BasicType dst_type, Register src, 84 BasicType src_type) { 85 assert((dst_type != T_FLOAT) && (dst_type != T_DOUBLE), "use move_freg for float types"); 86 assert((src_type != T_FLOAT) && (src_type != T_DOUBLE), "use move_freg for float types"); 87 88 if (dst_type == src_type) { 89 lgr_if_needed(dst, src); // Just move all 64 bits. 90 return; 91 } 92 93 switch (dst_type) { 94 // Do not support these types for now. 95 // case T_BOOLEAN: 96 case T_BYTE: // signed byte 97 switch (src_type) { 98 case T_INT: 99 z_lgbr(dst, src); 100 break; 101 default: 102 ShouldNotReachHere(); 103 } 104 return; 105 106 case T_CHAR: 107 case T_SHORT: 108 switch (src_type) { 109 case T_INT: 110 if (dst_type == T_CHAR) { 111 z_llghr(dst, src); 112 } else { 113 z_lghr(dst, src); 114 } 115 break; 116 default: 117 ShouldNotReachHere(); 118 } 119 return; 120 121 case T_INT: 122 switch (src_type) { 123 case T_BOOLEAN: 124 case T_BYTE: 125 case T_CHAR: 126 case T_SHORT: 127 case T_INT: 128 case T_LONG: 129 case T_OBJECT: 130 case T_ARRAY: 131 case T_VOID: 132 case T_ADDRESS: 133 lr_if_needed(dst, src); 134 // llgfr_if_needed(dst, src); // zero-extend (in case we need to find a bug). 135 return; 136 137 default: 138 assert(false, "non-integer src type"); 139 return; 140 } 141 case T_LONG: 142 switch (src_type) { 143 case T_BOOLEAN: 144 case T_BYTE: 145 case T_CHAR: 146 case T_SHORT: 147 case T_INT: 148 z_lgfr(dst, src); // sign extension 149 return; 150 151 case T_LONG: 152 case T_OBJECT: 153 case T_ARRAY: 154 case T_VOID: 155 case T_ADDRESS: 156 lgr_if_needed(dst, src); 157 return; 158 159 default: 160 assert(false, "non-integer src type"); 161 return; 162 } 163 return; 164 case T_OBJECT: 165 case T_ARRAY: 166 case T_VOID: 167 case T_ADDRESS: 168 switch (src_type) { 169 // These types don't make sense to be converted to pointers: 170 // case T_BOOLEAN: 171 // case T_BYTE: 172 // case T_CHAR: 173 // case T_SHORT: 174 175 case T_INT: 176 z_llgfr(dst, src); // zero extension 177 return; 178 179 case T_LONG: 180 case T_OBJECT: 181 case T_ARRAY: 182 case T_VOID: 183 case T_ADDRESS: 184 lgr_if_needed(dst, src); 185 return; 186 187 default: 188 assert(false, "non-integer src type"); 189 return; 190 } 191 return; 192 default: 193 assert(false, "non-integer dst type"); 194 return; 195 } 196 } 197 198 // Move float register if destination and source are different. 199 void MacroAssembler::move_freg_if_needed(FloatRegister dst, BasicType dst_type, 200 FloatRegister src, BasicType src_type) { 201 assert((dst_type == T_FLOAT) || (dst_type == T_DOUBLE), "use move_reg for int types"); 202 assert((src_type == T_FLOAT) || (src_type == T_DOUBLE), "use move_reg for int types"); 203 if (dst_type == src_type) { 204 ldr_if_needed(dst, src); // Just move all 64 bits. 205 } else { 206 switch (dst_type) { 207 case T_FLOAT: 208 assert(src_type == T_DOUBLE, "invalid float type combination"); 209 z_ledbr(dst, src); 210 return; 211 case T_DOUBLE: 212 assert(src_type == T_FLOAT, "invalid float type combination"); 213 z_ldebr(dst, src); 214 return; 215 default: 216 assert(false, "non-float dst type"); 217 return; 218 } 219 } 220 } 221 222 // Optimized emitter for reg to mem operations. 223 // Uses modern instructions if running on modern hardware, classic instructions 224 // otherwise. Prefers (usually shorter) classic instructions if applicable. 225 // Data register (reg) cannot be used as work register. 
226 // 227 // Don't rely on register locking, instead pass a scratch register (Z_R0 by default). 228 // CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs! 229 void MacroAssembler::freg2mem_opt(FloatRegister reg, 230 int64_t disp, 231 Register index, 232 Register base, 233 void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register), 234 void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register), 235 Register scratch) { 236 index = (index == noreg) ? Z_R0 : index; 237 if (Displacement::is_shortDisp(disp)) { 238 (this->*classic)(reg, disp, index, base); 239 } else { 240 if (Displacement::is_validDisp(disp)) { 241 (this->*modern)(reg, disp, index, base); 242 } else { 243 if (scratch != Z_R0 && scratch != Z_R1) { 244 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 245 } else { 246 if (scratch != Z_R0) { // scratch == Z_R1 247 if ((scratch == index) || (index == base)) { 248 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 249 } else { 250 add2reg(scratch, disp, base); 251 (this->*classic)(reg, 0, index, scratch); 252 if (base == scratch) { 253 add2reg(base, -disp); // Restore base. 254 } 255 } 256 } else { // scratch == Z_R0 257 z_lgr(scratch, base); 258 add2reg(base, disp); 259 (this->*classic)(reg, 0, index, base); 260 z_lgr(base, scratch); // Restore base. 261 } 262 } 263 } 264 } 265 } 266 267 void MacroAssembler::freg2mem_opt(FloatRegister reg, const Address &a, bool is_double) { 268 if (is_double) { 269 freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stdy), CLASSIC_FFUN(z_std)); 270 } else { 271 freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stey), CLASSIC_FFUN(z_ste)); 272 } 273 } 274 275 // Optimized emitter for mem to reg operations. 276 // Uses modern instructions if running on modern hardware, classic instructions 277 // otherwise. Prefers (usually shorter) classic instructions if applicable. 278 // data register (reg) cannot be used as work register. 279 // 280 // Don't rely on register locking, instead pass a scratch register (Z_R0 by default). 281 // CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs! 282 void MacroAssembler::mem2freg_opt(FloatRegister reg, 283 int64_t disp, 284 Register index, 285 Register base, 286 void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register), 287 void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register), 288 Register scratch) { 289 index = (index == noreg) ? Z_R0 : index; 290 if (Displacement::is_shortDisp(disp)) { 291 (this->*classic)(reg, disp, index, base); 292 } else { 293 if (Displacement::is_validDisp(disp)) { 294 (this->*modern)(reg, disp, index, base); 295 } else { 296 if (scratch != Z_R0 && scratch != Z_R1) { 297 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 298 } else { 299 if (scratch != Z_R0) { // scratch == Z_R1 300 if ((scratch == index) || (index == base)) { 301 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 302 } else { 303 add2reg(scratch, disp, base); 304 (this->*classic)(reg, 0, index, scratch); 305 if (base == scratch) { 306 add2reg(base, -disp); // Restore base. 307 } 308 } 309 } else { // scratch == Z_R0 310 z_lgr(scratch, base); 311 add2reg(base, disp); 312 (this->*classic)(reg, 0, index, base); 313 z_lgr(base, scratch); // Restore base. 
314 } 315 } 316 } 317 } 318 } 319 320 void MacroAssembler::mem2freg_opt(FloatRegister reg, const Address &a, bool is_double) { 321 if (is_double) { 322 mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ldy), CLASSIC_FFUN(z_ld)); 323 } else { 324 mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ley), CLASSIC_FFUN(z_le)); 325 } 326 } 327 328 // Optimized emitter for reg to mem operations. 329 // Uses modern instructions if running on modern hardware, classic instructions 330 // otherwise. Prefers (usually shorter) classic instructions if applicable. 331 // Data register (reg) cannot be used as work register. 332 // 333 // Don't rely on register locking, instead pass a scratch register 334 // (Z_R0 by default) 335 // CAUTION! passing registers >= Z_R2 may produce bad results on old CPUs! 336 void MacroAssembler::reg2mem_opt(Register reg, 337 int64_t disp, 338 Register index, 339 Register base, 340 void (MacroAssembler::*modern) (Register, int64_t, Register, Register), 341 void (MacroAssembler::*classic)(Register, int64_t, Register, Register), 342 Register scratch) { 343 index = (index == noreg) ? Z_R0 : index; 344 if (Displacement::is_shortDisp(disp)) { 345 (this->*classic)(reg, disp, index, base); 346 } else { 347 if (Displacement::is_validDisp(disp)) { 348 (this->*modern)(reg, disp, index, base); 349 } else { 350 if (scratch != Z_R0 && scratch != Z_R1) { 351 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 352 } else { 353 if (scratch != Z_R0) { // scratch == Z_R1 354 if ((scratch == index) || (index == base)) { 355 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 356 } else { 357 add2reg(scratch, disp, base); 358 (this->*classic)(reg, 0, index, scratch); 359 if (base == scratch) { 360 add2reg(base, -disp); // Restore base. 361 } 362 } 363 } else { // scratch == Z_R0 364 if ((scratch == reg) || (scratch == base) || (reg == base)) { 365 (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. 366 } else { 367 z_lgr(scratch, base); 368 add2reg(base, disp); 369 (this->*classic)(reg, 0, index, base); 370 z_lgr(base, scratch); // Restore base. 371 } 372 } 373 } 374 } 375 } 376 } 377 378 int MacroAssembler::reg2mem_opt(Register reg, const Address &a, bool is_double) { 379 int store_offset = offset(); 380 if (is_double) { 381 reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_stg), CLASSIC_IFUN(z_stg)); 382 } else { 383 reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_sty), CLASSIC_IFUN(z_st)); 384 } 385 return store_offset; 386 } 387 388 // Optimized emitter for mem to reg operations. 389 // Uses modern instructions if running on modern hardware, classic instructions 390 // otherwise. Prefers (usually shorter) classic instructions if applicable. 391 // Data register (reg) will be used as work register where possible. 392 void MacroAssembler::mem2reg_opt(Register reg, 393 int64_t disp, 394 Register index, 395 Register base, 396 void (MacroAssembler::*modern) (Register, int64_t, Register, Register), 397 void (MacroAssembler::*classic)(Register, int64_t, Register, Register)) { 398 index = (index == noreg) ? 
Z_R0 : index; 399 if (Displacement::is_shortDisp(disp)) { 400 (this->*classic)(reg, disp, index, base); 401 } else { 402 if (Displacement::is_validDisp(disp)) { 403 (this->*modern)(reg, disp, index, base); 404 } else { 405 if ((reg == index) && (reg == base)) { 406 z_sllg(reg, reg, 1); 407 add2reg(reg, disp); 408 (this->*classic)(reg, 0, noreg, reg); 409 } else if ((reg == index) && (reg != Z_R0)) { 410 add2reg(reg, disp); 411 (this->*classic)(reg, 0, reg, base); 412 } else if (reg == base) { 413 add2reg(reg, disp); 414 (this->*classic)(reg, 0, index, reg); 415 } else if (reg != Z_R0) { 416 add2reg(reg, disp, base); 417 (this->*classic)(reg, 0, index, reg); 418 } else { // reg == Z_R0 && reg != base here 419 add2reg(base, disp); 420 (this->*classic)(reg, 0, index, base); 421 add2reg(base, -disp); 422 } 423 } 424 } 425 } 426 427 void MacroAssembler::mem2reg_opt(Register reg, const Address &a, bool is_double) { 428 if (is_double) { 429 z_lg(reg, a); 430 } else { 431 mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_ly), CLASSIC_IFUN(z_l)); 432 } 433 } 434 435 void MacroAssembler::mem2reg_signed_opt(Register reg, const Address &a) { 436 mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_lgf), CLASSIC_IFUN(z_lgf)); 437 } 438 439 void MacroAssembler::and_imm(Register r, long mask, 440 Register tmp /* = Z_R0 */, 441 bool wide /* = false */) { 442 assert(wide || Immediate::is_simm32(mask), "mask value too large"); 443 444 if (!wide) { 445 z_nilf(r, mask); 446 return; 447 } 448 449 assert(r != tmp, " need a different temporary register !"); 450 load_const_optimized(tmp, mask); 451 z_ngr(r, tmp); 452 } 453 454 // Calculate the 1's complement. 455 // Note: The condition code is neither preserved nor correctly set by this code!!! 456 // Note: (wide == false) does not protect the high order half of the target register 457 // from alteration. It only serves as optimization hint for 32-bit results. 458 void MacroAssembler::not_(Register r1, Register r2, bool wide) { 459 460 if ((r2 == noreg) || (r2 == r1)) { // Calc 1's complement in place. 461 z_xilf(r1, -1); 462 if (wide) { 463 z_xihf(r1, -1); 464 } 465 } else { // Distinct src and dst registers. 466 load_const_optimized(r1, -1); 467 z_xgr(r1, r2); 468 } 469 } 470 471 unsigned long MacroAssembler::create_mask(int lBitPos, int rBitPos) { 472 assert(lBitPos >= 0, "zero is leftmost bit position"); 473 assert(rBitPos <= 63, "63 is rightmost bit position"); 474 assert(lBitPos <= rBitPos, "inverted selection interval"); 475 return (lBitPos == 0 ? (unsigned long)(-1L) : ((1UL<<(63-lBitPos+1))-1)) & (~((1UL<<(63-rBitPos))-1)); 476 } 477 478 // Helper function for the "Rotate_then_<logicalOP>" emitters. 479 // Rotate src, then mask register contents such that only bits in range survive. 480 // For oneBits == false, all bits not in range are set to 0. Useful for deleting all bits outside range. 481 // For oneBits == true, all bits not in range are set to 1. Useful for preserving all bits outside range. 482 // The caller must ensure that the selected range only contains bits with defined value. 483 void MacroAssembler::rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos, 484 int nRotate, bool src32bit, bool dst32bit, bool oneBits) { 485 assert(!(dst32bit && lBitPos < 32), "selection interval out of range for int destination"); 486 bool sll4rll = (nRotate >= 0) && (nRotate <= (63-rBitPos)); // Substitute SLL(G) for RLL(G). 
487 bool srl4rll = (nRotate < 0) && (-nRotate <= lBitPos); // Substitute SRL(G) for RLL(G). 488 // Pre-determine which parts of dst will be zero after shift/rotate. 489 bool llZero = sll4rll && (nRotate >= 16); 490 bool lhZero = (sll4rll && (nRotate >= 32)) || (srl4rll && (nRotate <= -48)); 491 bool lfZero = llZero && lhZero; 492 bool hlZero = (sll4rll && (nRotate >= 48)) || (srl4rll && (nRotate <= -32)); 493 bool hhZero = (srl4rll && (nRotate <= -16)); 494 bool hfZero = hlZero && hhZero; 495 496 // rotate then mask src operand. 497 // if oneBits == true, all bits outside selected range are 1s. 498 // if oneBits == false, all bits outside selected range are 0s. 499 if (src32bit) { // There might be garbage in the upper 32 bits which will get masked away. 500 if (dst32bit) { 501 z_rll(dst, src, nRotate); // Copy and rotate, upper half of reg remains undisturbed. 502 } else { 503 if (sll4rll) { z_sllg(dst, src, nRotate); } 504 else if (srl4rll) { z_srlg(dst, src, -nRotate); } 505 else { z_rllg(dst, src, nRotate); } 506 } 507 } else { 508 if (sll4rll) { z_sllg(dst, src, nRotate); } 509 else if (srl4rll) { z_srlg(dst, src, -nRotate); } 510 else { z_rllg(dst, src, nRotate); } 511 } 512 513 unsigned long range_mask = create_mask(lBitPos, rBitPos); 514 unsigned int range_mask_h = (unsigned int)(range_mask >> 32); 515 unsigned int range_mask_l = (unsigned int)range_mask; 516 unsigned short range_mask_hh = (unsigned short)(range_mask >> 48); 517 unsigned short range_mask_hl = (unsigned short)(range_mask >> 32); 518 unsigned short range_mask_lh = (unsigned short)(range_mask >> 16); 519 unsigned short range_mask_ll = (unsigned short)range_mask; 520 // Works for z9 and newer H/W. 521 if (oneBits) { 522 if ((~range_mask_l) != 0) { z_oilf(dst, ~range_mask_l); } // All bits outside range become 1s. 523 if (((~range_mask_h) != 0) && !dst32bit) { z_oihf(dst, ~range_mask_h); } 524 } else { 525 // All bits outside range become 0s 526 if (((~range_mask_l) != 0) && !lfZero) { 527 z_nilf(dst, range_mask_l); 528 } 529 if (((~range_mask_h) != 0) && !dst32bit && !hfZero) { 530 z_nihf(dst, range_mask_h); 531 } 532 } 533 } 534 535 // Rotate src, then insert selected range from rotated src into dst. 536 // Clear dst before, if requested. 537 void MacroAssembler::rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos, 538 int nRotate, bool clear_dst) { 539 // This version does not depend on src being zero-extended int2long. 540 nRotate &= 0x003f; // For risbg, pretend it's an unsigned value. 541 z_risbg(dst, src, lBitPos, rBitPos, nRotate, clear_dst); // Rotate, then insert selected, clear the rest. 542 } 543 544 // Rotate src, then and selected range from rotated src into dst. 545 // Set condition code only if so requested. Otherwise it is unpredictable. 546 // See performance note in macroAssembler_s390.hpp for important information. 547 void MacroAssembler::rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos, 548 int nRotate, bool test_only) { 549 guarantee(!test_only, "Emitter not fit for test_only instruction variant."); 550 // This version does not depend on src being zero-extended int2long. 551 nRotate &= 0x003f; // For risbg, pretend it's an unsigned value. 552 z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected. 553 } 554 555 // Rotate src, then or selected range from rotated src into dst. 556 // Set condition code only if so requested. Otherwise it is unpredictable. 
557 // See performance note in macroAssembler_s390.hpp for important information. 558 void MacroAssembler::rotate_then_or(Register dst, Register src, int lBitPos, int rBitPos, 559 int nRotate, bool test_only) { 560 guarantee(!test_only, "Emitter not fit for test_only instruction variant."); 561 // This version does not depend on src being zero-extended int2long. 562 nRotate &= 0x003f; // For risbg, pretend it's an unsigned value. 563 z_rosbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected. 564 } 565 566 // Rotate src, then xor selected range from rotated src into dst. 567 // Set condition code only if so requested. Otherwise it is unpredictable. 568 // See performance note in macroAssembler_s390.hpp for important information. 569 void MacroAssembler::rotate_then_xor(Register dst, Register src, int lBitPos, int rBitPos, 570 int nRotate, bool test_only) { 571 guarantee(!test_only, "Emitter not fit for test_only instruction variant."); 572 // This version does not depend on src being zero-extended int2long. 573 nRotate &= 0x003f; // For risbg, pretend it's an unsigned value. 574 z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected. 575 } 576 577 void MacroAssembler::add64(Register r1, RegisterOrConstant inc) { 578 if (inc.is_register()) { 579 z_agr(r1, inc.as_register()); 580 } else { // constant 581 intptr_t imm = inc.as_constant(); 582 add2reg(r1, imm); 583 } 584 } 585 // Helper function to multiply the 64bit contents of a register by a 16bit constant. 586 // The optimization tries to avoid the mghi instruction, since it uses the FPU for 587 // calculation and is thus rather slow. 588 // 589 // There is no handling for special cases, e.g. cval==0 or cval==1. 590 // 591 // Returns len of generated code block. 592 unsigned int MacroAssembler::mul_reg64_const16(Register rval, Register work, int cval) { 593 int block_start = offset(); 594 595 bool sign_flip = cval < 0; 596 cval = sign_flip ? -cval : cval; 597 598 BLOCK_COMMENT("Reg64*Con16 {"); 599 600 int bit1 = cval & -cval; 601 if (bit1 == cval) { 602 z_sllg(rval, rval, exact_log2(bit1)); 603 if (sign_flip) { z_lcgr(rval, rval); } 604 } else { 605 int bit2 = (cval-bit1) & -(cval-bit1); 606 if ((bit1+bit2) == cval) { 607 z_sllg(work, rval, exact_log2(bit1)); 608 z_sllg(rval, rval, exact_log2(bit2)); 609 z_agr(rval, work); 610 if (sign_flip) { z_lcgr(rval, rval); } 611 } else { 612 if (sign_flip) { z_mghi(rval, -cval); } 613 else { z_mghi(rval, cval); } 614 } 615 } 616 BLOCK_COMMENT("} Reg64*Con16"); 617 618 int block_end = offset(); 619 return block_end - block_start; 620 } 621 622 // Generic operation r1 := r2 + imm. 623 // 624 // Should produce the best code for each supported CPU version. 625 // r2 == noreg yields r1 := r1 + imm 626 // imm == 0 emits either no instruction or r1 := r2 ! 627 // NOTES: 1) Don't use this function where fixed sized 628 // instruction sequences are required!!! 629 // 2) Don't use this function if condition code 630 // setting is required! 631 // 3) Despite being declared as int64_t, the parameter imm 632 // must be a simm_32 value (= signed 32-bit integer). 633 void MacroAssembler::add2reg(Register r1, int64_t imm, Register r2) { 634 assert(Immediate::is_simm32(imm), "probably an implicit conversion went wrong"); 635 636 if (r2 == noreg) { r2 = r1; } 637 638 // Handle special case imm == 0. 639 if (imm == 0) { 640 lgr_if_needed(r1, r2); 641 // Nothing else to do. 
642 return; 643 } 644 645 if (!PreferLAoverADD || (r2 == Z_R0)) { 646 bool distinctOpnds = VM_Version::has_DistinctOpnds(); 647 648 // Can we encode imm in 16 bits signed? 649 if (Immediate::is_simm16(imm)) { 650 if (r1 == r2) { 651 z_aghi(r1, imm); 652 return; 653 } 654 if (distinctOpnds) { 655 z_aghik(r1, r2, imm); 656 return; 657 } 658 z_lgr(r1, r2); 659 z_aghi(r1, imm); 660 return; 661 } 662 } else { 663 // Can we encode imm in 12 bits unsigned? 664 if (Displacement::is_shortDisp(imm)) { 665 z_la(r1, imm, r2); 666 return; 667 } 668 // Can we encode imm in 20 bits signed? 669 if (Displacement::is_validDisp(imm)) { 670 // Always use LAY instruction, so we don't need the tmp register. 671 z_lay(r1, imm, r2); 672 return; 673 } 674 675 } 676 677 // Can handle it (all possible values) with long immediates. 678 lgr_if_needed(r1, r2); 679 z_agfi(r1, imm); 680 } 681 682 // Generic operation r := b + x + d 683 // 684 // Addition of several operands with address generation semantics - sort of: 685 // - no restriction on the registers. Any register will do for any operand. 686 // - x == noreg: operand will be disregarded. 687 // - b == noreg: will use (contents of) result reg as operand (r := r + d). 688 // - x == Z_R0: just disregard 689 // - b == Z_R0: use as operand. This is not address generation semantics!!! 690 // 691 // The same restrictions as on add2reg() are valid!!! 692 void MacroAssembler::add2reg_with_index(Register r, int64_t d, Register x, Register b) { 693 assert(Immediate::is_simm32(d), "probably an implicit conversion went wrong"); 694 695 if (x == noreg) { x = Z_R0; } 696 if (b == noreg) { b = r; } 697 698 // Handle special case x == R0. 699 if (x == Z_R0) { 700 // Can simply add the immediate value to the base register. 701 add2reg(r, d, b); 702 return; 703 } 704 705 if (!PreferLAoverADD || (b == Z_R0)) { 706 bool distinctOpnds = VM_Version::has_DistinctOpnds(); 707 // Handle special case d == 0. 708 if (d == 0) { 709 if (b == x) { z_sllg(r, b, 1); return; } 710 if (r == x) { z_agr(r, b); return; } 711 if (r == b) { z_agr(r, x); return; } 712 if (distinctOpnds) { z_agrk(r, x, b); return; } 713 z_lgr(r, b); 714 z_agr(r, x); 715 } else { 716 if (x == b) { z_sllg(r, x, 1); } 717 else if (r == x) { z_agr(r, b); } 718 else if (r == b) { z_agr(r, x); } 719 else if (distinctOpnds) { z_agrk(r, x, b); } 720 else { 721 z_lgr(r, b); 722 z_agr(r, x); 723 } 724 add2reg(r, d); 725 } 726 } else { 727 // Can we encode imm in 12 bits unsigned? 728 if (Displacement::is_shortDisp(d)) { 729 z_la(r, d, x, b); 730 return; 731 } 732 // Can we encode imm in 20 bits signed? 733 if (Displacement::is_validDisp(d)) { 734 z_lay(r, d, x, b); 735 return; 736 } 737 z_la(r, 0, x, b); 738 add2reg(r, d); 739 } 740 } 741 742 // Generic emitter (32bit) for direct memory increment. 743 // For optimal code, do not specify Z_R0 as temp register. 
744 void MacroAssembler::add2mem_32(const Address &a, int64_t imm, Register tmp) { 745 if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) { 746 z_asi(a, imm); 747 } else { 748 z_lgf(tmp, a); 749 add2reg(tmp, imm); 750 z_st(tmp, a); 751 } 752 } 753 754 void MacroAssembler::add2mem_64(const Address &a, int64_t imm, Register tmp) { 755 if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) { 756 z_agsi(a, imm); 757 } else { 758 z_lg(tmp, a); 759 add2reg(tmp, imm); 760 z_stg(tmp, a); 761 } 762 } 763 764 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 765 switch (size_in_bytes) { 766 case 8: z_lg(dst, src); break; 767 case 4: is_signed ? z_lgf(dst, src) : z_llgf(dst, src); break; 768 case 2: is_signed ? z_lgh(dst, src) : z_llgh(dst, src); break; 769 case 1: is_signed ? z_lgb(dst, src) : z_llgc(dst, src); break; 770 default: ShouldNotReachHere(); 771 } 772 } 773 774 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) { 775 switch (size_in_bytes) { 776 case 8: z_stg(src, dst); break; 777 case 4: z_st(src, dst); break; 778 case 2: z_sth(src, dst); break; 779 case 1: z_stc(src, dst); break; 780 default: ShouldNotReachHere(); 781 } 782 } 783 784 // Split a si20 offset (20bit, signed) into an ui12 offset (12bit, unsigned) and 785 // a high-order summand in register tmp. 786 // 787 // return value: < 0: No split required, si20 actually has property uimm12. 788 // >= 0: Split performed. Use return value as uimm12 displacement and 789 // tmp as index register. 790 int MacroAssembler::split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate) { 791 assert(Immediate::is_simm20(si20_offset), "sanity"); 792 int lg_off = (int)si20_offset & 0x0fff; // Punch out low-order 12 bits, always positive. 793 int ll_off = (int)si20_offset & ~0x0fff; // Force low-order 12 bits to zero. 794 assert((Displacement::is_shortDisp(si20_offset) && (ll_off == 0)) || 795 !Displacement::is_shortDisp(si20_offset), "unexpected offset values"); 796 assert((lg_off+ll_off) == si20_offset, "offset splitup error"); 797 798 Register work = accumulate? Z_R0 : tmp; 799 800 if (fixed_codelen) { // Len of code = 10 = 4 + 6. 801 z_lghi(work, ll_off>>12); // Implicit sign extension. 802 z_slag(work, work, 12); 803 } else { // Len of code = 0..10. 804 if (ll_off == 0) { return -1; } 805 // ll_off has 8 significant bits (at most) plus sign. 806 if ((ll_off & 0x0000f000) == 0) { // Non-zero bits only in upper halfbyte. 807 z_llilh(work, ll_off >> 16); 808 if (ll_off < 0) { // Sign-extension required. 809 z_lgfr(work, work); 810 } 811 } else { 812 if ((ll_off & 0x000f0000) == 0) { // Non-zero bits only in lower halfbyte. 813 z_llill(work, ll_off); 814 } else { // Non-zero bits in both halfbytes. 815 z_lghi(work, ll_off>>12); // Implicit sign extension. 816 z_slag(work, work, 12); 817 } 818 } 819 } 820 if (accumulate) { z_algr(tmp, work); } // len of code += 4 821 return lg_off; 822 } 823 824 void MacroAssembler::load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) { 825 if (Displacement::is_validDisp(si20)) { 826 z_ley(t, si20, a); 827 } else { 828 // Fixed_codelen = true is a simple way to ensure that the size of load_float_largeoffset 829 // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant 830 // pool loads). 
831 bool accumulate = true; 832 bool fixed_codelen = true; 833 Register work; 834 835 if (fixed_codelen) { 836 z_lgr(tmp, a); // Lgr_if_needed not applicable due to fixed_codelen. 837 } else { 838 accumulate = (a == tmp); 839 } 840 work = tmp; 841 842 int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate); 843 if (disp12 < 0) { 844 z_le(t, si20, work); 845 } else { 846 if (accumulate) { 847 z_le(t, disp12, work); 848 } else { 849 z_le(t, disp12, work, a); 850 } 851 } 852 } 853 } 854 855 void MacroAssembler::load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) { 856 if (Displacement::is_validDisp(si20)) { 857 z_ldy(t, si20, a); 858 } else { 859 // Fixed_codelen = true is a simple way to ensure that the size of load_double_largeoffset 860 // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant 861 // pool loads). 862 bool accumulate = true; 863 bool fixed_codelen = true; 864 Register work; 865 866 if (fixed_codelen) { 867 z_lgr(tmp, a); // Lgr_if_needed not applicable due to fixed_codelen. 868 } else { 869 accumulate = (a == tmp); 870 } 871 work = tmp; 872 873 int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate); 874 if (disp12 < 0) { 875 z_ld(t, si20, work); 876 } else { 877 if (accumulate) { 878 z_ld(t, disp12, work); 879 } else { 880 z_ld(t, disp12, work, a); 881 } 882 } 883 } 884 } 885 886 // PCrelative TOC access. 887 // Returns distance (in bytes) from current position to start of consts section. 888 // Returns 0 (zero) if no consts section exists or if it has size zero. 889 long MacroAssembler::toc_distance() { 890 CodeSection* cs = code()->consts(); 891 return (long)((cs != NULL) ? cs->start()-pc() : 0); 892 } 893 894 // Implementation on x86/sparc assumes that constant and instruction section are 895 // adjacent, but this doesn't hold. Two special situations may occur, that we must 896 // be able to handle: 897 // 1. const section may be located apart from the inst section. 898 // 2. const section may be empty 899 // In both cases, we use the const section's start address to compute the "TOC", 900 // this seems to occur only temporarily; in the final step we always seem to end up 901 // with the pc-relatice variant. 902 // 903 // PC-relative offset could be +/-2**32 -> use long for disp 904 // Furthermore: makes no sense to have special code for 905 // adjacent const and inst sections. 906 void MacroAssembler::load_toc(Register Rtoc) { 907 // Simply use distance from start of const section (should be patched in the end). 908 long disp = toc_distance(); 909 910 RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp); 911 relocate(rspec); 912 z_larl(Rtoc, RelAddr::pcrel_off32(disp)); // Offset is in halfwords. 913 } 914 915 // PCrelative TOC access. 916 // Load from anywhere pcrelative (with relocation of load instr) 917 void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) { 918 address pc = this->pc(); 919 ptrdiff_t total_distance = dataLocation - pc; 920 RelocationHolder rspec = internal_word_Relocation::spec(dataLocation); 921 922 assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory"); 923 assert(total_distance != 0, "sanity"); 924 925 // Some extra safety net. 
926 if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { 927 guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance); 928 } 929 930 (this)->relocate(rspec, relocInfo::pcrel_addr_format); 931 z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance)); 932 } 933 934 935 // PCrelative TOC access. 936 // Load from anywhere pcrelative (with relocation of load instr) 937 // loaded addr has to be relocated when added to constant pool. 938 void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) { 939 address pc = this->pc(); 940 ptrdiff_t total_distance = addrLocation - pc; 941 RelocationHolder rspec = internal_word_Relocation::spec(addrLocation); 942 943 assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory"); 944 945 // Some extra safety net. 946 if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { 947 guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance); 948 } 949 950 (this)->relocate(rspec, relocInfo::pcrel_addr_format); 951 z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance)); 952 } 953 954 // Generic operation: load a value from memory and test. 955 // CondCode indicates the sign (<0, ==0, >0) of the loaded value. 956 void MacroAssembler::load_and_test_byte(Register dst, const Address &a) { 957 z_lb(dst, a); 958 z_ltr(dst, dst); 959 } 960 961 void MacroAssembler::load_and_test_short(Register dst, const Address &a) { 962 int64_t disp = a.disp20(); 963 if (Displacement::is_shortDisp(disp)) { 964 z_lh(dst, a); 965 } else if (Displacement::is_longDisp(disp)) { 966 z_lhy(dst, a); 967 } else { 968 guarantee(false, "displacement out of range"); 969 } 970 z_ltr(dst, dst); 971 } 972 973 void MacroAssembler::load_and_test_int(Register dst, const Address &a) { 974 z_lt(dst, a); 975 } 976 977 void MacroAssembler::load_and_test_int2long(Register dst, const Address &a) { 978 z_ltgf(dst, a); 979 } 980 981 void MacroAssembler::load_and_test_long(Register dst, const Address &a) { 982 z_ltg(dst, a); 983 } 984 985 // Test a bit in memory. 986 void MacroAssembler::testbit(const Address &a, unsigned int bit) { 987 assert(a.index() == noreg, "no index reg allowed in testbit"); 988 if (bit <= 7) { 989 z_tm(a.disp() + 3, a.base(), 1 << bit); 990 } else if (bit <= 15) { 991 z_tm(a.disp() + 2, a.base(), 1 << (bit - 8)); 992 } else if (bit <= 23) { 993 z_tm(a.disp() + 1, a.base(), 1 << (bit - 16)); 994 } else if (bit <= 31) { 995 z_tm(a.disp() + 0, a.base(), 1 << (bit - 24)); 996 } else { 997 ShouldNotReachHere(); 998 } 999 } 1000 1001 // Test a bit in a register. Result is reflected in CC. 1002 void MacroAssembler::testbit(Register r, unsigned int bitPos) { 1003 if (bitPos < 16) { 1004 z_tmll(r, 1U<<bitPos); 1005 } else if (bitPos < 32) { 1006 z_tmlh(r, 1U<<(bitPos-16)); 1007 } else if (bitPos < 48) { 1008 z_tmhl(r, 1U<<(bitPos-32)); 1009 } else if (bitPos < 64) { 1010 z_tmhh(r, 1U<<(bitPos-48)); 1011 } else { 1012 ShouldNotReachHere(); 1013 } 1014 } 1015 1016 void MacroAssembler::prefetch_read(Address a) { 1017 z_pfd(1, a.disp20(), a.indexOrR0(), a.base()); 1018 } 1019 void MacroAssembler::prefetch_update(Address a) { 1020 z_pfd(2, a.disp20(), a.indexOrR0(), a.base()); 1021 } 1022 1023 // Clear a register, i.e. load const zero into reg. 1024 // Return len (in bytes) of generated instruction(s). 1025 // whole_reg: Clear 64 bits if true, 32 bits otherwise. 
1026 // set_cc: Use instruction that sets the condition code, if true. 1027 int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) { 1028 unsigned int start_off = offset(); 1029 if (whole_reg) { 1030 set_cc ? z_xgr(r, r) : z_laz(r, 0, Z_R0); 1031 } else { // Only 32bit register. 1032 set_cc ? z_xr(r, r) : z_lhi(r, 0); 1033 } 1034 return offset() - start_off; 1035 } 1036 1037 #ifdef ASSERT 1038 int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) { 1039 switch (pattern_len) { 1040 case 1: 1041 pattern = (pattern & 0x000000ff) | ((pattern & 0x000000ff)<<8); 1042 case 2: 1043 pattern = (pattern & 0x0000ffff) | ((pattern & 0x0000ffff)<<16); 1044 case 4: 1045 pattern = (pattern & 0xffffffffL) | ((pattern & 0xffffffffL)<<32); 1046 case 8: 1047 return load_const_optimized_rtn_len(r, pattern, true); 1048 break; 1049 default: 1050 guarantee(false, "preset_reg: bad len"); 1051 } 1052 return 0; 1053 } 1054 #endif 1055 1056 // addr: Address descriptor of memory to clear. Index register will not be used! 1057 // size: Number of bytes to clear. 1058 // condition code will not be preserved. 1059 // !!! DO NOT USE THEM FOR ATOMIC MEMORY CLEARING !!! 1060 // !!! Use store_const() instead !!! 1061 void MacroAssembler::clear_mem(const Address& addr, unsigned int size) { 1062 guarantee((addr.disp() + size) <= 4096, "MacroAssembler::clear_mem: size too large"); 1063 1064 switch (size) { 1065 case 0: 1066 return; 1067 case 1: 1068 z_mvi(addr, 0); 1069 return; 1070 case 2: 1071 z_mvhhi(addr, 0); 1072 return; 1073 case 4: 1074 z_mvhi(addr, 0); 1075 return; 1076 case 8: 1077 z_mvghi(addr, 0); 1078 return; 1079 default: ; // Fallthru to xc. 1080 } 1081 1082 // Caution: the emitter with Address operands does implicitly decrement the length 1083 if (size <= 256) { 1084 z_xc(addr, size, addr); 1085 } else { 1086 unsigned int offset = addr.disp(); 1087 unsigned int incr = 256; 1088 for (unsigned int i = 0; i <= size-incr; i += incr) { 1089 z_xc(offset, incr - 1, addr.base(), offset, addr.base()); 1090 offset += incr; 1091 } 1092 unsigned int rest = size - (offset - addr.disp()); 1093 if (size > 0) { 1094 z_xc(offset, rest-1, addr.base(), offset, addr.base()); 1095 } 1096 } 1097 } 1098 1099 void MacroAssembler::align(int modulus) { 1100 while (offset() % modulus != 0) z_nop(); 1101 } 1102 1103 // Special version for non-relocateable code if required alignment 1104 // is larger than CodeEntryAlignment. 1105 void MacroAssembler::align_address(int modulus) { 1106 while ((uintptr_t)pc() % modulus != 0) z_nop(); 1107 } 1108 1109 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 1110 Register temp_reg, 1111 int64_t extra_slot_offset) { 1112 // On Z, we can have index and disp in an Address. So don't call argument_offset, 1113 // which issues an unnecessary add instruction. 
1114 int stackElementSize = Interpreter::stackElementSize; 1115 int64_t offset = extra_slot_offset * stackElementSize; 1116 const Register argbase = Z_esp; 1117 if (arg_slot.is_constant()) { 1118 offset += arg_slot.as_constant() * stackElementSize; 1119 return Address(argbase, offset); 1120 } 1121 // else 1122 assert(temp_reg != noreg, "must specify"); 1123 assert(temp_reg != Z_ARG1, "base and index are conflicting"); 1124 z_sllg(temp_reg, arg_slot.as_register(), exact_log2(stackElementSize)); // tempreg = arg_slot << 3 1125 return Address(argbase, temp_reg, offset); 1126 } 1127 1128 1129 //=================================================================== 1130 //=== START C O N S T A N T S I N C O D E S T R E A M === 1131 //=================================================================== 1132 //=== P A T CH A B L E C O N S T A N T S === 1133 //=================================================================== 1134 1135 1136 //--------------------------------------------------- 1137 // Load (patchable) constant into register 1138 //--------------------------------------------------- 1139 1140 1141 // Load absolute address (and try to optimize). 1142 // Note: This method is usable only for position-fixed code, 1143 // referring to a position-fixed target location. 1144 // If not so, relocations and patching must be used. 1145 void MacroAssembler::load_absolute_address(Register d, address addr) { 1146 assert(addr != NULL, "should not happen"); 1147 BLOCK_COMMENT("load_absolute_address:"); 1148 if (addr == NULL) { 1149 z_larl(d, pc()); // Dummy emit for size calc. 1150 return; 1151 } 1152 1153 if (RelAddr::is_in_range_of_RelAddr32(addr, pc())) { 1154 z_larl(d, addr); 1155 return; 1156 } 1157 1158 load_const_optimized(d, (long)addr); 1159 } 1160 1161 // Load a 64bit constant. 1162 // Patchable code sequence, but not atomically patchable. 1163 // Make sure to keep code size constant -> no value-dependent optimizations. 1164 // Do not kill condition code. 1165 void MacroAssembler::load_const(Register t, long x) { 1166 // Note: Right shift is only cleanly defined for unsigned types 1167 // or for signed types with nonnegative values. 1168 Assembler::z_iihf(t, (long)((unsigned long)x >> 32)); 1169 Assembler::z_iilf(t, (long)((unsigned long)x & 0xffffffffUL)); 1170 } 1171 1172 // Load a 32bit constant into a 64bit register, sign-extend or zero-extend. 1173 // Patchable code sequence, but not atomically patchable. 1174 // Make sure to keep code size constant -> no value-dependent optimizations. 1175 // Do not kill condition code. 1176 void MacroAssembler::load_const_32to64(Register t, int64_t x, bool sign_extend) { 1177 if (sign_extend) { Assembler::z_lgfi(t, x); } 1178 else { Assembler::z_llilf(t, x); } 1179 } 1180 1181 // Load narrow oop constant, no decompression. 1182 void MacroAssembler::load_narrow_oop(Register t, narrowOop a) { 1183 assert(UseCompressedOops, "must be on to call this method"); 1184 load_const_32to64(t, CompressedOops::narrow_oop_value(a), false /*sign_extend*/); 1185 } 1186 1187 // Load narrow klass constant, compression required. 1188 void MacroAssembler::load_narrow_klass(Register t, Klass* k) { 1189 assert(UseCompressedClassPointers, "must be on to call this method"); 1190 narrowKlass encoded_k = CompressedKlassPointers::encode(k); 1191 load_const_32to64(t, encoded_k, false /*sign_extend*/); 1192 } 1193 1194 //------------------------------------------------------ 1195 // Compare (patchable) constant with register. 
1196 //------------------------------------------------------ 1197 1198 // Compare narrow oop in reg with narrow oop constant, no decompression. 1199 void MacroAssembler::compare_immediate_narrow_oop(Register oop1, narrowOop oop2) { 1200 assert(UseCompressedOops, "must be on to call this method"); 1201 1202 Assembler::z_clfi(oop1, CompressedOops::narrow_oop_value(oop2)); 1203 } 1204 1205 // Compare narrow oop in reg with narrow oop constant, no decompression. 1206 void MacroAssembler::compare_immediate_narrow_klass(Register klass1, Klass* klass2) { 1207 assert(UseCompressedClassPointers, "must be on to call this method"); 1208 narrowKlass encoded_k = CompressedKlassPointers::encode(klass2); 1209 1210 Assembler::z_clfi(klass1, encoded_k); 1211 } 1212 1213 //---------------------------------------------------------- 1214 // Check which kind of load_constant we have here. 1215 //---------------------------------------------------------- 1216 1217 // Detection of CPU version dependent load_const sequence. 1218 // The detection is valid only for code sequences generated by load_const, 1219 // not load_const_optimized. 1220 bool MacroAssembler::is_load_const(address a) { 1221 unsigned long inst1, inst2; 1222 unsigned int len1, len2; 1223 1224 len1 = get_instruction(a, &inst1); 1225 len2 = get_instruction(a + len1, &inst2); 1226 1227 return is_z_iihf(inst1) && is_z_iilf(inst2); 1228 } 1229 1230 // Detection of CPU version dependent load_const_32to64 sequence. 1231 // Mostly used for narrow oops and narrow Klass pointers. 1232 // The detection is valid only for code sequences generated by load_const_32to64. 1233 bool MacroAssembler::is_load_const_32to64(address pos) { 1234 unsigned long inst1, inst2; 1235 unsigned int len1; 1236 1237 len1 = get_instruction(pos, &inst1); 1238 return is_z_llilf(inst1); 1239 } 1240 1241 // Detection of compare_immediate_narrow sequence. 1242 // The detection is valid only for code sequences generated by compare_immediate_narrow_oop. 1243 bool MacroAssembler::is_compare_immediate32(address pos) { 1244 return is_equal(pos, CLFI_ZOPC, RIL_MASK); 1245 } 1246 1247 // Detection of compare_immediate_narrow sequence. 1248 // The detection is valid only for code sequences generated by compare_immediate_narrow_oop. 1249 bool MacroAssembler::is_compare_immediate_narrow_oop(address pos) { 1250 return is_compare_immediate32(pos); 1251 } 1252 1253 // Detection of compare_immediate_narrow sequence. 1254 // The detection is valid only for code sequences generated by compare_immediate_narrow_klass. 1255 bool MacroAssembler::is_compare_immediate_narrow_klass(address pos) { 1256 return is_compare_immediate32(pos); 1257 } 1258 1259 //----------------------------------- 1260 // patch the load_constant 1261 //----------------------------------- 1262 1263 // CPU-version dependent patching of load_const. 1264 void MacroAssembler::patch_const(address a, long x) { 1265 assert(is_load_const(a), "not a load of a constant"); 1266 // Note: Right shift is only cleanly defined for unsigned types 1267 // or for signed types with nonnegative values. 1268 set_imm32((address)a, (long)((unsigned long)x >> 32)); 1269 set_imm32((address)(a + 6), (long)((unsigned long)x & 0xffffffffUL)); 1270 } 1271 1272 // Patching the value of CPU version dependent load_const_32to64 sequence. 1273 // The passed ptr MUST be in compressed format! 
1274 int MacroAssembler::patch_load_const_32to64(address pos, int64_t np) { 1275 assert(is_load_const_32to64(pos), "not a load of a narrow ptr (oop or klass)"); 1276 1277 set_imm32(pos, np); 1278 return 6; 1279 } 1280 1281 // Patching the value of CPU version dependent compare_immediate_narrow sequence. 1282 // The passed ptr MUST be in compressed format! 1283 int MacroAssembler::patch_compare_immediate_32(address pos, int64_t np) { 1284 assert(is_compare_immediate32(pos), "not a compressed ptr compare"); 1285 1286 set_imm32(pos, np); 1287 return 6; 1288 } 1289 1290 // Patching the immediate value of CPU version dependent load_narrow_oop sequence. 1291 // The passed ptr must NOT be in compressed format! 1292 int MacroAssembler::patch_load_narrow_oop(address pos, oop o) { 1293 assert(UseCompressedOops, "Can only patch compressed oops"); 1294 return patch_load_const_32to64(pos, CompressedOops::narrow_oop_value(o)); 1295 } 1296 1297 // Patching the immediate value of CPU version dependent load_narrow_klass sequence. 1298 // The passed ptr must NOT be in compressed format! 1299 int MacroAssembler::patch_load_narrow_klass(address pos, Klass* k) { 1300 assert(UseCompressedClassPointers, "Can only patch compressed klass pointers"); 1301 1302 narrowKlass nk = CompressedKlassPointers::encode(k); 1303 return patch_load_const_32to64(pos, nk); 1304 } 1305 1306 // Patching the immediate value of CPU version dependent compare_immediate_narrow_oop sequence. 1307 // The passed ptr must NOT be in compressed format! 1308 int MacroAssembler::patch_compare_immediate_narrow_oop(address pos, oop o) { 1309 assert(UseCompressedOops, "Can only patch compressed oops"); 1310 return patch_compare_immediate_32(pos, CompressedOops::narrow_oop_value(o)); 1311 } 1312 1313 // Patching the immediate value of CPU version dependent compare_immediate_narrow_klass sequence. 1314 // The passed ptr must NOT be in compressed format! 1315 int MacroAssembler::patch_compare_immediate_narrow_klass(address pos, Klass* k) { 1316 assert(UseCompressedClassPointers, "Can only patch compressed klass pointers"); 1317 1318 narrowKlass nk = CompressedKlassPointers::encode(k); 1319 return patch_compare_immediate_32(pos, nk); 1320 } 1321 1322 //------------------------------------------------------------------------ 1323 // Extract the constant from a load_constant instruction stream. 1324 //------------------------------------------------------------------------ 1325 1326 // Get constant from a load_const sequence. 1327 long MacroAssembler::get_const(address a) { 1328 assert(is_load_const(a), "not a load of a constant"); 1329 unsigned long x; 1330 x = (((unsigned long) (get_imm32(a,0) & 0xffffffff)) << 32); 1331 x |= (((unsigned long) (get_imm32(a,1) & 0xffffffff))); 1332 return (long) x; 1333 } 1334 1335 //-------------------------------------- 1336 // Store a constant in memory. 1337 //-------------------------------------- 1338 1339 // General emitter to move a constant to memory. 1340 // The store is atomic. 1341 // o Address must be given in RS format (no index register) 1342 // o Displacement should be 12bit unsigned for efficiency. 20bit signed also supported. 1343 // o Constant can be 1, 2, 4, or 8 bytes, signed or unsigned. 1344 // o Memory slot can be 1, 2, 4, or 8 bytes, signed or unsigned. 1345 // o Memory slot must be at least as wide as constant, will assert otherwise. 1346 // o Signed constants will sign-extend, unsigned constants will zero-extend to slot width. 
1347 int MacroAssembler::store_const(const Address &dest, long imm, 1348 unsigned int lm, unsigned int lc, 1349 Register scratch) { 1350 int64_t disp = dest.disp(); 1351 Register base = dest.base(); 1352 assert(!dest.has_index(), "not supported"); 1353 assert((lm==1)||(lm==2)||(lm==4)||(lm==8), "memory length not supported"); 1354 assert((lc==1)||(lc==2)||(lc==4)||(lc==8), "constant length not supported"); 1355 assert(lm>=lc, "memory slot too small"); 1356 assert(lc==8 || Immediate::is_simm(imm, lc*8), "const out of range"); 1357 assert(Displacement::is_validDisp(disp), "displacement out of range"); 1358 1359 bool is_shortDisp = Displacement::is_shortDisp(disp); 1360 int store_offset = -1; 1361 1362 // For target len == 1 it's easy. 1363 if (lm == 1) { 1364 store_offset = offset(); 1365 if (is_shortDisp) { 1366 z_mvi(disp, base, imm); 1367 return store_offset; 1368 } else { 1369 z_mviy(disp, base, imm); 1370 return store_offset; 1371 } 1372 } 1373 1374 // All the "good stuff" takes an unsigned displacement. 1375 if (is_shortDisp) { 1376 // NOTE: Cannot use clear_mem for imm==0, because it is not atomic. 1377 1378 store_offset = offset(); 1379 switch (lm) { 1380 case 2: // Lc == 1 handled correctly here, even for unsigned. Instruction does no widening. 1381 z_mvhhi(disp, base, imm); 1382 return store_offset; 1383 case 4: 1384 if (Immediate::is_simm16(imm)) { 1385 z_mvhi(disp, base, imm); 1386 return store_offset; 1387 } 1388 break; 1389 case 8: 1390 if (Immediate::is_simm16(imm)) { 1391 z_mvghi(disp, base, imm); 1392 return store_offset; 1393 } 1394 break; 1395 default: 1396 ShouldNotReachHere(); 1397 break; 1398 } 1399 } 1400 1401 // Can't optimize, so load value and store it. 1402 guarantee(scratch != noreg, " need a scratch register here !"); 1403 if (imm != 0) { 1404 load_const_optimized(scratch, imm); // Preserves CC anyway. 1405 } else { 1406 // Leave CC alone!! 1407 (void) clear_reg(scratch, true, false); // Indicate unused result. 1408 } 1409 1410 store_offset = offset(); 1411 if (is_shortDisp) { 1412 switch (lm) { 1413 case 2: 1414 z_sth(scratch, disp, Z_R0, base); 1415 return store_offset; 1416 case 4: 1417 z_st(scratch, disp, Z_R0, base); 1418 return store_offset; 1419 case 8: 1420 z_stg(scratch, disp, Z_R0, base); 1421 return store_offset; 1422 default: 1423 ShouldNotReachHere(); 1424 break; 1425 } 1426 } else { 1427 switch (lm) { 1428 case 2: 1429 z_sthy(scratch, disp, Z_R0, base); 1430 return store_offset; 1431 case 4: 1432 z_sty(scratch, disp, Z_R0, base); 1433 return store_offset; 1434 case 8: 1435 z_stg(scratch, disp, Z_R0, base); 1436 return store_offset; 1437 default: 1438 ShouldNotReachHere(); 1439 break; 1440 } 1441 } 1442 return -1; // should not reach here 1443 } 1444 1445 //=================================================================== 1446 //=== N O T P A T CH A B L E C O N S T A N T S === 1447 //=================================================================== 1448 1449 // Load constant x into register t with a fast instruction sequence 1450 // depending on the bits in x. Preserves CC under all circumstances. 1451 int MacroAssembler::load_const_optimized_rtn_len(Register t, long x, bool emit) { 1452 if (x == 0) { 1453 int len; 1454 if (emit) { 1455 len = clear_reg(t, true, false); 1456 } else { 1457 len = 4; 1458 } 1459 return len; 1460 } 1461 1462 if (Immediate::is_simm16(x)) { 1463 if (emit) { z_lghi(t, x); } 1464 return 4; 1465 } 1466 1467 // 64 bit value: | part1 | part2 | part3 | part4 | 1468 // At least one part is not zero! 
1469 // Note: Right shift is only cleanly defined for unsigned types 1470 // or for signed types with nonnegative values. 1471 int part1 = (int)((unsigned long)x >> 48) & 0x0000ffff; 1472 int part2 = (int)((unsigned long)x >> 32) & 0x0000ffff; 1473 int part3 = (int)((unsigned long)x >> 16) & 0x0000ffff; 1474 int part4 = (int)x & 0x0000ffff; 1475 int part12 = (int)((unsigned long)x >> 32); 1476 int part34 = (int)x; 1477 1478 // Lower word only (unsigned). 1479 if (part12 == 0) { 1480 if (part3 == 0) { 1481 if (emit) z_llill(t, part4); 1482 return 4; 1483 } 1484 if (part4 == 0) { 1485 if (emit) z_llilh(t, part3); 1486 return 4; 1487 } 1488 if (emit) z_llilf(t, part34); 1489 return 6; 1490 } 1491 1492 // Upper word only. 1493 if (part34 == 0) { 1494 if (part1 == 0) { 1495 if (emit) z_llihl(t, part2); 1496 return 4; 1497 } 1498 if (part2 == 0) { 1499 if (emit) z_llihh(t, part1); 1500 return 4; 1501 } 1502 if (emit) z_llihf(t, part12); 1503 return 6; 1504 } 1505 1506 // Lower word only (signed). 1507 if ((part1 == 0x0000ffff) && (part2 == 0x0000ffff) && ((part3 & 0x00008000) != 0)) { 1508 if (emit) z_lgfi(t, part34); 1509 return 6; 1510 } 1511 1512 int len = 0; 1513 1514 if ((part1 == 0) || (part2 == 0)) { 1515 if (part1 == 0) { 1516 if (emit) z_llihl(t, part2); 1517 len += 4; 1518 } else { 1519 if (emit) z_llihh(t, part1); 1520 len += 4; 1521 } 1522 } else { 1523 if (emit) z_llihf(t, part12); 1524 len += 6; 1525 } 1526 1527 if ((part3 == 0) || (part4 == 0)) { 1528 if (part3 == 0) { 1529 if (emit) z_iill(t, part4); 1530 len += 4; 1531 } else { 1532 if (emit) z_iilh(t, part3); 1533 len += 4; 1534 } 1535 } else { 1536 if (emit) z_iilf(t, part34); 1537 len += 6; 1538 } 1539 return len; 1540 } 1541 1542 //===================================================================== 1543 //=== H I G H E R L E V E L B R A N C H E M I T T E R S === 1544 //===================================================================== 1545 1546 // Note: In the worst case, one of the scratch registers is destroyed!!! 1547 void MacroAssembler::compare32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1548 // Right operand is constant. 1549 if (x2.is_constant()) { 1550 jlong value = x2.as_constant(); 1551 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/true); 1552 return; 1553 } 1554 1555 // Right operand is in register. 1556 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/true); 1557 } 1558 1559 // Note: In the worst case, one of the scratch registers is destroyed!!! 1560 void MacroAssembler::compareU32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1561 // Right operand is constant. 1562 if (x2.is_constant()) { 1563 jlong value = x2.as_constant(); 1564 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/false); 1565 return; 1566 } 1567 1568 // Right operand is in register. 1569 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/false); 1570 } 1571 1572 // Note: In the worst case, one of the scratch registers is destroyed!!! 1573 void MacroAssembler::compare64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1574 // Right operand is constant. 1575 if (x2.is_constant()) { 1576 jlong value = x2.as_constant(); 1577 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/true); 1578 return; 1579 } 1580 1581 // Right operand is in register. 
1582 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/true); 1583 } 1584 1585 void MacroAssembler::compareU64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { 1586 // Right operand is constant. 1587 if (x2.is_constant()) { 1588 jlong value = x2.as_constant(); 1589 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/false); 1590 return; 1591 } 1592 1593 // Right operand is in register. 1594 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/false); 1595 } 1596 1597 // Generate an optimal branch to the branch target. 1598 // Optimal means that a relative branch (brc or brcl) is used if the 1599 // branch distance is short enough. Loading the target address into a 1600 // register and branching via reg is used as fallback only. 1601 // 1602 // Used registers: 1603 // Z_R1 - work reg. Holds branch target address. 1604 // Used in fallback case only. 1605 // 1606 // This version of branch_optimized is good for cases where the target address is known 1607 // and constant, i.e. is never changed (no relocation, no patching). 1608 void MacroAssembler::branch_optimized(Assembler::branch_condition cond, address branch_addr) { 1609 address branch_origin = pc(); 1610 1611 if (RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) { 1612 z_brc(cond, branch_addr); 1613 } else if (RelAddr::is_in_range_of_RelAddr32(branch_addr, branch_origin)) { 1614 z_brcl(cond, branch_addr); 1615 } else { 1616 load_const_optimized(Z_R1, branch_addr); // CC must not get killed by load_const_optimized. 1617 z_bcr(cond, Z_R1); 1618 } 1619 } 1620 1621 // This version of branch_optimized is good for cases where the target address 1622 // is potentially not yet known at the time the code is emitted. 1623 // 1624 // One very common case is a branch to an unbound label which is handled here. 1625 // The caller might know (or hope) that the branch distance is short enough 1626 // to be encoded in a 16bit relative address. In this case he will pass a 1627 // NearLabel branch_target. 1628 // Care must be taken with unbound labels. Each call to target(label) creates 1629 // an entry in the patch queue for that label to patch all references of the label 1630 // once it gets bound. Those recorded patch locations must be patchable. Otherwise, 1631 // an assertion fires at patch time. 1632 void MacroAssembler::branch_optimized(Assembler::branch_condition cond, Label& branch_target) { 1633 if (branch_target.is_bound()) { 1634 address branch_addr = target(branch_target); 1635 branch_optimized(cond, branch_addr); 1636 } else if (branch_target.is_near()) { 1637 z_brc(cond, branch_target); // Caller assures that the target will be in range for z_brc. 1638 } else { 1639 z_brcl(cond, branch_target); // Let's hope target is in range. Otherwise, we will abort at patch time. 1640 } 1641 } 1642 1643 // Generate an optimal compare and branch to the branch target. 1644 // Optimal means that a relative branch (clgrj, brc or brcl) is used if the 1645 // branch distance is short enough. Loading the target address into a 1646 // register and branching via reg is used as fallback only. 
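// Note: in the fallback path the comparison is emitted separately (CR/CLR/CGR/CLGR) and the
// branch goes through branch_optimized(), which may use Z_R1 as work register. This is the
// reason for the scratch register caveat on the compareXX_and_branch wrappers above.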
1647 // 1648 // Input: 1649 // r1 - left compare operand 1650 // r2 - right compare operand 1651 void MacroAssembler::compare_and_branch_optimized(Register r1, 1652 Register r2, 1653 Assembler::branch_condition cond, 1654 address branch_addr, 1655 bool len64, 1656 bool has_sign) { 1657 unsigned int casenum = (len64?2:0)+(has_sign?0:1); 1658 1659 address branch_origin = pc(); 1660 if (VM_Version::has_CompareBranch() && RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) { 1661 switch (casenum) { 1662 case 0: z_crj( r1, r2, cond, branch_addr); break; 1663 case 1: z_clrj (r1, r2, cond, branch_addr); break; 1664 case 2: z_cgrj(r1, r2, cond, branch_addr); break; 1665 case 3: z_clgrj(r1, r2, cond, branch_addr); break; 1666 default: ShouldNotReachHere(); break; 1667 } 1668 } else { 1669 switch (casenum) { 1670 case 0: z_cr( r1, r2); break; 1671 case 1: z_clr(r1, r2); break; 1672 case 2: z_cgr(r1, r2); break; 1673 case 3: z_clgr(r1, r2); break; 1674 default: ShouldNotReachHere(); break; 1675 } 1676 branch_optimized(cond, branch_addr); 1677 } 1678 } 1679 1680 // Generate an optimal compare and branch to the branch target. 1681 // Optimal means that a relative branch (clgij, brc or brcl) is used if the 1682 // branch distance is short enough. Loading the target address into a 1683 // register and branching via reg is used as fallback only. 1684 // 1685 // Input: 1686 // r1 - left compare operand (in register) 1687 // x2 - right compare operand (immediate) 1688 void MacroAssembler::compare_and_branch_optimized(Register r1, 1689 jlong x2, 1690 Assembler::branch_condition cond, 1691 Label& branch_target, 1692 bool len64, 1693 bool has_sign) { 1694 address branch_origin = pc(); 1695 bool x2_imm8 = (has_sign && Immediate::is_simm8(x2)) || (!has_sign && Immediate::is_uimm8(x2)); 1696 bool is_RelAddr16 = branch_target.is_near() || 1697 (branch_target.is_bound() && 1698 RelAddr::is_in_range_of_RelAddr16(target(branch_target), branch_origin)); 1699 unsigned int casenum = (len64?2:0)+(has_sign?0:1); 1700 1701 if (VM_Version::has_CompareBranch() && is_RelAddr16 && x2_imm8) { 1702 switch (casenum) { 1703 case 0: z_cij( r1, x2, cond, branch_target); break; 1704 case 1: z_clij(r1, x2, cond, branch_target); break; 1705 case 2: z_cgij(r1, x2, cond, branch_target); break; 1706 case 3: z_clgij(r1, x2, cond, branch_target); break; 1707 default: ShouldNotReachHere(); break; 1708 } 1709 return; 1710 } 1711 1712 if (x2 == 0) { 1713 switch (casenum) { 1714 case 0: z_ltr(r1, r1); break; 1715 case 1: z_ltr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication! 1716 case 2: z_ltgr(r1, r1); break; 1717 case 3: z_ltgr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication! 
1718 default: ShouldNotReachHere(); break; 1719 } 1720 } else { 1721 if ((has_sign && Immediate::is_simm16(x2)) || (!has_sign && Immediate::is_uimm(x2, 15))) { 1722 switch (casenum) { 1723 case 0: z_chi(r1, x2); break; 1724 case 1: z_chi(r1, x2); break; // positive immediate < 2**15 1725 case 2: z_cghi(r1, x2); break; 1726 case 3: z_cghi(r1, x2); break; // positive immediate < 2**15 1727 default: break; 1728 } 1729 } else if ( (has_sign && Immediate::is_simm32(x2)) || (!has_sign && Immediate::is_uimm32(x2)) ) { 1730 switch (casenum) { 1731 case 0: z_cfi( r1, x2); break; 1732 case 1: z_clfi(r1, x2); break; 1733 case 2: z_cgfi(r1, x2); break; 1734 case 3: z_clgfi(r1, x2); break; 1735 default: ShouldNotReachHere(); break; 1736 } 1737 } else { 1738 // No instruction with immediate operand possible, so load into register. 1739 Register scratch = (r1 != Z_R0) ? Z_R0 : Z_R1; 1740 load_const_optimized(scratch, x2); 1741 switch (casenum) { 1742 case 0: z_cr( r1, scratch); break; 1743 case 1: z_clr(r1, scratch); break; 1744 case 2: z_cgr(r1, scratch); break; 1745 case 3: z_clgr(r1, scratch); break; 1746 default: ShouldNotReachHere(); break; 1747 } 1748 } 1749 } 1750 branch_optimized(cond, branch_target); 1751 } 1752 1753 // Generate an optimal compare and branch to the branch target. 1754 // Optimal means that a relative branch (clgrj, brc or brcl) is used if the 1755 // branch distance is short enough. Loading the target address into a 1756 // register and branching via reg is used as fallback only. 1757 // 1758 // Input: 1759 // r1 - left compare operand 1760 // r2 - right compare operand 1761 void MacroAssembler::compare_and_branch_optimized(Register r1, 1762 Register r2, 1763 Assembler::branch_condition cond, 1764 Label& branch_target, 1765 bool len64, 1766 bool has_sign) { 1767 unsigned int casenum = (len64 ? 2 : 0) + (has_sign ? 
0 : 1); 1768 1769 if (branch_target.is_bound()) { 1770 address branch_addr = target(branch_target); 1771 compare_and_branch_optimized(r1, r2, cond, branch_addr, len64, has_sign); 1772 } else { 1773 if (VM_Version::has_CompareBranch() && branch_target.is_near()) { 1774 switch (casenum) { 1775 case 0: z_crj( r1, r2, cond, branch_target); break; 1776 case 1: z_clrj( r1, r2, cond, branch_target); break; 1777 case 2: z_cgrj( r1, r2, cond, branch_target); break; 1778 case 3: z_clgrj(r1, r2, cond, branch_target); break; 1779 default: ShouldNotReachHere(); break; 1780 } 1781 } else { 1782 switch (casenum) { 1783 case 0: z_cr( r1, r2); break; 1784 case 1: z_clr(r1, r2); break; 1785 case 2: z_cgr(r1, r2); break; 1786 case 3: z_clgr(r1, r2); break; 1787 default: ShouldNotReachHere(); break; 1788 } 1789 branch_optimized(cond, branch_target); 1790 } 1791 } 1792 } 1793 1794 //=========================================================================== 1795 //=== END H I G H E R L E V E L B R A N C H E M I T T E R S === 1796 //=========================================================================== 1797 1798 AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) { 1799 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 1800 int index = oop_recorder()->allocate_metadata_index(obj); 1801 RelocationHolder rspec = metadata_Relocation::spec(index); 1802 return AddressLiteral((address)obj, rspec); 1803 } 1804 1805 AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) { 1806 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 1807 int index = oop_recorder()->find_index(obj); 1808 RelocationHolder rspec = metadata_Relocation::spec(index); 1809 return AddressLiteral((address)obj, rspec); 1810 } 1811 1812 AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) { 1813 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 1814 int oop_index = oop_recorder()->allocate_oop_index(obj); 1815 return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); 1816 } 1817 1818 AddressLiteral MacroAssembler::constant_oop_address(jobject obj) { 1819 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 1820 int oop_index = oop_recorder()->find_index(obj); 1821 return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); 1822 } 1823 1824 // NOTE: destroys r 1825 void MacroAssembler::c2bool(Register r, Register t) { 1826 z_lcr(t, r); // t = -r 1827 z_or(r, t); // r = -r OR r 1828 z_srl(r, 31); // Yields 0 if r was 0, 1 otherwise. 1829 } 1830 1831 // Patch instruction `inst' at offset `inst_pos' to refer to `dest_pos' 1832 // and return the resulting instruction. 1833 // Dest_pos and inst_pos are 32 bit only. These parms can only designate 1834 // relative positions. 1835 // Use correct argument types. Do not pre-calculate distance. 
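// Typical use (sketch; this is what pd_patch_instruction() below does):
//   unsigned long inst;
//   int len = get_instruction(branch_pc, &inst);
//   set_instruction(branch_pc, patched_branch(target_pc, inst, branch_pc), len);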
1836 unsigned long MacroAssembler::patched_branch(address dest_pos, unsigned long inst, address inst_pos) { 1837 int c = 0; 1838 unsigned long patched_inst = 0; 1839 if (is_call_pcrelative_short(inst) || 1840 is_branch_pcrelative_short(inst) || 1841 is_branchoncount_pcrelative_short(inst) || 1842 is_branchonindex32_pcrelative_short(inst)) { 1843 c = 1; 1844 int m = fmask(15, 0); // simm16(-1, 16, 32); 1845 int v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 32); 1846 patched_inst = (inst & ~m) | v; 1847 } else if (is_compareandbranch_pcrelative_short(inst)) { 1848 c = 2; 1849 long m = fmask(31, 16); // simm16(-1, 16, 48); 1850 long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48); 1851 patched_inst = (inst & ~m) | v; 1852 } else if (is_branchonindex64_pcrelative_short(inst)) { 1853 c = 3; 1854 long m = fmask(31, 16); // simm16(-1, 16, 48); 1855 long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48); 1856 patched_inst = (inst & ~m) | v; 1857 } else if (is_call_pcrelative_long(inst) || is_branch_pcrelative_long(inst)) { 1858 c = 4; 1859 long m = fmask(31, 0); // simm32(-1, 16, 48); 1860 long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48); 1861 patched_inst = (inst & ~m) | v; 1862 } else if (is_pcrelative_long(inst)) { // These are the non-branch pc-relative instructions. 1863 c = 5; 1864 long m = fmask(31, 0); // simm32(-1, 16, 48); 1865 long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48); 1866 patched_inst = (inst & ~m) | v; 1867 } else { 1868 print_dbg_msg(tty, inst, "not a relative branch", 0); 1869 dump_code_range(tty, inst_pos, 32, "not a pcrelative branch"); 1870 ShouldNotReachHere(); 1871 } 1872 1873 long new_off = get_pcrel_offset(patched_inst); 1874 if (new_off != (dest_pos-inst_pos)) { 1875 tty->print_cr("case %d: dest_pos = %p, inst_pos = %p, disp = %ld(%12.12lx)", c, dest_pos, inst_pos, new_off, new_off); 1876 print_dbg_msg(tty, inst, "<- original instruction: branch patching error", 0); 1877 print_dbg_msg(tty, patched_inst, "<- patched instruction: branch patching error", 0); 1878 #ifdef LUCY_DBG 1879 VM_Version::z_SIGSEGV(); 1880 #endif 1881 ShouldNotReachHere(); 1882 } 1883 return patched_inst; 1884 } 1885 1886 // Only called when binding labels (share/vm/asm/assembler.cpp) 1887 // Pass arguments as intended. Do not pre-calculate distance. 1888 void MacroAssembler::pd_patch_instruction(address branch, address target, const char* file, int line) { 1889 unsigned long stub_inst; 1890 int inst_len = get_instruction(branch, &stub_inst); 1891 1892 set_instruction(branch, patched_branch(target, stub_inst, branch), inst_len); 1893 } 1894 1895 1896 // Extract relative address (aka offset). 1897 // inv_simm16 works for 4-byte instructions only. 1898 // compare and branch instructions are 6-byte and have a 16bit offset "in the middle". 
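// Field positions assumed by the extraction below (bit 0 = leftmost bit of the instruction):
//   4-byte pc-relative branch (e.g. BRC):              16-bit offset in bits 16..31 -> inv_simm16
//   6-byte compare-and-branch (e.g. CRJ, RIE format):  16-bit offset in bits 16..31 -> inv_simm16_48
//   6-byte pc-relative branch/load (e.g. BRCL, LARL):  32-bit offset in bits 16..47 -> inv_simm32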
1899 long MacroAssembler::get_pcrel_offset(unsigned long inst) { 1900 1901 if (MacroAssembler::is_pcrelative_short(inst)) { 1902 if (((inst&0xFFFFffff00000000UL) == 0) && ((inst&0x00000000FFFF0000UL) != 0)) { 1903 return RelAddr::inv_pcrel_off16(inv_simm16(inst)); 1904 } else { 1905 return RelAddr::inv_pcrel_off16(inv_simm16_48(inst)); 1906 } 1907 } 1908 1909 if (MacroAssembler::is_pcrelative_long(inst)) { 1910 return RelAddr::inv_pcrel_off32(inv_simm32(inst)); 1911 } 1912 1913 print_dbg_msg(tty, inst, "not a pcrelative instruction", 6); 1914 #ifdef LUCY_DBG 1915 VM_Version::z_SIGSEGV(); 1916 #else 1917 ShouldNotReachHere(); 1918 #endif 1919 return -1; 1920 } 1921 1922 long MacroAssembler::get_pcrel_offset(address pc) { 1923 unsigned long inst; 1924 unsigned int len = get_instruction(pc, &inst); 1925 1926 #ifdef ASSERT 1927 long offset; 1928 if (MacroAssembler::is_pcrelative_short(inst) || MacroAssembler::is_pcrelative_long(inst)) { 1929 offset = get_pcrel_offset(inst); 1930 } else { 1931 offset = -1; 1932 } 1933 1934 if (offset == -1) { 1935 dump_code_range(tty, pc, 32, "not a pcrelative instruction"); 1936 #ifdef LUCY_DBG 1937 VM_Version::z_SIGSEGV(); 1938 #else 1939 ShouldNotReachHere(); 1940 #endif 1941 } 1942 return offset; 1943 #else 1944 return get_pcrel_offset(inst); 1945 #endif // ASSERT 1946 } 1947 1948 // Get target address from pc-relative instructions. 1949 address MacroAssembler::get_target_addr_pcrel(address pc) { 1950 assert(is_pcrelative_long(pc), "not a pcrelative instruction"); 1951 return pc + get_pcrel_offset(pc); 1952 } 1953 1954 // Patch pc relative load address. 1955 void MacroAssembler::patch_target_addr_pcrel(address pc, address con) { 1956 unsigned long inst; 1957 // Offset is +/- 2**32 -> use long. 1958 ptrdiff_t distance = con - pc; 1959 1960 get_instruction(pc, &inst); 1961 1962 if (is_pcrelative_short(inst)) { 1963 *(short *)(pc+2) = RelAddr::pcrel_off16(con, pc); // Instructions are at least 2-byte aligned, no test required. 1964 1965 // Some extra safety net. 1966 if (!RelAddr::is_in_range_of_RelAddr16(distance)) { 1967 print_dbg_msg(tty, inst, "distance out of range (16bit)", 4); 1968 dump_code_range(tty, pc, 32, "distance out of range (16bit)"); 1969 guarantee(RelAddr::is_in_range_of_RelAddr16(distance), "too far away (more than +/- 2**16"); 1970 } 1971 return; 1972 } 1973 1974 if (is_pcrelative_long(inst)) { 1975 *(int *)(pc+2) = RelAddr::pcrel_off32(con, pc); 1976 1977 // Some Extra safety net. 1978 if (!RelAddr::is_in_range_of_RelAddr32(distance)) { 1979 print_dbg_msg(tty, inst, "distance out of range (32bit)", 6); 1980 dump_code_range(tty, pc, 32, "distance out of range (32bit)"); 1981 guarantee(RelAddr::is_in_range_of_RelAddr32(distance), "too far away (more than +/- 2**32"); 1982 } 1983 return; 1984 } 1985 1986 guarantee(false, "not a pcrelative instruction to patch!"); 1987 } 1988 1989 // "Current PC" here means the address just behind the basr instruction. 1990 address MacroAssembler::get_PC(Register result) { 1991 z_basr(result, Z_R0); // Don't branch, just save next instruction address in result. 1992 return pc(); 1993 } 1994 1995 // Get current PC + offset. 1996 // Offset given in bytes, must be even! 1997 // "Current PC" here means the address of the larl instruction plus the given offset. 1998 address MacroAssembler::get_PC(Register result, int64_t offset) { 1999 address here = pc(); 2000 z_larl(result, offset/2); // Save target instruction address in result. 
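  // LARL encodes its operand in halfwords relative to the LARL instruction itself,
  // hence the division by 2 (and the requirement above that offset be even).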
2001 return here + offset; 2002 } 2003 2004 void MacroAssembler::instr_size(Register size, Register pc) { 2005 // Extract 2 most significant bits of current instruction. 2006 z_llgc(size, Address(pc)); 2007 z_srl(size, 6); 2008 // Compute (x+3)&6 which translates 0->2, 1->4, 2->4, 3->6. 2009 z_ahi(size, 3); 2010 z_nill(size, 6); 2011 } 2012 2013 // Resize_frame with SP(new) = SP(old) - [offset]. 2014 void MacroAssembler::resize_frame_sub(Register offset, Register fp, bool load_fp) 2015 { 2016 assert_different_registers(offset, fp, Z_SP); 2017 if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); } 2018 2019 z_sgr(Z_SP, offset); 2020 z_stg(fp, _z_abi(callers_sp), Z_SP); 2021 } 2022 2023 // Resize_frame with SP(new) = [newSP] + offset. 2024 // This emitter is useful if we already have calculated a pointer 2025 // into the to-be-allocated stack space, e.g. with special alignment properties, 2026 // but need some additional space, e.g. for spilling. 2027 // newSP is the pre-calculated pointer. It must not be modified. 2028 // fp holds, or is filled with, the frame pointer. 2029 // offset is the additional increment which is added to addr to form the new SP. 2030 // Note: specify a negative value to reserve more space! 2031 // load_fp == true only indicates that fp is not pre-filled with the frame pointer. 2032 // It does not guarantee that fp contains the frame pointer at the end. 2033 void MacroAssembler::resize_frame_abs_with_offset(Register newSP, Register fp, int offset, bool load_fp) { 2034 assert_different_registers(newSP, fp, Z_SP); 2035 2036 if (load_fp) { 2037 z_lg(fp, _z_abi(callers_sp), Z_SP); 2038 } 2039 2040 add2reg(Z_SP, offset, newSP); 2041 z_stg(fp, _z_abi(callers_sp), Z_SP); 2042 } 2043 2044 // Resize_frame with SP(new) = [newSP]. 2045 // load_fp == true only indicates that fp is not pre-filled with the frame pointer. 2046 // It does not guarantee that fp contains the frame pointer at the end. 2047 void MacroAssembler::resize_frame_absolute(Register newSP, Register fp, bool load_fp) { 2048 assert_different_registers(newSP, fp, Z_SP); 2049 2050 if (load_fp) { 2051 z_lg(fp, _z_abi(callers_sp), Z_SP); // need to use load/store. 2052 } 2053 2054 z_lgr(Z_SP, newSP); 2055 if (newSP != Z_R0) { // make sure we generate correct code, no matter what register newSP uses. 2056 z_stg(fp, _z_abi(callers_sp), newSP); 2057 } else { 2058 z_stg(fp, _z_abi(callers_sp), Z_SP); 2059 } 2060 } 2061 2062 // Resize_frame with SP(new) = SP(old) + offset. 2063 void MacroAssembler::resize_frame(RegisterOrConstant offset, Register fp, bool load_fp) { 2064 assert_different_registers(fp, Z_SP); 2065 2066 if (load_fp) { 2067 z_lg(fp, _z_abi(callers_sp), Z_SP); 2068 } 2069 add64(Z_SP, offset); 2070 z_stg(fp, _z_abi(callers_sp), Z_SP); 2071 } 2072 2073 void MacroAssembler::push_frame(Register bytes, Register old_sp, bool copy_sp, bool bytes_with_inverted_sign) { 2074 #ifdef ASSERT 2075 assert_different_registers(bytes, old_sp, Z_SP); 2076 if (!copy_sp) { 2077 z_cgr(old_sp, Z_SP); 2078 asm_assert_eq("[old_sp]!=[Z_SP]", 0x211); 2079 } 2080 #endif 2081 if (copy_sp) { z_lgr(old_sp, Z_SP); } 2082 if (bytes_with_inverted_sign) { 2083 z_agr(Z_SP, bytes); 2084 } else { 2085 z_sgr(Z_SP, bytes); // Z_sgfr sufficient, but probably not faster. 
2086 } 2087 z_stg(old_sp, _z_abi(callers_sp), Z_SP); 2088 } 2089 2090 unsigned int MacroAssembler::push_frame(unsigned int bytes, Register scratch) { 2091 long offset = Assembler::align(bytes, frame::alignment_in_bytes); 2092 assert(offset > 0, "should push a frame with positive size, size = %ld.", offset); 2093 assert(Displacement::is_validDisp(-offset), "frame size out of range, size = %ld", offset); 2094 2095 // We must not write outside the current stack bounds (given by Z_SP). 2096 // Thus, we have to first update Z_SP and then store the previous SP as stack linkage. 2097 // We rely on Z_R0 by default to be available as scratch. 2098 z_lgr(scratch, Z_SP); 2099 add2reg(Z_SP, -offset); 2100 z_stg(scratch, _z_abi(callers_sp), Z_SP); 2101 #ifdef ASSERT 2102 // Just make sure nobody uses the value in the default scratch register. 2103 // When another register is used, the caller might rely on it containing the frame pointer. 2104 if (scratch == Z_R0) { 2105 z_iihf(scratch, 0xbaadbabe); 2106 z_iilf(scratch, 0xdeadbeef); 2107 } 2108 #endif 2109 return offset; 2110 } 2111 2112 // Push a frame of size `bytes' plus abi160 on top. 2113 unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) { 2114 BLOCK_COMMENT("push_frame_abi160 {"); 2115 unsigned int res = push_frame(bytes + frame::z_abi_160_size); 2116 BLOCK_COMMENT("} push_frame_abi160"); 2117 return res; 2118 } 2119 2120 // Pop current C frame. 2121 void MacroAssembler::pop_frame() { 2122 BLOCK_COMMENT("pop_frame:"); 2123 Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP); 2124 } 2125 2126 // Pop current C frame and restore return PC register (Z_R14). 2127 void MacroAssembler::pop_frame_restore_retPC(int frame_size_in_bytes) { 2128 BLOCK_COMMENT("pop_frame_restore_retPC:"); 2129 int retPC_offset = _z_abi16(return_pc) + frame_size_in_bytes; 2130 // If possible, pop frame by add instead of load (a penny saved is a penny got :-). 2131 if (Displacement::is_validDisp(retPC_offset)) { 2132 z_lg(Z_R14, retPC_offset, Z_SP); 2133 add2reg(Z_SP, frame_size_in_bytes); 2134 } else { 2135 add2reg(Z_SP, frame_size_in_bytes); 2136 restore_return_pc(); 2137 } 2138 } 2139 2140 void MacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) { 2141 if (allow_relocation) { 2142 call_c(entry_point); 2143 } else { 2144 call_c_static(entry_point); 2145 } 2146 } 2147 2148 void MacroAssembler::call_VM_leaf_base(address entry_point) { 2149 bool allow_relocation = true; 2150 call_VM_leaf_base(entry_point, allow_relocation); 2151 } 2152 2153 void MacroAssembler::call_VM_base(Register oop_result, 2154 Register last_java_sp, 2155 address entry_point, 2156 bool allow_relocation, 2157 bool check_exceptions) { // Defaults to true. 2158 // Allow_relocation indicates, if true, that the generated code shall 2159 // be fit for code relocation or referenced data relocation. In other 2160 // words: all addresses must be considered variable. PC-relative addressing 2161 // is not possible then. 2162 // On the other hand, if (allow_relocation == false), addresses and offsets 2163 // may be considered stable, enabling us to take advantage of some PC-relative 2164 // addressing tweaks. These might improve performance and reduce code size. 2165 2166 // Determine last_java_sp register. 2167 if (!last_java_sp->is_valid()) { 2168 last_java_sp = Z_SP; // Load Z_SP as SP. 2169 } 2170 2171 set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, Z_R1, allow_relocation); 2172 2173 // ARG1 must hold thread address. 
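  // The VM entry points reached via call_VM expect the current JavaThread* as their
  // first C argument; the Java-visible arguments therefore start at Z_ARG2 in the
  // call_VM wrappers further down.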
2174 z_lgr(Z_ARG1, Z_thread); 2175 2176 address return_pc = NULL; 2177 if (allow_relocation) { 2178 return_pc = call_c(entry_point); 2179 } else { 2180 return_pc = call_c_static(entry_point); 2181 } 2182 2183 reset_last_Java_frame(allow_relocation); 2184 2185 // C++ interp handles this in the interpreter. 2186 check_and_handle_popframe(Z_thread); 2187 check_and_handle_earlyret(Z_thread); 2188 2189 // Check for pending exceptions. 2190 if (check_exceptions) { 2191 // Check for pending exceptions (java_thread is set upon return). 2192 load_and_test_long(Z_R0_scratch, Address(Z_thread, Thread::pending_exception_offset())); 2193 2194 // This used to conditionally jump to forward_exception however it is 2195 // possible if we relocate that the branch will not reach. So we must jump 2196 // around so we can always reach. 2197 2198 Label ok; 2199 z_bre(ok); // Bcondequal is the same as bcondZero. 2200 call_stub(StubRoutines::forward_exception_entry()); 2201 bind(ok); 2202 } 2203 2204 // Get oop result if there is one and reset the value in the thread. 2205 if (oop_result->is_valid()) { 2206 get_vm_result(oop_result); 2207 } 2208 2209 _last_calls_return_pc = return_pc; // Wipe out other (error handling) calls. 2210 } 2211 2212 void MacroAssembler::call_VM_base(Register oop_result, 2213 Register last_java_sp, 2214 address entry_point, 2215 bool check_exceptions) { // Defaults to true. 2216 bool allow_relocation = true; 2217 call_VM_base(oop_result, last_java_sp, entry_point, allow_relocation, check_exceptions); 2218 } 2219 2220 // VM calls without explicit last_java_sp. 2221 2222 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) { 2223 // Call takes possible detour via InterpreterMacroAssembler. 2224 call_VM_base(oop_result, noreg, entry_point, true, check_exceptions); 2225 } 2226 2227 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) { 2228 // Z_ARG1 is reserved for the thread. 2229 lgr_if_needed(Z_ARG2, arg_1); 2230 call_VM(oop_result, entry_point, check_exceptions); 2231 } 2232 2233 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { 2234 // Z_ARG1 is reserved for the thread. 2235 lgr_if_needed(Z_ARG2, arg_1); 2236 assert(arg_2 != Z_ARG2, "smashed argument"); 2237 lgr_if_needed(Z_ARG3, arg_2); 2238 call_VM(oop_result, entry_point, check_exceptions); 2239 } 2240 2241 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, 2242 Register arg_3, bool check_exceptions) { 2243 // Z_ARG1 is reserved for the thread. 2244 lgr_if_needed(Z_ARG2, arg_1); 2245 assert(arg_2 != Z_ARG2, "smashed argument"); 2246 lgr_if_needed(Z_ARG3, arg_2); 2247 assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument"); 2248 lgr_if_needed(Z_ARG4, arg_3); 2249 call_VM(oop_result, entry_point, check_exceptions); 2250 } 2251 2252 // VM static calls without explicit last_java_sp. 2253 2254 void MacroAssembler::call_VM_static(Register oop_result, address entry_point, bool check_exceptions) { 2255 // Call takes possible detour via InterpreterMacroAssembler. 2256 call_VM_base(oop_result, noreg, entry_point, false, check_exceptions); 2257 } 2258 2259 void MacroAssembler::call_VM_static(Register oop_result, address entry_point, Register arg_1, Register arg_2, 2260 Register arg_3, bool check_exceptions) { 2261 // Z_ARG1 is reserved for the thread. 
2262 lgr_if_needed(Z_ARG2, arg_1); 2263 assert(arg_2 != Z_ARG2, "smashed argument"); 2264 lgr_if_needed(Z_ARG3, arg_2); 2265 assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument"); 2266 lgr_if_needed(Z_ARG4, arg_3); 2267 call_VM_static(oop_result, entry_point, check_exceptions); 2268 } 2269 2270 // VM calls with explicit last_java_sp. 2271 2272 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions) { 2273 // Call takes possible detour via InterpreterMacroAssembler. 2274 call_VM_base(oop_result, last_java_sp, entry_point, true, check_exceptions); 2275 } 2276 2277 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) { 2278 // Z_ARG1 is reserved for the thread. 2279 lgr_if_needed(Z_ARG2, arg_1); 2280 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2281 } 2282 2283 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, 2284 Register arg_2, bool check_exceptions) { 2285 // Z_ARG1 is reserved for the thread. 2286 lgr_if_needed(Z_ARG2, arg_1); 2287 assert(arg_2 != Z_ARG2, "smashed argument"); 2288 lgr_if_needed(Z_ARG3, arg_2); 2289 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2290 } 2291 2292 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, 2293 Register arg_2, Register arg_3, bool check_exceptions) { 2294 // Z_ARG1 is reserved for the thread. 2295 lgr_if_needed(Z_ARG2, arg_1); 2296 assert(arg_2 != Z_ARG2, "smashed argument"); 2297 lgr_if_needed(Z_ARG3, arg_2); 2298 assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument"); 2299 lgr_if_needed(Z_ARG4, arg_3); 2300 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2301 } 2302 2303 // VM leaf calls. 2304 2305 void MacroAssembler::call_VM_leaf(address entry_point) { 2306 // Call takes possible detour via InterpreterMacroAssembler. 2307 call_VM_leaf_base(entry_point, true); 2308 } 2309 2310 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) { 2311 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2312 call_VM_leaf(entry_point); 2313 } 2314 2315 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) { 2316 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2317 assert(arg_2 != Z_ARG1, "smashed argument"); 2318 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2319 call_VM_leaf(entry_point); 2320 } 2321 2322 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) { 2323 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2324 assert(arg_2 != Z_ARG1, "smashed argument"); 2325 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2326 assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument"); 2327 if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3); 2328 call_VM_leaf(entry_point); 2329 } 2330 2331 // Static VM leaf calls. 2332 // Really static VM leaf calls are never patched. 2333 2334 void MacroAssembler::call_VM_leaf_static(address entry_point) { 2335 // Call takes possible detour via InterpreterMacroAssembler. 
2336 call_VM_leaf_base(entry_point, false); 2337 } 2338 2339 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1) { 2340 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2341 call_VM_leaf_static(entry_point); 2342 } 2343 2344 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2) { 2345 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2346 assert(arg_2 != Z_ARG1, "smashed argument"); 2347 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2348 call_VM_leaf_static(entry_point); 2349 } 2350 2351 void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2, Register arg_3) { 2352 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2353 assert(arg_2 != Z_ARG1, "smashed argument"); 2354 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2355 assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument"); 2356 if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3); 2357 call_VM_leaf_static(entry_point); 2358 } 2359 2360 // Don't use detour via call_c(reg). 2361 address MacroAssembler::call_c(address function_entry) { 2362 load_const(Z_R1, function_entry); 2363 return call(Z_R1); 2364 } 2365 2366 // Variant for really static (non-relocatable) calls which are never patched. 2367 address MacroAssembler::call_c_static(address function_entry) { 2368 load_absolute_address(Z_R1, function_entry); 2369 #if 0 // def ASSERT 2370 // Verify that call site did not move. 2371 load_const_optimized(Z_R0, function_entry); 2372 z_cgr(Z_R1, Z_R0); 2373 z_brc(bcondEqual, 3); 2374 z_illtrap(0xba); 2375 #endif 2376 return call(Z_R1); 2377 } 2378 2379 address MacroAssembler::call_c_opt(address function_entry) { 2380 bool success = call_far_patchable(function_entry, -2 /* emit relocation + constant */); 2381 _last_calls_return_pc = success ? pc() : NULL; 2382 return _last_calls_return_pc; 2383 } 2384 2385 // Identify a call_far_patchable instruction: LARL + LG + BASR 2386 // 2387 // nop ; optionally, if required for alignment 2388 // lgrl rx,A(TOC entry) ; PC-relative access into constant pool 2389 // basr Z_R14,rx ; end of this instruction must be aligned to a word boundary 2390 // 2391 // Code pattern will eventually get patched into variant2 (see below for detection code). 2392 // 2393 bool MacroAssembler::is_call_far_patchable_variant0_at(address instruction_addr) { 2394 address iaddr = instruction_addr; 2395 2396 // Check for the actual load instruction. 2397 if (!is_load_const_from_toc(iaddr)) { return false; } 2398 iaddr += load_const_from_toc_size(); 2399 2400 // Check for the call (BASR) instruction, finally. 2401 assert(iaddr-instruction_addr+call_byregister_size() == call_far_patchable_size(), "size mismatch"); 2402 return is_call_byregister(iaddr); 2403 } 2404 2405 // Identify a call_far_patchable instruction: BRASL 2406 // 2407 // Code pattern to suits atomic patching: 2408 // nop ; Optionally, if required for alignment. 2409 // nop ... ; Multiple filler nops to compensate for size difference (variant0 is longer). 2410 // nop ; For code pattern detection: Prepend each BRASL with a nop. 2411 // brasl Z_R14,<reladdr> ; End of code must be 4-byte aligned ! 2412 bool MacroAssembler::is_call_far_patchable_variant2_at(address instruction_addr) { 2413 const address call_addr = (address)((intptr_t)instruction_addr + call_far_patchable_size() - call_far_pcrelative_size()); 2414 2415 // Check for correct number of leading nops. 
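  // The expected count is (call_far_patchable_size() - call_far_pcrelative_size()) / nop_size(),
  // i.e. the length difference between the long (variant0) and short (variant2) flavors.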
2416 address iaddr; 2417 for (iaddr = instruction_addr; iaddr < call_addr; iaddr += nop_size()) { 2418 if (!is_z_nop(iaddr)) { return false; } 2419 } 2420 assert(iaddr == call_addr, "sanity"); 2421 2422 // --> Check for call instruction. 2423 if (is_call_far_pcrelative(call_addr)) { 2424 assert(call_addr-instruction_addr+call_far_pcrelative_size() == call_far_patchable_size(), "size mismatch"); 2425 return true; 2426 } 2427 2428 return false; 2429 } 2430 2431 // Emit a NOT mt-safely patchable 64 bit absolute call. 2432 // If toc_offset == -2, then the destination of the call (= target) is emitted 2433 // to the constant pool and a runtime_call relocation is added 2434 // to the code buffer. 2435 // If toc_offset != -2, target must already be in the constant pool at 2436 // _ctableStart+toc_offset (a caller can retrieve toc_offset 2437 // from the runtime_call relocation). 2438 // Special handling of emitting to scratch buffer when there is no constant pool. 2439 // Slightly changed code pattern. We emit an additional nop if we would 2440 // not end emitting at a word aligned address. This is to ensure 2441 // an atomically patchable displacement in brasl instructions. 2442 // 2443 // A call_far_patchable comes in different flavors: 2444 // - LARL(CP) / LG(CP) / BR (address in constant pool, access via CP register) 2445 // - LGRL(CP) / BR (address in constant pool, pc-relative access) 2446 // - BRASL (relative address of call target coded in instruction) 2447 // All flavors occupy the same amount of space. Length differences are compensated 2448 // by leading nops, such that the instruction sequence always ends at the same 2449 // byte offset. This is required to keep the return offset constant. 2450 // Furthermore, the return address (the end of the instruction sequence) is forced 2451 // to be on a 4-byte boundary. This is required for atomic patching, should we ever 2452 // need to patch the call target of the BRASL flavor. 2453 // RETURN value: false, if no constant pool entry could be allocated, true otherwise. 2454 bool MacroAssembler::call_far_patchable(address target, int64_t tocOffset) { 2455 // Get current pc and ensure word alignment for end of instr sequence. 2456 const address start_pc = pc(); 2457 const intptr_t start_off = offset(); 2458 assert(!call_far_patchable_requires_alignment_nop(start_pc), "call_far_patchable requires aligned address"); 2459 const ptrdiff_t dist = (ptrdiff_t)(target - (start_pc + 2)); // Prepend each BRASL with a nop. 2460 const bool emit_target_to_pool = (tocOffset == -2) && !code_section()->scratch_emit(); 2461 const bool emit_relative_call = !emit_target_to_pool && 2462 RelAddr::is_in_range_of_RelAddr32(dist) && 2463 ReoptimizeCallSequences && 2464 !code_section()->scratch_emit(); 2465 2466 if (emit_relative_call) { 2467 // Add padding to get the same size as below. 2468 const unsigned int padding = call_far_patchable_size() - call_far_pcrelative_size(); 2469 unsigned int current_padding; 2470 for (current_padding = 0; current_padding < padding; current_padding += nop_size()) { z_nop(); } 2471 assert(current_padding == padding, "sanity"); 2472 2473 // relative call: len = 2(nop) + 6 (brasl) 2474 // CodeBlob resize cannot occur in this case because 2475 // this call is emitted into pre-existing space. 2476 z_nop(); // Prepend each BRASL with a nop. 2477 z_brasl(Z_R14, target); 2478 } else { 2479 // absolute call: Get address from TOC. 
2480 // len = (load TOC){6|0} + (load from TOC){6} + (basr){2} = {14|8} 2481 if (emit_target_to_pool) { 2482 // When emitting the call for the first time, we do not need to use 2483 // the pc-relative version. It will be patched anyway, when the code 2484 // buffer is copied. 2485 // Relocation is not needed when !ReoptimizeCallSequences. 2486 relocInfo::relocType rt = ReoptimizeCallSequences ? relocInfo::runtime_call_w_cp_type : relocInfo::none; 2487 AddressLiteral dest(target, rt); 2488 // Store_oop_in_toc() adds dest to the constant table. As side effect, this kills 2489 // inst_mark(). Reset if possible. 2490 bool reset_mark = (inst_mark() == pc()); 2491 tocOffset = store_oop_in_toc(dest); 2492 if (reset_mark) { set_inst_mark(); } 2493 if (tocOffset == -1) { 2494 return false; // Couldn't create constant pool entry. 2495 } 2496 } 2497 assert(offset() == start_off, "emit no code before this point!"); 2498 2499 address tocPos = pc() + tocOffset; 2500 if (emit_target_to_pool) { 2501 tocPos = code()->consts()->start() + tocOffset; 2502 } 2503 load_long_pcrelative(Z_R14, tocPos); 2504 z_basr(Z_R14, Z_R14); 2505 } 2506 2507 #ifdef ASSERT 2508 // Assert that we can identify the emitted call. 2509 assert(is_call_far_patchable_at(addr_at(start_off)), "can't identify emitted call"); 2510 assert(offset() == start_off+call_far_patchable_size(), "wrong size"); 2511 2512 if (emit_target_to_pool) { 2513 assert(get_dest_of_call_far_patchable_at(addr_at(start_off), code()->consts()->start()) == target, 2514 "wrong encoding of dest address"); 2515 } 2516 #endif 2517 return true; // success 2518 } 2519 2520 // Identify a call_far_patchable instruction. 2521 // For more detailed information see header comment of call_far_patchable. 2522 bool MacroAssembler::is_call_far_patchable_at(address instruction_addr) { 2523 return is_call_far_patchable_variant2_at(instruction_addr) || // short version: BRASL 2524 is_call_far_patchable_variant0_at(instruction_addr); // long version LARL + LG + BASR 2525 } 2526 2527 // Does the call_far_patchable instruction use a pc-relative encoding 2528 // of the call destination? 2529 bool MacroAssembler::is_call_far_patchable_pcrelative_at(address instruction_addr) { 2530 // Variant 2 is pc-relative. 2531 return is_call_far_patchable_variant2_at(instruction_addr); 2532 } 2533 2534 bool MacroAssembler::is_call_far_pcrelative(address instruction_addr) { 2535 // Prepend each BRASL with a nop. 2536 return is_z_nop(instruction_addr) && is_z_brasl(instruction_addr + nop_size()); // Match at position after one nop required. 2537 } 2538 2539 // Set destination address of a call_far_patchable instruction. 2540 void MacroAssembler::set_dest_of_call_far_patchable_at(address instruction_addr, address dest, int64_t tocOffset) { 2541 ResourceMark rm; 2542 2543 // Now that CP entry is verified, patch call to a pc-relative call (if circumstances permit). 2544 int code_size = MacroAssembler::call_far_patchable_size(); 2545 CodeBuffer buf(instruction_addr, code_size); 2546 MacroAssembler masm(&buf); 2547 masm.call_far_patchable(dest, tocOffset); 2548 ICache::invalidate_range(instruction_addr, code_size); // Empty on z. 2549 } 2550 2551 // Get dest address of a call_far_patchable instruction. 2552 address MacroAssembler::get_dest_of_call_far_patchable_at(address instruction_addr, address ctable) { 2553 // Dynamic TOC: absolute address in constant pool. 2554 // Check variant2 first, it is more frequent. 2555 2556 // Relative address encoded in call instruction. 
2557 if (is_call_far_patchable_variant2_at(instruction_addr)) { 2558 return MacroAssembler::get_target_addr_pcrel(instruction_addr + nop_size()); // Prepend each BRASL with a nop. 2559 2560 // Absolute address in constant pool. 2561 } else if (is_call_far_patchable_variant0_at(instruction_addr)) { 2562 address iaddr = instruction_addr; 2563 2564 long tocOffset = get_load_const_from_toc_offset(iaddr); 2565 address tocLoc = iaddr + tocOffset; 2566 return *(address *)(tocLoc); 2567 } else { 2568 fprintf(stderr, "MacroAssembler::get_dest_of_call_far_patchable_at has a problem at %p:\n", instruction_addr); 2569 fprintf(stderr, "not a call_far_patchable: %16.16lx %16.16lx, len = %d\n", 2570 *(unsigned long*)instruction_addr, 2571 *(unsigned long*)(instruction_addr+8), 2572 call_far_patchable_size()); 2573 Disassembler::decode(instruction_addr, instruction_addr+call_far_patchable_size()); 2574 ShouldNotReachHere(); 2575 return NULL; 2576 } 2577 } 2578 2579 void MacroAssembler::align_call_far_patchable(address pc) { 2580 if (call_far_patchable_requires_alignment_nop(pc)) { z_nop(); } 2581 } 2582 2583 void MacroAssembler::check_and_handle_earlyret(Register java_thread) { 2584 } 2585 2586 void MacroAssembler::check_and_handle_popframe(Register java_thread) { 2587 } 2588 2589 // Read from the polling page. 2590 // Use TM or TMY instruction, depending on read offset. 2591 // offset = 0: Use TM, safepoint polling. 2592 // offset < 0: Use TMY, profiling safepoint polling. 2593 void MacroAssembler::load_from_polling_page(Register polling_page_address, int64_t offset) { 2594 if (Immediate::is_uimm12(offset)) { 2595 z_tm(offset, polling_page_address, mask_safepoint); 2596 } else { 2597 z_tmy(offset, polling_page_address, mask_profiling); 2598 } 2599 } 2600 2601 // Check whether z_instruction is a read access to the polling page 2602 // which was emitted by load_from_polling_page(..). 2603 bool MacroAssembler::is_load_from_polling_page(address instr_loc) { 2604 unsigned long z_instruction; 2605 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2606 2607 if (ilen == 2) { return false; } // It's none of the allowed instructions. 2608 2609 if (ilen == 4) { 2610 if (!is_z_tm(z_instruction)) { return false; } // It's len=4, but not a z_tm. fail. 2611 2612 int ms = inv_mask(z_instruction,8,32); // mask 2613 int ra = inv_reg(z_instruction,16,32); // base register 2614 int ds = inv_uimm12(z_instruction); // displacement 2615 2616 if (!(ds == 0 && ra != 0 && ms == mask_safepoint)) { 2617 return false; // It's not a z_tm(0, ra, mask_safepoint). Fail. 2618 } 2619 2620 } else { /* if (ilen == 6) */ 2621 2622 assert(!is_z_lg(z_instruction), "old form (LG) polling page access. Please fix and use TM(Y)."); 2623 2624 if (!is_z_tmy(z_instruction)) { return false; } // It's len=6, but not a z_tmy. fail. 2625 2626 int ms = inv_mask(z_instruction,8,48); // mask 2627 int ra = inv_reg(z_instruction,16,48); // base register 2628 int ds = inv_simm20(z_instruction); // displacement 2629 } 2630 2631 return true; 2632 } 2633 2634 // Extract poll address from instruction and ucontext. 
2635 address MacroAssembler::get_poll_address(address instr_loc, void* ucontext) { 2636 assert(ucontext != NULL, "must have ucontext"); 2637 ucontext_t* uc = (ucontext_t*) ucontext; 2638 unsigned long z_instruction; 2639 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2640 2641 if (ilen == 4 && is_z_tm(z_instruction)) { 2642 int ra = inv_reg(z_instruction, 16, 32); // base register 2643 int ds = inv_uimm12(z_instruction); // displacement 2644 address addr = (address)uc->uc_mcontext.gregs[ra]; 2645 return addr + ds; 2646 } else if (ilen == 6 && is_z_tmy(z_instruction)) { 2647 int ra = inv_reg(z_instruction, 16, 48); // base register 2648 int ds = inv_simm20(z_instruction); // displacement 2649 address addr = (address)uc->uc_mcontext.gregs[ra]; 2650 return addr + ds; 2651 } 2652 2653 ShouldNotReachHere(); 2654 return NULL; 2655 } 2656 2657 // Extract poll register from instruction. 2658 uint MacroAssembler::get_poll_register(address instr_loc) { 2659 unsigned long z_instruction; 2660 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2661 2662 if (ilen == 4 && is_z_tm(z_instruction)) { 2663 return (uint)inv_reg(z_instruction, 16, 32); // base register 2664 } else if (ilen == 6 && is_z_tmy(z_instruction)) { 2665 return (uint)inv_reg(z_instruction, 16, 48); // base register 2666 } 2667 2668 ShouldNotReachHere(); 2669 return 0; 2670 } 2671 2672 void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) { 2673 const Address poll_byte_addr(Z_thread, in_bytes(JavaThread::polling_word_offset()) + 7 /* Big Endian */); 2674 // Armed page has poll_bit set. 2675 z_tm(poll_byte_addr, SafepointMechanism::poll_bit()); 2676 z_brnaz(slow_path); 2677 } 2678 2679 // Don't rely on register locking, always use Z_R1 as scratch register instead. 2680 void MacroAssembler::bang_stack_with_offset(int offset) { 2681 // Stack grows down, caller passes positive offset. 2682 assert(offset > 0, "must bang with positive offset"); 2683 if (Displacement::is_validDisp(-offset)) { 2684 z_tmy(-offset, Z_SP, mask_stackbang); 2685 } else { 2686 add2reg(Z_R1, -offset, Z_SP); // Do not destroy Z_SP!!! 2687 z_tm(0, Z_R1, mask_stackbang); // Just banging. 2688 } 2689 } 2690 2691 void MacroAssembler::reserved_stack_check(Register return_pc) { 2692 // Test if reserved zone needs to be enabled. 2693 Label no_reserved_zone_enabling; 2694 assert(return_pc == Z_R14, "Return pc must be in R14 before z_br() to StackOverflow stub."); 2695 BLOCK_COMMENT("reserved_stack_check {"); 2696 2697 z_clg(Z_SP, Address(Z_thread, JavaThread::reserved_stack_activation_offset())); 2698 z_brl(no_reserved_zone_enabling); 2699 2700 // Enable reserved zone again, throw stack overflow exception. 2701 save_return_pc(); 2702 push_frame_abi160(0); 2703 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), Z_thread); 2704 pop_frame(); 2705 restore_return_pc(); 2706 2707 load_const_optimized(Z_R1, StubRoutines::throw_delayed_StackOverflowError_entry()); 2708 // Don't use call() or z_basr(), they will invalidate Z_R14 which contains the return pc. 2709 z_br(Z_R1); 2710 2711 should_not_reach_here(); 2712 2713 bind(no_reserved_zone_enabling); 2714 BLOCK_COMMENT("} reserved_stack_check"); 2715 } 2716 2717 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
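// Illustrative call (hypothetical register choice, fixed-size allocation):
//   tlab_allocate(Z_RET, noreg, instance_size_in_bytes, Z_R1, slow_case);
// leaves the address of the newly allocated object in Z_RET and branches to slow_case
// if the TLAB does not have enough room.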
2718 void MacroAssembler::tlab_allocate(Register obj, 2719 Register var_size_in_bytes, 2720 int con_size_in_bytes, 2721 Register t1, 2722 Label& slow_case) { 2723 assert_different_registers(obj, var_size_in_bytes, t1); 2724 Register end = t1; 2725 Register thread = Z_thread; 2726 2727 z_lg(obj, Address(thread, JavaThread::tlab_top_offset())); 2728 if (var_size_in_bytes == noreg) { 2729 z_lay(end, Address(obj, con_size_in_bytes)); 2730 } else { 2731 z_lay(end, Address(obj, var_size_in_bytes)); 2732 } 2733 z_cg(end, Address(thread, JavaThread::tlab_end_offset())); 2734 branch_optimized(bcondHigh, slow_case); 2735 2736 // Update the tlab top pointer. 2737 z_stg(end, Address(thread, JavaThread::tlab_top_offset())); 2738 2739 // Recover var_size_in_bytes if necessary. 2740 if (var_size_in_bytes == end) { 2741 z_sgr(var_size_in_bytes, obj); 2742 } 2743 } 2744 2745 // Emitter for interface method lookup. 2746 // input: recv_klass, intf_klass, itable_index 2747 // output: method_result 2748 // kills: itable_index, temp1_reg, Z_R0, Z_R1 2749 // TODO: Temp2_reg is unused. we may use this emitter also in the itable stubs. 2750 // If the register is still not needed then, remove it. 2751 void MacroAssembler::lookup_interface_method(Register recv_klass, 2752 Register intf_klass, 2753 RegisterOrConstant itable_index, 2754 Register method_result, 2755 Register temp1_reg, 2756 Label& no_such_interface, 2757 bool return_method) { 2758 2759 const Register vtable_len = temp1_reg; // Used to compute itable_entry_addr. 2760 const Register itable_entry_addr = Z_R1_scratch; 2761 const Register itable_interface = Z_R0_scratch; 2762 2763 BLOCK_COMMENT("lookup_interface_method {"); 2764 2765 // Load start of itable entries into itable_entry_addr. 2766 z_llgf(vtable_len, Address(recv_klass, Klass::vtable_length_offset())); 2767 z_sllg(vtable_len, vtable_len, exact_log2(vtableEntry::size_in_bytes())); 2768 2769 // Loop over all itable entries until desired interfaceOop(Rinterface) found. 2770 const int vtable_base_offset = in_bytes(Klass::vtable_start_offset()); 2771 2772 add2reg_with_index(itable_entry_addr, 2773 vtable_base_offset + itableOffsetEntry::interface_offset_in_bytes(), 2774 recv_klass, vtable_len); 2775 2776 const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize; 2777 Label search; 2778 2779 bind(search); 2780 2781 // Handle IncompatibleClassChangeError. 2782 // If the entry is NULL then we've reached the end of the table 2783 // without finding the expected interface, so throw an exception. 2784 load_and_test_long(itable_interface, Address(itable_entry_addr)); 2785 z_bre(no_such_interface); 2786 2787 add2reg(itable_entry_addr, itable_offset_search_inc); 2788 z_cgr(itable_interface, intf_klass); 2789 z_brne(search); 2790 2791 // Entry found and itable_entry_addr points to it, get offset of vtable for interface. 2792 if (return_method) { 2793 const int vtable_offset_offset = (itableOffsetEntry::offset_offset_in_bytes() - 2794 itableOffsetEntry::interface_offset_in_bytes()) - 2795 itable_offset_search_inc; 2796 2797 // Compute itableMethodEntry and get method and entry point 2798 // we use addressing with index and displacement, since the formula 2799 // for computing the entry's offset has a fixed and a dynamic part, 2800 // the latter depending on the matched interface entry and on the case, 2801 // that the itable index has been passed as a register, not a constant value. 
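    // In effect (sketch of the address arithmetic emitted below):
    //   method_result = *(recv_klass + interface_vtable_offset          // read from the matching itableOffsetEntry
    //                     + itable_index * itableMethodEntry::size() * wordSize
    //                     + itableMethodEntry::method_offset_in_bytes())
    // For a constant itable_index the scaled index term is folded into the displacement instead.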
2802 int method_offset = itableMethodEntry::method_offset_in_bytes(); 2803 // Fixed part (displacement), common operand. 2804 Register itable_offset = method_result; // Dynamic part (index register). 2805 2806 if (itable_index.is_register()) { 2807 // Compute the method's offset in that register, for the formula, see the 2808 // else-clause below. 2809 z_sllg(itable_offset, itable_index.as_register(), exact_log2(itableMethodEntry::size() * wordSize)); 2810 z_agf(itable_offset, vtable_offset_offset, itable_entry_addr); 2811 } else { 2812 // Displacement increases. 2813 method_offset += itableMethodEntry::size() * wordSize * itable_index.as_constant(); 2814 2815 // Load index from itable. 2816 z_llgf(itable_offset, vtable_offset_offset, itable_entry_addr); 2817 } 2818 2819 // Finally load the method's oop. 2820 z_lg(method_result, method_offset, itable_offset, recv_klass); 2821 } 2822 BLOCK_COMMENT("} lookup_interface_method"); 2823 } 2824 2825 // Lookup for virtual method invocation. 2826 void MacroAssembler::lookup_virtual_method(Register recv_klass, 2827 RegisterOrConstant vtable_index, 2828 Register method_result) { 2829 assert_different_registers(recv_klass, vtable_index.register_or_noreg()); 2830 assert(vtableEntry::size() * wordSize == wordSize, 2831 "else adjust the scaling in the code below"); 2832 2833 BLOCK_COMMENT("lookup_virtual_method {"); 2834 2835 const int base = in_bytes(Klass::vtable_start_offset()); 2836 2837 if (vtable_index.is_constant()) { 2838 // Load with base + disp. 2839 Address vtable_entry_addr(recv_klass, 2840 vtable_index.as_constant() * wordSize + 2841 base + 2842 vtableEntry::method_offset_in_bytes()); 2843 2844 z_lg(method_result, vtable_entry_addr); 2845 } else { 2846 // Shift index properly and load with base + index + disp. 2847 Register vindex = vtable_index.as_register(); 2848 Address vtable_entry_addr(recv_klass, vindex, 2849 base + vtableEntry::method_offset_in_bytes()); 2850 2851 z_sllg(vindex, vindex, exact_log2(wordSize)); 2852 z_lg(method_result, vtable_entry_addr); 2853 } 2854 BLOCK_COMMENT("} lookup_virtual_method"); 2855 } 2856 2857 // Factor out code to call ic_miss_handler. 2858 // Generate code to call the inline cache miss handler. 2859 // 2860 // In most cases, this code will be generated out-of-line. 2861 // The method parameters are intended to provide some variability. 2862 // ICM - Label which has to be bound to the start of useful code (past any traps). 2863 // trapMarker - Marking byte for the generated illtrap instructions (if any). 2864 // Any value except 0x00 is supported. 2865 // = 0x00 - do not generate illtrap instructions. 2866 // use nops to fill unused space. 2867 // requiredSize - required size of the generated code. If the actually 2868 // generated code is smaller, use padding instructions to fill up. 2869 // = 0 - no size requirement, no padding. 2870 // scratch - scratch register to hold branch target address. 2871 // 2872 // The method returns the code offset of the bound label. 2873 unsigned int MacroAssembler::call_ic_miss_handler(Label& ICM, int trapMarker, int requiredSize, Register scratch) { 2874 intptr_t startOffset = offset(); 2875 2876 // Prevent entry at content_begin(). 2877 if (trapMarker != 0) { 2878 z_illtrap(trapMarker); 2879 } 2880 2881 // Load address of inline cache miss code into scratch register 2882 // and branch to cache miss handler. 
2883 BLOCK_COMMENT("IC miss handler {"); 2884 BIND(ICM); 2885 unsigned int labelOffset = offset(); 2886 AddressLiteral icmiss(SharedRuntime::get_ic_miss_stub()); 2887 2888 load_const_optimized(scratch, icmiss); 2889 z_br(scratch); 2890 2891 // Fill unused space. 2892 if (requiredSize > 0) { 2893 while ((offset() - startOffset) < requiredSize) { 2894 if (trapMarker == 0) { 2895 z_nop(); 2896 } else { 2897 z_illtrap(trapMarker); 2898 } 2899 } 2900 } 2901 BLOCK_COMMENT("} IC miss handler"); 2902 return labelOffset; 2903 } 2904 2905 void MacroAssembler::nmethod_UEP(Label& ic_miss) { 2906 Register ic_reg = Z_inline_cache; 2907 int klass_offset = oopDesc::klass_offset_in_bytes(); 2908 if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) { 2909 if (VM_Version::has_CompareBranch()) { 2910 z_cgij(Z_ARG1, 0, Assembler::bcondEqual, ic_miss); 2911 } else { 2912 z_ltgr(Z_ARG1, Z_ARG1); 2913 z_bre(ic_miss); 2914 } 2915 } 2916 // Compare cached class against klass from receiver. 2917 compare_klass_ptr(ic_reg, klass_offset, Z_ARG1, false); 2918 z_brne(ic_miss); 2919 } 2920 2921 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 2922 Register super_klass, 2923 Register temp1_reg, 2924 Label* L_success, 2925 Label* L_failure, 2926 Label* L_slow_path, 2927 RegisterOrConstant super_check_offset) { 2928 2929 const int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 2930 const int sco_offset = in_bytes(Klass::super_check_offset_offset()); 2931 2932 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); 2933 bool need_slow_path = (must_load_sco || 2934 super_check_offset.constant_or_zero() == sc_offset); 2935 2936 // Input registers must not overlap. 2937 assert_different_registers(sub_klass, super_klass, temp1_reg); 2938 if (super_check_offset.is_register()) { 2939 assert_different_registers(sub_klass, super_klass, 2940 super_check_offset.as_register()); 2941 } else if (must_load_sco) { 2942 assert(temp1_reg != noreg, "supply either a temp or a register offset"); 2943 } 2944 2945 const Register Rsuper_check_offset = temp1_reg; 2946 2947 NearLabel L_fallthrough; 2948 int label_nulls = 0; 2949 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 2950 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 2951 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 2952 assert(label_nulls <= 1 || 2953 (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path), 2954 "at most one NULL in the batch, usually"); 2955 2956 BLOCK_COMMENT("check_klass_subtype_fast_path {"); 2957 // If the pointers are equal, we are done (e.g., String[] elements). 2958 // This self-check enables sharing of secondary supertype arrays among 2959 // non-primary types such as array-of-interface. Otherwise, each such 2960 // type would need its own customized SSA. 2961 // We move this check to the front of the fast path because many 2962 // type checks are in fact trivially successful in this manner, 2963 // so we get a nicely predicted branch right at the start of the check. 2964 compare64_and_branch(sub_klass, super_klass, bcondEqual, *L_success); 2965 2966 // Check the supertype display, which is uint. 
2967 if (must_load_sco) { 2968 z_llgf(Rsuper_check_offset, sco_offset, super_klass); 2969 super_check_offset = RegisterOrConstant(Rsuper_check_offset); 2970 } 2971 Address super_check_addr(sub_klass, super_check_offset, 0); 2972 z_cg(super_klass, super_check_addr); // compare w/ displayed supertype 2973 2974 // This check has worked decisively for primary supers. 2975 // Secondary supers are sought in the super_cache ('super_cache_addr'). 2976 // (Secondary supers are interfaces and very deeply nested subtypes.) 2977 // This works in the same check above because of a tricky aliasing 2978 // between the super_cache and the primary super display elements. 2979 // (The 'super_check_addr' can address either, as the case requires.) 2980 // Note that the cache is updated below if it does not help us find 2981 // what we need immediately. 2982 // So if it was a primary super, we can just fail immediately. 2983 // Otherwise, it's the slow path for us (no success at this point). 2984 2985 // Hacked jmp, which may only be used just before L_fallthrough. 2986 #define final_jmp(label) \ 2987 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 2988 else { branch_optimized(Assembler::bcondAlways, label); } /*omit semicolon*/ 2989 2990 if (super_check_offset.is_register()) { 2991 branch_optimized(Assembler::bcondEqual, *L_success); 2992 z_cfi(super_check_offset.as_register(), sc_offset); 2993 if (L_failure == &L_fallthrough) { 2994 branch_optimized(Assembler::bcondEqual, *L_slow_path); 2995 } else { 2996 branch_optimized(Assembler::bcondNotEqual, *L_failure); 2997 final_jmp(*L_slow_path); 2998 } 2999 } else if (super_check_offset.as_constant() == sc_offset) { 3000 // Need a slow path; fast failure is impossible. 3001 if (L_slow_path == &L_fallthrough) { 3002 branch_optimized(Assembler::bcondEqual, *L_success); 3003 } else { 3004 branch_optimized(Assembler::bcondNotEqual, *L_slow_path); 3005 final_jmp(*L_success); 3006 } 3007 } else { 3008 // No slow path; it's a fast decision. 3009 if (L_failure == &L_fallthrough) { 3010 branch_optimized(Assembler::bcondEqual, *L_success); 3011 } else { 3012 branch_optimized(Assembler::bcondNotEqual, *L_failure); 3013 final_jmp(*L_success); 3014 } 3015 } 3016 3017 bind(L_fallthrough); 3018 #undef local_brc 3019 #undef final_jmp 3020 BLOCK_COMMENT("} check_klass_subtype_fast_path"); 3021 // fallthru (to slow path) 3022 } 3023 3024 void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass, 3025 Register Rsuperklass, 3026 Register Rarray_ptr, // tmp 3027 Register Rlength, // tmp 3028 Label* L_success, 3029 Label* L_failure) { 3030 // Input registers must not overlap. 3031 // Also check for R1 which is explicitly used here. 3032 assert_different_registers(Z_R1, Rsubklass, Rsuperklass, Rarray_ptr, Rlength); 3033 NearLabel L_fallthrough; 3034 int label_nulls = 0; 3035 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 3036 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 3037 assert(label_nulls <= 1, "at most one NULL in the batch"); 3038 3039 const int ss_offset = in_bytes(Klass::secondary_supers_offset()); 3040 const int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3041 3042 const int length_offset = Array<Klass*>::length_offset_in_bytes(); 3043 const int base_offset = Array<Klass*>::base_offset_in_bytes(); 3044 3045 // Hacked jmp, which may only be used just before L_fallthrough. 
3046 #define final_jmp(label) \ 3047 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 3048 else branch_optimized(Assembler::bcondAlways, label) /*omit semicolon*/ 3049 3050 NearLabel loop_iterate, loop_count, match; 3051 3052 BLOCK_COMMENT("check_klass_subtype_slow_path {"); 3053 z_lg(Rarray_ptr, ss_offset, Rsubklass); 3054 3055 load_and_test_int(Rlength, Address(Rarray_ptr, length_offset)); 3056 branch_optimized(Assembler::bcondZero, *L_failure); 3057 3058 // Oops in table are NO MORE compressed. 3059 z_cg(Rsuperklass, base_offset, Rarray_ptr); // Check array element for match. 3060 z_bre(match); // Shortcut for array length = 1. 3061 3062 // No match yet, so we must walk the array's elements. 3063 z_lngfr(Rlength, Rlength); 3064 z_sllg(Rlength, Rlength, LogBytesPerWord); // -#bytes of cache array 3065 z_llill(Z_R1, BytesPerWord); // Set increment/end index. 3066 add2reg(Rlength, 2 * BytesPerWord); // start index = -(n-2)*BytesPerWord 3067 z_slgr(Rarray_ptr, Rlength); // start addr: += (n-2)*BytesPerWord 3068 z_bru(loop_count); 3069 3070 BIND(loop_iterate); 3071 z_cg(Rsuperklass, base_offset, Rlength, Rarray_ptr); // Check array element for match. 3072 z_bre(match); 3073 BIND(loop_count); 3074 z_brxlg(Rlength, Z_R1, loop_iterate); 3075 3076 // Rsuperklass not found among secondary super classes -> failure. 3077 branch_optimized(Assembler::bcondAlways, *L_failure); 3078 3079 // Got a hit. Return success (zero result). Set cache. 3080 // Cache load doesn't happen here. For speed it is directly emitted by the compiler. 3081 3082 BIND(match); 3083 3084 z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache. 3085 3086 final_jmp(*L_success); 3087 3088 // Exit to the surrounding code. 3089 BIND(L_fallthrough); 3090 #undef local_brc 3091 #undef final_jmp 3092 BLOCK_COMMENT("} check_klass_subtype_slow_path"); 3093 } 3094 3095 // Emitter for combining fast and slow path. 3096 void MacroAssembler::check_klass_subtype(Register sub_klass, 3097 Register super_klass, 3098 Register temp1_reg, 3099 Register temp2_reg, 3100 Label& L_success) { 3101 NearLabel failure; 3102 BLOCK_COMMENT(err_msg("check_klass_subtype(%s subclass of %s) {", sub_klass->name(), super_klass->name())); 3103 check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg, 3104 &L_success, &failure, NULL); 3105 check_klass_subtype_slow_path(sub_klass, super_klass, 3106 temp1_reg, temp2_reg, &L_success, NULL); 3107 BIND(failure); 3108 BLOCK_COMMENT("} check_klass_subtype"); 3109 } 3110 3111 void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) { 3112 assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); 3113 3114 Label L_fallthrough; 3115 if (L_fast_path == NULL) { 3116 L_fast_path = &L_fallthrough; 3117 } else if (L_slow_path == NULL) { 3118 L_slow_path = &L_fallthrough; 3119 } 3120 3121 // Fast path check: class is fully initialized 3122 z_cli(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized); 3123 z_bre(*L_fast_path); 3124 3125 // Fast path check: current thread is initializer thread 3126 z_cg(thread, Address(klass, InstanceKlass::init_thread_offset())); 3127 if (L_slow_path == &L_fallthrough) { 3128 z_bre(*L_fast_path); 3129 } else if (L_fast_path == &L_fallthrough) { 3130 z_brne(*L_slow_path); 3131 } else { 3132 Unimplemented(); 3133 } 3134 3135 bind(L_fallthrough); 3136 } 3137 3138 // Increment a counter at counter_address when the eq condition code is 3139 // set. 
Kills registers tmp1_reg and tmp2_reg and preserves the condition code. 3140 void MacroAssembler::increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg) { 3141 Label l; 3142 z_brne(l); 3143 load_const(tmp1_reg, counter_address); 3144 add2mem_32(Address(tmp1_reg), 1, tmp2_reg); 3145 z_cr(tmp1_reg, tmp1_reg); // Set cc to eq. 3146 bind(l); 3147 } 3148 3149 void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2) { 3150 Register displacedHeader = temp1; 3151 Register currentHeader = temp1; 3152 Register temp = temp2; 3153 NearLabel done, object_has_monitor; 3154 3155 BLOCK_COMMENT("compiler_fast_lock_object {"); 3156 3157 // Load markWord from oop into mark. 3158 z_lg(displacedHeader, 0, oop); 3159 3160 if (DiagnoseSyncOnValueBasedClasses != 0) { 3161 load_klass(Z_R1_scratch, oop); 3162 z_l(Z_R1_scratch, Address(Z_R1_scratch, Klass::access_flags_offset())); 3163 assert((JVM_ACC_IS_VALUE_BASED_CLASS & 0xFFFF) == 0, "or change following instruction"); 3164 z_nilh(Z_R1_scratch, JVM_ACC_IS_VALUE_BASED_CLASS >> 16); 3165 z_brne(done); 3166 } 3167 3168 // Handle existing monitor. 3169 // The object has an existing monitor iff (mark & monitor_value) != 0. 3170 guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word"); 3171 z_lr(temp, displacedHeader); 3172 z_nill(temp, markWord::monitor_value); 3173 z_brne(object_has_monitor); 3174 3175 // Set mark to markWord | markWord::unlocked_value. 3176 z_oill(displacedHeader, markWord::unlocked_value); 3177 3178 // Load Compare Value application register. 3179 3180 // Initialize the box (must happen before we update the object mark). 3181 z_stg(displacedHeader, BasicLock::displaced_header_offset_in_bytes(), box); 3182 3183 // Memory Fence (in cmpxchgd) 3184 // Compare object markWord with mark and if equal exchange scratch1 with object markWord. 3185 3186 // If the compare-and-swap succeeded, then we found an unlocked object and we 3187 // have now locked it. 3188 z_csg(displacedHeader, box, 0, oop); 3189 assert(currentHeader==displacedHeader, "must be same register"); // Identified two registers from z/Architecture. 3190 z_bre(done); 3191 3192 // We did not see an unlocked object so try the fast recursive case. 3193 3194 z_sgr(currentHeader, Z_SP); 3195 load_const_optimized(temp, (~(os::vm_page_size()-1) | markWord::lock_mask_in_place)); 3196 3197 z_ngr(currentHeader, temp); 3198 // z_brne(done); 3199 // z_release(); 3200 z_stg(currentHeader/*==0 or not 0*/, BasicLock::displaced_header_offset_in_bytes(), box); 3201 3202 z_bru(done); 3203 3204 Register zero = temp; 3205 Register monitor_tagged = displacedHeader; // Tagged with markWord::monitor_value. 3206 bind(object_has_monitor); 3207 // The object's monitor m is unlocked iff m->owner == NULL, 3208 // otherwise m->owner may contain a thread or a stack address. 3209 // 3210 // Try to CAS m->owner from NULL to current thread. 3211 z_lghi(zero, 0); 3212 // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ. 3213 z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged); 3214 // Store a non-null value into the box. 3215 z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box); 3216 #ifdef ASSERT 3217 z_brne(done); 3218 // We've acquired the monitor, check some invariants. 3219 // Invariant 1: _recursions should be 0. 
3220 asm_assert_mem8_is_zero(OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), monitor_tagged, 3221 "monitor->_recursions should be 0", -1); 3222 z_ltgr(zero, zero); // Set CR=EQ. 3223 #endif 3224 bind(done); 3225 3226 BLOCK_COMMENT("} compiler_fast_lock_object"); 3227 // If locking was successful, CR should indicate 'EQ'. 3228 // The compiler or the native wrapper generates a branch to the runtime call 3229 // _complete_monitor_locking_Java. 3230 } 3231 3232 void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2) { 3233 Register displacedHeader = temp1; 3234 Register currentHeader = temp2; 3235 Register temp = temp1; 3236 Register monitor = temp2; 3237 3238 Label done, object_has_monitor; 3239 3240 BLOCK_COMMENT("compiler_fast_unlock_object {"); 3241 3242 // Find the lock address and load the displaced header from the stack. 3243 // if the displaced header is zero, we have a recursive unlock. 3244 load_and_test_long(displacedHeader, Address(box, BasicLock::displaced_header_offset_in_bytes())); 3245 z_bre(done); 3246 3247 // Handle existing monitor. 3248 // The object has an existing monitor iff (mark & monitor_value) != 0. 3249 z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop); 3250 guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word"); 3251 z_nill(currentHeader, markWord::monitor_value); 3252 z_brne(object_has_monitor); 3253 3254 // Check if it is still a light weight lock, this is true if we see 3255 // the stack address of the basicLock in the markWord of the object 3256 // copy box to currentHeader such that csg does not kill it. 3257 z_lgr(currentHeader, box); 3258 z_csg(currentHeader, displacedHeader, 0, oop); 3259 z_bru(done); // Csg sets CR as desired. 3260 3261 // Handle existing monitor. 3262 bind(object_has_monitor); 3263 z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop); // CurrentHeader is tagged with monitor_value set. 3264 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); 3265 z_brne(done); 3266 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 3267 z_brne(done); 3268 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); 3269 z_brne(done); 3270 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); 3271 z_brne(done); 3272 z_release(); 3273 z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader); 3274 3275 bind(done); 3276 3277 BLOCK_COMMENT("} compiler_fast_unlock_object"); 3278 // flag == EQ indicates success 3279 // flag == NE indicates failure 3280 } 3281 3282 void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) { 3283 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 3284 bs->resolve_jobject(this, value, tmp1, tmp2); 3285 } 3286 3287 // Last_Java_sp must comply to the rules in frame_s390.hpp. 3288 void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation) { 3289 BLOCK_COMMENT("set_last_Java_frame {"); 3290 3291 // Always set last_Java_pc and flags first because once last_Java_sp 3292 // is visible has_last_Java_frame is true and users will look at the 3293 // rest of the fields. (Note: flags should always be zero before we 3294 // get here so doesn't need to be set.) 3295 3296 // Verify that last_Java_pc was zeroed on return to Java. 
3297 if (allow_relocation) {
3298 asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()),
3299 Z_thread,
3300 "last_Java_pc not zeroed before leaving Java",
3301 0x200);
3302 } else {
3303 asm_assert_mem8_is_zero_static(in_bytes(JavaThread::last_Java_pc_offset()),
3304 Z_thread,
3305 "last_Java_pc not zeroed before leaving Java",
3306 0x200);
3307 }
3308
3309 // When returning from calling out from Java mode, the frame anchor's
3310 // last_Java_pc will always be set to NULL. It is set here so that,
3311 // if we are doing a call to native (not VM), we capture the
3312 // known pc and don't have to rely on the native call having a
3313 // standard frame linkage where we can find the pc.
3314 if (last_Java_pc!=noreg) {
3315 z_stg(last_Java_pc, Address(Z_thread, JavaThread::last_Java_pc_offset()));
3316 }
3317
3318 // This membar release is not required on z/Architecture, since the sequence of stores
3319 // is maintained. Nevertheless, we leave it in to document the required ordering.
3320 // The implementation of z_release() should be empty.
3321 // z_release();
3322
3323 z_stg(last_Java_sp, Address(Z_thread, JavaThread::last_Java_sp_offset()));
3324 BLOCK_COMMENT("} set_last_Java_frame");
3325 }
3326
3327 void MacroAssembler::reset_last_Java_frame(bool allow_relocation) {
3328 BLOCK_COMMENT("reset_last_Java_frame {");
3329
3330 if (allow_relocation) {
3331 asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()),
3332 Z_thread,
3333 "SP was not set, still zero",
3334 0x202);
3335 } else {
3336 asm_assert_mem8_isnot_zero_static(in_bytes(JavaThread::last_Java_sp_offset()),
3337 Z_thread,
3338 "SP was not set, still zero",
3339 0x202);
3340 }
3341
3342 // _last_Java_sp = 0
3343 // Clearing storage must be atomic here, so don't use clear_mem()!
3344 store_const(Address(Z_thread, JavaThread::last_Java_sp_offset()), 0);
3345
3346 // _last_Java_pc = 0
3347 store_const(Address(Z_thread, JavaThread::last_Java_pc_offset()), 0);
3348
3349 BLOCK_COMMENT("} reset_last_Java_frame");
3350 return;
3351 }
3352
3353 void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, bool allow_relocation) {
3354 assert_different_registers(sp, tmp1);
3355
3356 // We cannot trust that code generated by the C++ compiler saves R14
3357 // to z_abi_160.return_pc, because sometimes it spills R14 using stmg at
3358 // z_abi_160.gpr14 (e.g. InterpreterRuntime::_new()).
3359 // Therefore we load the PC into tmp1 and let set_last_Java_frame() save
3360 // it into the frame anchor.
3361 get_PC(tmp1); 3362 set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1, allow_relocation); 3363 } 3364 3365 void MacroAssembler::set_thread_state(JavaThreadState new_state) { 3366 z_release(); 3367 3368 assert(Immediate::is_uimm16(_thread_max_state), "enum value out of range for instruction"); 3369 assert(sizeof(JavaThreadState) == sizeof(int), "enum value must have base type int"); 3370 store_const(Address(Z_thread, JavaThread::thread_state_offset()), new_state, Z_R0, false); 3371 } 3372 3373 void MacroAssembler::get_vm_result(Register oop_result) { 3374 z_lg(oop_result, Address(Z_thread, JavaThread::vm_result_offset())); 3375 clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(void*)); 3376 3377 verify_oop(oop_result, FILE_AND_LINE); 3378 } 3379 3380 void MacroAssembler::get_vm_result_2(Register result) { 3381 z_lg(result, Address(Z_thread, JavaThread::vm_result_2_offset())); 3382 clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(void*)); 3383 } 3384 3385 // We require that C code which does not return a value in vm_result will 3386 // leave it undisturbed. 3387 void MacroAssembler::set_vm_result(Register oop_result) { 3388 z_stg(oop_result, Address(Z_thread, JavaThread::vm_result_offset())); 3389 } 3390 3391 // Explicit null checks (used for method handle code). 3392 void MacroAssembler::null_check(Register reg, Register tmp, int64_t offset) { 3393 if (!ImplicitNullChecks) { 3394 NearLabel ok; 3395 3396 compare64_and_branch(reg, (intptr_t) 0, Assembler::bcondNotEqual, ok); 3397 3398 // We just put the address into reg if it was 0 (tmp==Z_R0 is allowed so we can't use it for the address). 3399 address exception_entry = Interpreter::throw_NullPointerException_entry(); 3400 load_absolute_address(reg, exception_entry); 3401 z_br(reg); 3402 3403 bind(ok); 3404 } else { 3405 if (needs_explicit_null_check((intptr_t)offset)) { 3406 // Provoke OS NULL exception if reg = NULL by 3407 // accessing M[reg] w/o changing any registers. 3408 z_lg(tmp, 0, reg); 3409 } 3410 // else 3411 // Nothing to do, (later) access of M[reg + offset] 3412 // will provoke OS NULL exception if reg = NULL. 3413 } 3414 } 3415 3416 //------------------------------------- 3417 // Compressed Klass Pointers 3418 //------------------------------------- 3419 3420 // Klass oop manipulations if compressed. 3421 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 3422 Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided. (dst == src) also possible. 3423 address base = CompressedKlassPointers::base(); 3424 int shift = CompressedKlassPointers::shift(); 3425 bool need_zero_extend = base != 0; 3426 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3427 3428 BLOCK_COMMENT("cKlass encoder {"); 3429 3430 #ifdef ASSERT 3431 Label ok; 3432 z_tmll(current, KlassAlignmentInBytes-1); // Check alignment. 3433 z_brc(Assembler::bcondAllZero, ok); 3434 // The plain disassembler does not recognize illtrap. It instead displays 3435 // a 32-bit value. Issuing two illtraps assures the disassembler finds 3436 // the proper beginning of the next instruction. 3437 z_illtrap(0xee); 3438 z_illtrap(0xee); 3439 bind(ok); 3440 #endif 3441 3442 // Scale down the incoming klass pointer first. 3443 // We then can be sure we calculate an offset that fits into 32 bit. 3444 // More generally speaking: all subsequent calculations are purely 32-bit. 
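// Worked example (assumed values, purely illustrative): with base = 0x0000000800000000
// and shift = 3, the scaled-down base is 0x1_00000000, i.e. base_h = 0x1 and base_l = 0x0.
// A klass pointer 0x0000000800001238 scales down to 0x1_00000247; subtracting the scaled
// base leaves the 32-bit narrow klass pointer 0x00000247 (the base_l == 0 case below).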
3445 if (shift != 0) {
3446 assert (LogKlassAlignmentInBytes == shift, "decode alg wrong");
3447 z_srlg(dst, current, shift);
3448 current = dst;
3449 }
3450
3451 if (base != NULL) {
3452 // Use scaled-down base address parts to match scaled-down klass pointer.
3453 unsigned int base_h = ((unsigned long)base)>>(32+shift);
3454 unsigned int base_l = (unsigned int)(((unsigned long)base)>>shift);
3455
3456 // General considerations:
3457 // - when calculating (current_h - base_h), all bits must cancel (become 0).
3458 // Otherwise, we would end up with a compressed klass pointer which doesn't
3459 // fit into 32-bit.
3460 // - Only bit#33 of the difference could potentially be non-zero. For that
3461 // to happen, (current_l < base_l) must hold. In this case, the subtraction
3462 // will create a borrow out of bit#32, nicely killing bit#33.
3463 // - With the above, we only need to consider current_l and base_l to
3464 // calculate the result.
3465 // - Both values are treated as unsigned. The unsigned subtraction is
3466 // replaced by adding (unsigned) the 2's complement of the subtrahend.
3467
3468 if (base_l == 0) {
3469 // - In theory, the calculation to be performed here (current_h - base_h) MUST
3470 // cancel all high-word bits. Otherwise, we would end up with an offset
3471 // (i.e. compressed klass pointer) that does not fit into 32 bits.
3472 // - current_l remains unchanged.
3473 // - Therefore, we can replace all calculation with just a
3474 // zero-extending load 32 to 64 bit.
3475 // - Even that can be replaced with a conditional load if dst != current.
3476 // (This is a local view; the shift step may have requested zero-extension.)
3477 } else {
3478 if ((base_h == 0) && is_uimm(base_l, 31)) {
3479 // If we happen to find that (base_h == 0), and that base_l is within the range
3480 // which can be represented by a signed int, then we can use a 64-bit signed add with
3481 // (-base_l) as 32-bit signed immediate operand. The add will take care of the
3482 // upper 32 bits of the result, saving us the need of an extra zero extension.
3483 // For base_l to be in the required range, it must not have the most significant
3484 // bit (aka sign bit) set.
3485 lgr_if_needed(dst, current); // no zero/sign extension in this case!
3486 z_agfi(dst, -(int)base_l); // base_l must be passed as signed.
3487 need_zero_extend = false;
3488 current = dst;
3489 } else {
3490 // To begin with, we may need to copy and/or zero-extend the register operand.
3491 // We have to calculate (current_l - base_l). Because there is no unsigned
3492 // subtract instruction with immediate operand, we add the 2's complement of base_l.
3493 if (need_zero_extend) {
3494 z_llgfr(dst, current);
3495 need_zero_extend = false;
3496 } else {
3497 llgfr_if_needed(dst, current);
3498 }
3499 current = dst;
3500 z_alfi(dst, -base_l);
3501 }
3502 }
3503 }
3504
3505 if (need_zero_extend) {
3506 // We must zero-extend the calculated result. It may have some leftover bits in
3507 // the hi-word because we only did optimized calculations.
3508 z_llgfr(dst, current);
3509 } else {
3510 llgfr_if_needed(dst, current); // zero-extension while copying comes at no extra cost.
3511 }
3512
3513 BLOCK_COMMENT("} cKlass encoder");
3514 }
3515
3516 // This function calculates the size of the code generated by
3517 // decode_klass_not_null(Register dst)
3518 // when (Universe::heap() != NULL). Hence, if the instructions
3519 // it generates change, then this method needs to be updated.
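// For reference: the byte counts used below reflect z/Architecture instruction formats,
// where SLLG, AIH, and ALGFI are 6-byte instructions and ALGR is a 4-byte RRE instruction.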
3520 int MacroAssembler::instr_size_for_decode_klass_not_null() { 3521 address base = CompressedKlassPointers::base(); 3522 int shift_size = CompressedKlassPointers::shift() == 0 ? 0 : 6; /* sllg */ 3523 int addbase_size = 0; 3524 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3525 3526 if (base != NULL) { 3527 unsigned int base_h = ((unsigned long)base)>>32; 3528 unsigned int base_l = (unsigned int)((unsigned long)base); 3529 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3530 addbase_size += 6; /* aih */ 3531 } else if ((base_h == 0) && (base_l != 0)) { 3532 addbase_size += 6; /* algfi */ 3533 } else { 3534 addbase_size += load_const_size(); 3535 addbase_size += 4; /* algr */ 3536 } 3537 } 3538 #ifdef ASSERT 3539 addbase_size += 10; 3540 addbase_size += 2; // Extra sigill. 3541 #endif 3542 return addbase_size + shift_size; 3543 } 3544 3545 // !!! If the instructions that get generated here change 3546 // then function instr_size_for_decode_klass_not_null() 3547 // needs to get updated. 3548 // This variant of decode_klass_not_null() must generate predictable code! 3549 // The code must only depend on globally known parameters. 3550 void MacroAssembler::decode_klass_not_null(Register dst) { 3551 address base = CompressedKlassPointers::base(); 3552 int shift = CompressedKlassPointers::shift(); 3553 int beg_off = offset(); 3554 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3555 3556 BLOCK_COMMENT("cKlass decoder (const size) {"); 3557 3558 if (shift != 0) { // Shift required? 3559 z_sllg(dst, dst, shift); 3560 } 3561 if (base != NULL) { 3562 unsigned int base_h = ((unsigned long)base)>>32; 3563 unsigned int base_l = (unsigned int)((unsigned long)base); 3564 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3565 z_aih(dst, base_h); // Base has no set bits in lower half. 3566 } else if ((base_h == 0) && (base_l != 0)) { 3567 z_algfi(dst, base_l); // Base has no set bits in upper half. 3568 } else { 3569 load_const(Z_R0, base); // Base has set bits everywhere. 3570 z_algr(dst, Z_R0); 3571 } 3572 } 3573 3574 #ifdef ASSERT 3575 Label ok; 3576 z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment. 3577 z_brc(Assembler::bcondAllZero, ok); 3578 // The plain disassembler does not recognize illtrap. It instead displays 3579 // a 32-bit value. Issuing two illtraps assures the disassembler finds 3580 // the proper beginning of the next instruction. 3581 z_illtrap(0xd1); 3582 z_illtrap(0xd1); 3583 bind(ok); 3584 #endif 3585 assert(offset() == beg_off + instr_size_for_decode_klass_not_null(), "Code gen mismatch."); 3586 3587 BLOCK_COMMENT("} cKlass decoder (const size)"); 3588 } 3589 3590 // This variant of decode_klass_not_null() is for cases where 3591 // 1) the size of the generated instructions may vary 3592 // 2) the result is (potentially) stored in a register different from the source. 3593 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { 3594 address base = CompressedKlassPointers::base(); 3595 int shift = CompressedKlassPointers::shift(); 3596 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3597 3598 BLOCK_COMMENT("cKlass decoder {"); 3599 3600 if (src == noreg) src = dst; 3601 3602 if (shift != 0) { // Shift or at least move required? 
3603 z_sllg(dst, src, shift);
3604 } else {
3605 lgr_if_needed(dst, src);
3606 }
3607
3608 if (base != NULL) {
3609 unsigned int base_h = ((unsigned long)base)>>32;
3610 unsigned int base_l = (unsigned int)((unsigned long)base);
3611 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
3612 z_aih(dst, base_h); // Base has no set bits in lower half.
3613 } else if ((base_h == 0) && (base_l != 0)) {
3614 z_algfi(dst, base_l); // Base has no set bits in upper half.
3615 } else {
3616 load_const_optimized(Z_R0, base); // Base has set bits everywhere.
3617 z_algr(dst, Z_R0);
3618 }
3619 }
3620
3621 #ifdef ASSERT
3622 Label ok;
3623 z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment.
3624 z_brc(Assembler::bcondAllZero, ok);
3625 // The plain disassembler does not recognize illtrap. It instead displays
3626 // a 32-bit value. Issuing two illtraps assures the disassembler finds
3627 // the proper beginning of the next instruction.
3628 z_illtrap(0xd2);
3629 z_illtrap(0xd2);
3630 bind(ok);
3631 #endif
3632 BLOCK_COMMENT("} cKlass decoder");
3633 }
3634
3635 void MacroAssembler::load_klass(Register klass, Address mem) {
3636 if (UseCompressedClassPointers) {
3637 z_llgf(klass, mem);
3638 // Attention: no null check here!
3639 decode_klass_not_null(klass);
3640 } else {
3641 z_lg(klass, mem);
3642 }
3643 }
3644
3645 void MacroAssembler::load_klass(Register klass, Register src_oop) {
3646 if (UseCompressedClassPointers) {
3647 z_llgf(klass, oopDesc::klass_offset_in_bytes(), src_oop);
3648 // Attention: no null check here!
3649 decode_klass_not_null(klass);
3650 } else {
3651 z_lg(klass, oopDesc::klass_offset_in_bytes(), src_oop);
3652 }
3653 }
3654
3655 void MacroAssembler::load_klass_check_null(Register klass, Register src_oop, Register tmp) {
3656 null_check(src_oop, tmp, oopDesc::klass_offset_in_bytes());
3657 load_klass(klass, src_oop);
3658 }
3659
3660 void MacroAssembler::store_klass(Register klass, Register dst_oop, Register ck) {
3661 if (UseCompressedClassPointers) {
3662 assert_different_registers(dst_oop, klass, Z_R0);
3663 if (ck == noreg) ck = klass;
3664 encode_klass_not_null(ck, klass);
3665 z_st(ck, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
3666 } else {
3667 z_stg(klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
3668 }
3669 }
3670
3671 void MacroAssembler::store_klass_gap(Register s, Register d) {
3672 if (UseCompressedClassPointers) {
3673 assert(s != d, "not enough registers");
3674 // Support s = noreg.
3675 if (s != noreg) {
3676 z_st(s, Address(d, oopDesc::klass_gap_offset_in_bytes()));
3677 } else {
3678 z_mvhi(Address(d, oopDesc::klass_gap_offset_in_bytes()), 0);
3679 }
3680 }
3681 }
3682
3683 // Compare klass ptr in memory against klass ptr in register.
3684 //
3685 // Rop1 - klass in register, always uncompressed.
3686 // disp - Offset of klass in memory, compressed/uncompressed, depending on runtime flag.
3687 // Rbase - Base address of cKlass in memory.
3688 // maybeNULL - True if Rop1 possibly is a NULL.
3689 void MacroAssembler::compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybeNULL) { 3690 3691 BLOCK_COMMENT("compare klass ptr {"); 3692 3693 if (UseCompressedClassPointers) { 3694 const int shift = CompressedKlassPointers::shift(); 3695 address base = CompressedKlassPointers::base(); 3696 3697 assert((shift == 0) || (shift == LogKlassAlignmentInBytes), "cKlass encoder detected bad shift"); 3698 assert_different_registers(Rop1, Z_R0); 3699 assert_different_registers(Rop1, Rbase, Z_R1); 3700 3701 // First encode register oop and then compare with cOop in memory. 3702 // This sequence saves an unnecessary cOop load and decode. 3703 if (base == NULL) { 3704 if (shift == 0) { 3705 z_cl(Rop1, disp, Rbase); // Unscaled 3706 } else { 3707 z_srlg(Z_R0, Rop1, shift); // ZeroBased 3708 z_cl(Z_R0, disp, Rbase); 3709 } 3710 } else { // HeapBased 3711 #ifdef ASSERT 3712 bool used_R0 = true; 3713 bool used_R1 = true; 3714 #endif 3715 Register current = Rop1; 3716 Label done; 3717 3718 if (maybeNULL) { // NULL ptr must be preserved! 3719 z_ltgr(Z_R0, current); 3720 z_bre(done); 3721 current = Z_R0; 3722 } 3723 3724 unsigned int base_h = ((unsigned long)base)>>32; 3725 unsigned int base_l = (unsigned int)((unsigned long)base); 3726 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3727 lgr_if_needed(Z_R0, current); 3728 z_aih(Z_R0, -((int)base_h)); // Base has no set bits in lower half. 3729 } else if ((base_h == 0) && (base_l != 0)) { 3730 lgr_if_needed(Z_R0, current); 3731 z_agfi(Z_R0, -(int)base_l); 3732 } else { 3733 int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base)); 3734 add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); // Subtract base by adding complement. 3735 } 3736 3737 if (shift != 0) { 3738 z_srlg(Z_R0, Z_R0, shift); 3739 } 3740 bind(done); 3741 z_cl(Z_R0, disp, Rbase); 3742 #ifdef ASSERT 3743 if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2); 3744 if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2); 3745 #endif 3746 } 3747 } else { 3748 z_clg(Rop1, disp, Z_R0, Rbase); 3749 } 3750 BLOCK_COMMENT("} compare klass ptr"); 3751 } 3752 3753 //--------------------------- 3754 // Compressed oops 3755 //--------------------------- 3756 3757 void MacroAssembler::encode_heap_oop(Register oop) { 3758 oop_encoder(oop, oop, true /*maybe null*/); 3759 } 3760 3761 void MacroAssembler::encode_heap_oop_not_null(Register oop) { 3762 oop_encoder(oop, oop, false /*not null*/); 3763 } 3764 3765 // Called with something derived from the oop base. e.g. oop_base>>3. 3766 int MacroAssembler::get_oop_base_pow2_offset(uint64_t oop_base) { 3767 unsigned int oop_base_ll = ((unsigned int)(oop_base >> 0)) & 0xffff; 3768 unsigned int oop_base_lh = ((unsigned int)(oop_base >> 16)) & 0xffff; 3769 unsigned int oop_base_hl = ((unsigned int)(oop_base >> 32)) & 0xffff; 3770 unsigned int oop_base_hh = ((unsigned int)(oop_base >> 48)) & 0xffff; 3771 unsigned int n_notzero_parts = (oop_base_ll == 0 ? 0:1) 3772 + (oop_base_lh == 0 ? 0:1) 3773 + (oop_base_hl == 0 ? 0:1) 3774 + (oop_base_hh == 0 ? 0:1); 3775 3776 assert(oop_base != 0, "This is for HeapBased cOops only"); 3777 3778 if (n_notzero_parts != 1) { // Check if oop_base is just a few pages shy of a power of 2. 3779 uint64_t pow2_offset = 0x10000 - oop_base_ll; 3780 if (pow2_offset < 0x8000) { // This might not be necessary. 
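// Illustration (assumed value): oop_base = 0x00000001FFFFF000 has three non-zero
// halfwords (hl = 0x0001, lh = 0xFFFF, ll = 0xF000). Rounding up by
// pow2_offset = 0x10000 - 0xF000 = 0x1000 yields 0x0000000200000000, which has a
// single non-zero halfword. We then return -0x1000, so the caller can load the
// rounded-up base cheaply and fold the small offset into a later add.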
3781 uint64_t oop_base2 = oop_base + pow2_offset; 3782 3783 oop_base_ll = ((unsigned int)(oop_base2 >> 0)) & 0xffff; 3784 oop_base_lh = ((unsigned int)(oop_base2 >> 16)) & 0xffff; 3785 oop_base_hl = ((unsigned int)(oop_base2 >> 32)) & 0xffff; 3786 oop_base_hh = ((unsigned int)(oop_base2 >> 48)) & 0xffff; 3787 n_notzero_parts = (oop_base_ll == 0 ? 0:1) + 3788 (oop_base_lh == 0 ? 0:1) + 3789 (oop_base_hl == 0 ? 0:1) + 3790 (oop_base_hh == 0 ? 0:1); 3791 if (n_notzero_parts == 1) { 3792 assert(-(int64_t)pow2_offset != (int64_t)-1, "We use -1 to signal uninitialized base register"); 3793 return -pow2_offset; 3794 } 3795 } 3796 } 3797 return 0; 3798 } 3799 3800 // If base address is offset from a straight power of two by just a few pages, 3801 // return this offset to the caller for a possible later composite add. 3802 // TODO/FIX: will only work correctly for 4k pages. 3803 int MacroAssembler::get_oop_base(Register Rbase, uint64_t oop_base) { 3804 int pow2_offset = get_oop_base_pow2_offset(oop_base); 3805 3806 load_const_optimized(Rbase, oop_base - pow2_offset); // Best job possible. 3807 3808 return pow2_offset; 3809 } 3810 3811 int MacroAssembler::get_oop_base_complement(Register Rbase, uint64_t oop_base) { 3812 int offset = get_oop_base(Rbase, oop_base); 3813 z_lcgr(Rbase, Rbase); 3814 return -offset; 3815 } 3816 3817 // Compare compressed oop in memory against oop in register. 3818 // Rop1 - Oop in register. 3819 // disp - Offset of cOop in memory. 3820 // Rbase - Base address of cOop in memory. 3821 // maybeNULL - True if Rop1 possibly is a NULL. 3822 // maybeNULLtarget - Branch target for Rop1 == NULL, if flow control shall NOT continue with compare instruction. 3823 void MacroAssembler::compare_heap_oop(Register Rop1, Address mem, bool maybeNULL) { 3824 Register Rbase = mem.baseOrR0(); 3825 Register Rindex = mem.indexOrR0(); 3826 int64_t disp = mem.disp(); 3827 3828 const int shift = CompressedOops::shift(); 3829 address base = CompressedOops::base(); 3830 3831 assert(UseCompressedOops, "must be on to call this method"); 3832 assert(Universe::heap() != NULL, "java heap must be initialized to call this method"); 3833 assert((shift == 0) || (shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift"); 3834 assert_different_registers(Rop1, Z_R0); 3835 assert_different_registers(Rop1, Rbase, Z_R1); 3836 assert_different_registers(Rop1, Rindex, Z_R1); 3837 3838 BLOCK_COMMENT("compare heap oop {"); 3839 3840 // First encode register oop and then compare with cOop in memory. 3841 // This sequence saves an unnecessary cOop load and decode. 3842 if (base == NULL) { 3843 if (shift == 0) { 3844 z_cl(Rop1, disp, Rindex, Rbase); // Unscaled 3845 } else { 3846 z_srlg(Z_R0, Rop1, shift); // ZeroBased 3847 z_cl(Z_R0, disp, Rindex, Rbase); 3848 } 3849 } else { // HeapBased 3850 #ifdef ASSERT 3851 bool used_R0 = true; 3852 bool used_R1 = true; 3853 #endif 3854 Label done; 3855 int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base)); 3856 3857 if (maybeNULL) { // NULL ptr must be preserved! 
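// A NULL oop must compare as narrow value 0. If Rop1 is NULL, skip the base/shift
// arithmetic and compare Z_R0 (== 0) against the compressed oop in memory directly.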
3858 z_ltgr(Z_R0, Rop1); 3859 z_bre(done); 3860 } 3861 3862 add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); 3863 z_srlg(Z_R0, Z_R0, shift); 3864 3865 bind(done); 3866 z_cl(Z_R0, disp, Rindex, Rbase); 3867 #ifdef ASSERT 3868 if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2); 3869 if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2); 3870 #endif 3871 } 3872 BLOCK_COMMENT("} compare heap oop"); 3873 } 3874 3875 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, 3876 const Address& addr, Register val, 3877 Register tmp1, Register tmp2, Register tmp3) { 3878 assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL | 3879 ON_UNKNOWN_OOP_REF)) == 0, "unsupported decorator"); 3880 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 3881 decorators = AccessInternal::decorator_fixup(decorators, type); 3882 bool as_raw = (decorators & AS_RAW) != 0; 3883 if (as_raw) { 3884 bs->BarrierSetAssembler::store_at(this, decorators, type, 3885 addr, val, 3886 tmp1, tmp2, tmp3); 3887 } else { 3888 bs->store_at(this, decorators, type, 3889 addr, val, 3890 tmp1, tmp2, tmp3); 3891 } 3892 } 3893 3894 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, 3895 const Address& addr, Register dst, 3896 Register tmp1, Register tmp2, Label *is_null) { 3897 assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL | 3898 ON_PHANTOM_OOP_REF | ON_WEAK_OOP_REF)) == 0, "unsupported decorator"); 3899 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 3900 decorators = AccessInternal::decorator_fixup(decorators, type); 3901 bool as_raw = (decorators & AS_RAW) != 0; 3902 if (as_raw) { 3903 bs->BarrierSetAssembler::load_at(this, decorators, type, 3904 addr, dst, 3905 tmp1, tmp2, is_null); 3906 } else { 3907 bs->load_at(this, decorators, type, 3908 addr, dst, 3909 tmp1, tmp2, is_null); 3910 } 3911 } 3912 3913 void MacroAssembler::load_heap_oop(Register dest, const Address &a, 3914 Register tmp1, Register tmp2, 3915 DecoratorSet decorators, Label *is_null) { 3916 access_load_at(T_OBJECT, IN_HEAP | decorators, a, dest, tmp1, tmp2, is_null); 3917 } 3918 3919 void MacroAssembler::store_heap_oop(Register Roop, const Address &a, 3920 Register tmp1, Register tmp2, Register tmp3, 3921 DecoratorSet decorators) { 3922 access_store_at(T_OBJECT, IN_HEAP | decorators, a, Roop, tmp1, tmp2, tmp3); 3923 } 3924 3925 //------------------------------------------------- 3926 // Encode compressed oop. Generally usable encoder. 3927 //------------------------------------------------- 3928 // Rsrc - contains regular oop on entry. It remains unchanged. 3929 // Rdst - contains compressed oop on exit. 3930 // Rdst and Rsrc may indicate same register, in which case Rsrc does not remain unchanged. 3931 // 3932 // Rdst must not indicate scratch register Z_R1 (Z_R1_scratch) for functionality. 3933 // Rdst should not indicate scratch register Z_R0 (Z_R0_scratch) for performance. 3934 // 3935 // only32bitValid is set, if later code only uses the lower 32 bits. In this 3936 // case we must not fix the upper 32 bits. 
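// Note: when CompressedOops::base_disjoint() holds (or the base is NULL), the base bits
// do not overlap the shifted oop bits. Encoding then degenerates to a plain shift (see
// the zeroBase path below), and the matching decoder can restore the base with an OR
// instead of an ADD (see oop_decoder).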
3937 void MacroAssembler::oop_encoder(Register Rdst, Register Rsrc, bool maybeNULL, 3938 Register Rbase, int pow2_offset, bool only32bitValid) { 3939 3940 const address oop_base = CompressedOops::base(); 3941 const int oop_shift = CompressedOops::shift(); 3942 const bool disjoint = CompressedOops::base_disjoint(); 3943 3944 assert(UseCompressedOops, "must be on to call this method"); 3945 assert(Universe::heap() != NULL, "java heap must be initialized to call this encoder"); 3946 assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift"); 3947 3948 if (disjoint || (oop_base == NULL)) { 3949 BLOCK_COMMENT("cOop encoder zeroBase {"); 3950 if (oop_shift == 0) { 3951 if (oop_base != NULL && !only32bitValid) { 3952 z_llgfr(Rdst, Rsrc); // Clear upper bits in case the register will be decoded again. 3953 } else { 3954 lgr_if_needed(Rdst, Rsrc); 3955 } 3956 } else { 3957 z_srlg(Rdst, Rsrc, oop_shift); 3958 if (oop_base != NULL && !only32bitValid) { 3959 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 3960 } 3961 } 3962 BLOCK_COMMENT("} cOop encoder zeroBase"); 3963 return; 3964 } 3965 3966 bool used_R0 = false; 3967 bool used_R1 = false; 3968 3969 BLOCK_COMMENT("cOop encoder general {"); 3970 assert_different_registers(Rdst, Z_R1); 3971 assert_different_registers(Rsrc, Rbase); 3972 if (maybeNULL) { 3973 Label done; 3974 // We reorder shifting and subtracting, so that we can compare 3975 // and shift in parallel: 3976 // 3977 // cycle 0: potential LoadN, base = <const> 3978 // cycle 1: base = !base dst = src >> 3, cmp cr = (src != 0) 3979 // cycle 2: if (cr) br, dst = dst + base + offset 3980 3981 // Get oop_base components. 3982 if (pow2_offset == -1) { 3983 if (Rdst == Rbase) { 3984 if (Rdst == Z_R1 || Rsrc == Z_R1) { 3985 Rbase = Z_R0; 3986 used_R0 = true; 3987 } else { 3988 Rdst = Z_R1; 3989 used_R1 = true; 3990 } 3991 } 3992 if (Rbase == Z_R1) { 3993 used_R1 = true; 3994 } 3995 pow2_offset = get_oop_base_complement(Rbase, ((uint64_t)(intptr_t)oop_base) >> oop_shift); 3996 } 3997 assert_different_registers(Rdst, Rbase); 3998 3999 // Check for NULL oop (must be left alone) and shift. 4000 if (oop_shift != 0) { // Shift out alignment bits 4001 if (((intptr_t)oop_base&0xc000000000000000L) == 0L) { // We are sure: no single address will have the leftmost bit set. 4002 z_srag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4003 } else { 4004 z_srlg(Rdst, Rsrc, oop_shift); 4005 z_ltgr(Rsrc, Rsrc); // This is the recommended way of testing for zero. 4006 // This probably is faster, as it does not write a register. No! 4007 // z_cghi(Rsrc, 0); 4008 } 4009 } else { 4010 z_ltgr(Rdst, Rsrc); // Move NULL to result register. 4011 } 4012 z_bre(done); 4013 4014 // Subtract oop_base components. 4015 if ((Rdst == Z_R0) || (Rbase == Z_R0)) { 4016 z_algr(Rdst, Rbase); 4017 if (pow2_offset != 0) { add2reg(Rdst, pow2_offset); } 4018 } else { 4019 add2reg_with_index(Rdst, pow2_offset, Rbase, Rdst); 4020 } 4021 if (!only32bitValid) { 4022 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 4023 } 4024 bind(done); 4025 4026 } else { // not null 4027 // Get oop_base components. 4028 if (pow2_offset == -1) { 4029 pow2_offset = get_oop_base_complement(Rbase, (uint64_t)(intptr_t)oop_base); 4030 } 4031 4032 // Subtract oop_base components and shift. 4033 if (Rdst == Z_R0 || Rsrc == Z_R0 || Rbase == Z_R0) { 4034 // Don't use lay instruction. 
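// (On z/Architecture, general register 0 acts as "no register" when used as base or
// index in address generation, so LAY-style addressing is not usable once Z_R0 is involved.)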
4035 if (Rdst == Rsrc) { 4036 z_algr(Rdst, Rbase); 4037 } else { 4038 lgr_if_needed(Rdst, Rbase); 4039 z_algr(Rdst, Rsrc); 4040 } 4041 if (pow2_offset != 0) add2reg(Rdst, pow2_offset); 4042 } else { 4043 add2reg_with_index(Rdst, pow2_offset, Rbase, Rsrc); 4044 } 4045 if (oop_shift != 0) { // Shift out alignment bits. 4046 z_srlg(Rdst, Rdst, oop_shift); 4047 } 4048 if (!only32bitValid) { 4049 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 4050 } 4051 } 4052 #ifdef ASSERT 4053 if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb01bUL, 2); } 4054 if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb02bUL, 2); } 4055 #endif 4056 BLOCK_COMMENT("} cOop encoder general"); 4057 } 4058 4059 //------------------------------------------------- 4060 // decode compressed oop. Generally usable decoder. 4061 //------------------------------------------------- 4062 // Rsrc - contains compressed oop on entry. 4063 // Rdst - contains regular oop on exit. 4064 // Rdst and Rsrc may indicate same register. 4065 // Rdst must not be the same register as Rbase, if Rbase was preloaded (before call). 4066 // Rdst can be the same register as Rbase. Then, either Z_R0 or Z_R1 must be available as scratch. 4067 // Rbase - register to use for the base 4068 // pow2_offset - offset of base to nice value. If -1, base must be loaded. 4069 // For performance, it is good to 4070 // - avoid Z_R0 for any of the argument registers. 4071 // - keep Rdst and Rsrc distinct from Rbase. Rdst == Rsrc is ok for performance. 4072 // - avoid Z_R1 for Rdst if Rdst == Rbase. 4073 void MacroAssembler::oop_decoder(Register Rdst, Register Rsrc, bool maybeNULL, Register Rbase, int pow2_offset) { 4074 4075 const address oop_base = CompressedOops::base(); 4076 const int oop_shift = CompressedOops::shift(); 4077 const bool disjoint = CompressedOops::base_disjoint(); 4078 4079 assert(UseCompressedOops, "must be on to call this method"); 4080 assert(Universe::heap() != NULL, "java heap must be initialized to call this decoder"); 4081 assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), 4082 "cOop encoder detected bad shift"); 4083 4084 // cOops are always loaded zero-extended from memory. No explicit zero-extension necessary. 4085 4086 if (oop_base != NULL) { 4087 unsigned int oop_base_hl = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xffff; 4088 unsigned int oop_base_hh = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 48)) & 0xffff; 4089 unsigned int oop_base_hf = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xFFFFffff; 4090 if (disjoint && (oop_base_hl == 0 || oop_base_hh == 0)) { 4091 BLOCK_COMMENT("cOop decoder disjointBase {"); 4092 // We do not need to load the base. Instead, we can install the upper bits 4093 // with an OR instead of an ADD. 4094 Label done; 4095 4096 // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set. 4097 if (maybeNULL) { // NULL ptr must be preserved! 4098 z_slag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4099 z_bre(done); 4100 } else { 4101 z_sllg(Rdst, Rsrc, oop_shift); // Logical shift leaves condition code alone. 
4102 } 4103 if ((oop_base_hl != 0) && (oop_base_hh != 0)) { 4104 z_oihf(Rdst, oop_base_hf); 4105 } else if (oop_base_hl != 0) { 4106 z_oihl(Rdst, oop_base_hl); 4107 } else { 4108 assert(oop_base_hh != 0, "not heapbased mode"); 4109 z_oihh(Rdst, oop_base_hh); 4110 } 4111 bind(done); 4112 BLOCK_COMMENT("} cOop decoder disjointBase"); 4113 } else { 4114 BLOCK_COMMENT("cOop decoder general {"); 4115 // There are three decode steps: 4116 // scale oop offset (shift left) 4117 // get base (in reg) and pow2_offset (constant) 4118 // add base, pow2_offset, and oop offset 4119 // The following register overlap situations may exist: 4120 // Rdst == Rsrc, Rbase any other 4121 // not a problem. Scaling in-place leaves Rbase undisturbed. 4122 // Loading Rbase does not impact the scaled offset. 4123 // Rdst == Rbase, Rsrc any other 4124 // scaling would destroy a possibly preloaded Rbase. Loading Rbase 4125 // would destroy the scaled offset. 4126 // Remedy: use Rdst_tmp if Rbase has been preloaded. 4127 // use Rbase_tmp if base has to be loaded. 4128 // Rsrc == Rbase, Rdst any other 4129 // Only possible without preloaded Rbase. 4130 // Loading Rbase does not destroy compressed oop because it was scaled into Rdst before. 4131 // Rsrc == Rbase, Rdst == Rbase 4132 // Only possible without preloaded Rbase. 4133 // Loading Rbase would destroy compressed oop. Scaling in-place is ok. 4134 // Remedy: use Rbase_tmp. 4135 // 4136 Label done; 4137 Register Rdst_tmp = Rdst; 4138 Register Rbase_tmp = Rbase; 4139 bool used_R0 = false; 4140 bool used_R1 = false; 4141 bool base_preloaded = pow2_offset >= 0; 4142 guarantee(!(base_preloaded && (Rsrc == Rbase)), "Register clash, check caller"); 4143 assert(oop_shift != 0, "room for optimization"); 4144 4145 // Check if we need to use scratch registers. 4146 if (Rdst == Rbase) { 4147 assert(!(((Rdst == Z_R0) && (Rsrc == Z_R1)) || ((Rdst == Z_R1) && (Rsrc == Z_R0))), "need a scratch reg"); 4148 if (Rdst != Rsrc) { 4149 if (base_preloaded) { Rdst_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; } 4150 else { Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; } 4151 } else { 4152 Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; 4153 } 4154 } 4155 if (base_preloaded) lgr_if_needed(Rbase_tmp, Rbase); 4156 4157 // Scale oop and check for NULL. 4158 // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set. 4159 if (maybeNULL) { // NULL ptr must be preserved! 4160 z_slag(Rdst_tmp, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4161 z_bre(done); 4162 } else { 4163 z_sllg(Rdst_tmp, Rsrc, oop_shift); // Logical shift leaves condition code alone. 4164 } 4165 4166 // Get oop_base components. 4167 if (!base_preloaded) { 4168 pow2_offset = get_oop_base(Rbase_tmp, (uint64_t)(intptr_t)oop_base); 4169 } 4170 4171 // Add up all components. 
4172 if ((Rbase_tmp == Z_R0) || (Rdst_tmp == Z_R0)) { 4173 z_algr(Rdst_tmp, Rbase_tmp); 4174 if (pow2_offset != 0) { add2reg(Rdst_tmp, pow2_offset); } 4175 } else { 4176 add2reg_with_index(Rdst_tmp, pow2_offset, Rbase_tmp, Rdst_tmp); 4177 } 4178 4179 bind(done); 4180 lgr_if_needed(Rdst, Rdst_tmp); 4181 #ifdef ASSERT 4182 if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb03bUL, 2); } 4183 if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb04bUL, 2); } 4184 #endif 4185 BLOCK_COMMENT("} cOop decoder general"); 4186 } 4187 } else { 4188 BLOCK_COMMENT("cOop decoder zeroBase {"); 4189 if (oop_shift == 0) { 4190 lgr_if_needed(Rdst, Rsrc); 4191 } else { 4192 z_sllg(Rdst, Rsrc, oop_shift); 4193 } 4194 BLOCK_COMMENT("} cOop decoder zeroBase"); 4195 } 4196 } 4197 4198 // ((OopHandle)result).resolve(); 4199 void MacroAssembler::resolve_oop_handle(Register result) { 4200 // OopHandle::resolve is an indirection. 4201 z_lg(result, 0, result); 4202 } 4203 4204 void MacroAssembler::load_mirror_from_const_method(Register mirror, Register const_method) { 4205 mem2reg_opt(mirror, Address(const_method, ConstMethod::constants_offset())); 4206 mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes())); 4207 mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset())); 4208 resolve_oop_handle(mirror); 4209 } 4210 4211 void MacroAssembler::load_method_holder(Register holder, Register method) { 4212 mem2reg_opt(holder, Address(method, Method::const_offset())); 4213 mem2reg_opt(holder, Address(holder, ConstMethod::constants_offset())); 4214 mem2reg_opt(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); 4215 } 4216 4217 //--------------------------------------------------------------- 4218 //--- Operations on arrays. 4219 //--------------------------------------------------------------- 4220 4221 // Compiler ensures base is doubleword aligned and cnt is #doublewords. 4222 // Emitter does not KILL cnt and base arguments, since they need to be copied to 4223 // work registers anyway. 4224 // Actually, only r0, r1, and r5 are killed. 4225 unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register odd_tmp_reg) { 4226 4227 int block_start = offset(); 4228 Register dst_len = Z_R1; // Holds dst len for MVCLE. 4229 Register dst_addr = Z_R0; // Holds dst addr for MVCLE. 4230 4231 Label doXC, doMVCLE, done; 4232 4233 BLOCK_COMMENT("Clear_Array {"); 4234 4235 // Check for zero len and convert to long. 4236 z_ltgfr(odd_tmp_reg, cnt_arg); 4237 z_bre(done); // Nothing to do if len == 0. 4238 4239 // Prefetch data to be cleared. 4240 if (VM_Version::has_Prefetch()) { 4241 z_pfd(0x02, 0, Z_R0, base_pointer_arg); 4242 z_pfd(0x02, 256, Z_R0, base_pointer_arg); 4243 } 4244 4245 z_sllg(dst_len, odd_tmp_reg, 3); // #bytes to clear. 4246 z_cghi(odd_tmp_reg, 32); // Check for len <= 256 bytes (<=32 DW). 4247 z_brnh(doXC); // If so, use executed XC to clear. 4248 4249 // MVCLE: initialize long arrays (general case). 4250 bind(doMVCLE); 4251 z_lgr(dst_addr, base_pointer_arg); 4252 // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0. 4253 // The even register of the register pair is not killed. 4254 clear_reg(odd_tmp_reg, true, false); 4255 MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding()-1), 0); 4256 z_bru(done); 4257 4258 // XC: initialize short arrays. 4259 Label XC_template; // Instr template, never exec directly! 
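// The XC below is only a template for EXECUTE (EX/EXRL): the execute instruction supplies
// the actual length (dst_len) at run time, so a single template covers any length up to 256 bytes.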
4260 bind(XC_template); 4261 z_xc(0,0,base_pointer_arg,0,base_pointer_arg); 4262 4263 bind(doXC); 4264 add2reg(dst_len, -1); // Get #bytes-1 for EXECUTE. 4265 if (VM_Version::has_ExecuteExtensions()) { 4266 z_exrl(dst_len, XC_template); // Execute XC with var. len. 4267 } else { 4268 z_larl(odd_tmp_reg, XC_template); 4269 z_ex(dst_len,0,Z_R0,odd_tmp_reg); // Execute XC with var. len. 4270 } 4271 // z_bru(done); // fallthru 4272 4273 bind(done); 4274 4275 BLOCK_COMMENT("} Clear_Array"); 4276 4277 int block_end = offset(); 4278 return block_end - block_start; 4279 } 4280 4281 // Compiler ensures base is doubleword aligned and cnt is count of doublewords. 4282 // Emitter does not KILL any arguments nor work registers. 4283 // Emitter generates up to 16 XC instructions, depending on the array length. 4284 unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) { 4285 int block_start = offset(); 4286 int off; 4287 int lineSize_Bytes = AllocatePrefetchStepSize; 4288 int lineSize_DW = AllocatePrefetchStepSize>>LogBytesPerWord; 4289 bool doPrefetch = VM_Version::has_Prefetch(); 4290 int XC_maxlen = 256; 4291 int numXCInstr = cnt > 0 ? (cnt*BytesPerWord-1)/XC_maxlen+1 : 0; 4292 4293 BLOCK_COMMENT("Clear_Array_Const {"); 4294 assert(cnt*BytesPerWord <= 4096, "ClearArrayConst can handle 4k only"); 4295 4296 // Do less prefetching for very short arrays. 4297 if (numXCInstr > 0) { 4298 // Prefetch only some cache lines, then begin clearing. 4299 if (doPrefetch) { 4300 if (cnt*BytesPerWord <= lineSize_Bytes/4) { // If less than 1/4 of a cache line to clear, 4301 z_pfd(0x02, 0, Z_R0, base); // prefetch just the first cache line. 4302 } else { 4303 assert(XC_maxlen == lineSize_Bytes, "ClearArrayConst needs 256B cache lines"); 4304 for (off = 0; (off < AllocatePrefetchLines) && (off <= numXCInstr); off ++) { 4305 z_pfd(0x02, off*lineSize_Bytes, Z_R0, base); 4306 } 4307 } 4308 } 4309 4310 for (off=0; off<(numXCInstr-1); off++) { 4311 z_xc(off*XC_maxlen, XC_maxlen-1, base, off*XC_maxlen, base); 4312 4313 // Prefetch some cache lines in advance. 4314 if (doPrefetch && (off <= numXCInstr-AllocatePrefetchLines)) { 4315 z_pfd(0x02, (off+AllocatePrefetchLines)*lineSize_Bytes, Z_R0, base); 4316 } 4317 } 4318 if (off*XC_maxlen < cnt*BytesPerWord) { 4319 z_xc(off*XC_maxlen, (cnt*BytesPerWord-off*XC_maxlen)-1, base, off*XC_maxlen, base); 4320 } 4321 } 4322 BLOCK_COMMENT("} Clear_Array_Const"); 4323 4324 int block_end = offset(); 4325 return block_end - block_start; 4326 } 4327 4328 // Compiler ensures base is doubleword aligned and cnt is #doublewords. 4329 // Emitter does not KILL cnt and base arguments, since they need to be copied to 4330 // work registers anyway. 4331 // Actually, only r0, r1, (which are work registers) and odd_tmp_reg are killed. 4332 // 4333 // For very large arrays, exploit MVCLE H/W support. 4334 // MVCLE instruction automatically exploits H/W-optimized page mover. 4335 // - Bytes up to next page boundary are cleared with a series of XC to self. 4336 // - All full pages are cleared with the page mover H/W assist. 4337 // - Remaining bytes are again cleared by a series of XC to self. 4338 // 4339 unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register odd_tmp_reg) { 4340 4341 int block_start = offset(); 4342 Register dst_len = Z_R1; // Holds dst len for MVCLE. 4343 Register dst_addr = Z_R0; // Holds dst addr for MVCLE. 4344 4345 BLOCK_COMMENT("Clear_Array_Const_Big {"); 4346 4347 // Get len to clear. 
4348 load_const_optimized(dst_len, (long)cnt*8L); // in Bytes = #DW*8 4349 4350 // Prepare other args to MVCLE. 4351 z_lgr(dst_addr, base_pointer_arg); 4352 // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0. 4353 // The even register of the register pair is not killed. 4354 (void) clear_reg(odd_tmp_reg, true, false); // Src len of MVCLE is zero. 4355 MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding() - 1), 0); 4356 BLOCK_COMMENT("} Clear_Array_Const_Big"); 4357 4358 int block_end = offset(); 4359 return block_end - block_start; 4360 } 4361 4362 // Allocator. 4363 unsigned int MacroAssembler::CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg, 4364 Register cnt_reg, 4365 Register tmp1_reg, Register tmp2_reg) { 4366 // Tmp1 is oddReg. 4367 // Tmp2 is evenReg. 4368 4369 int block_start = offset(); 4370 Label doMVC, doMVCLE, done, MVC_template; 4371 4372 BLOCK_COMMENT("CopyRawMemory_AlignedDisjoint {"); 4373 4374 // Check for zero len and convert to long. 4375 z_ltgfr(cnt_reg, cnt_reg); // Remember casted value for doSTG case. 4376 z_bre(done); // Nothing to do if len == 0. 4377 4378 z_sllg(Z_R1, cnt_reg, 3); // Dst len in bytes. calc early to have the result ready. 4379 4380 z_cghi(cnt_reg, 32); // Check for len <= 256 bytes (<=32 DW). 4381 z_brnh(doMVC); // If so, use executed MVC to clear. 4382 4383 bind(doMVCLE); // A lot of data (more than 256 bytes). 4384 // Prep dest reg pair. 4385 z_lgr(Z_R0, dst_reg); // dst addr 4386 // Dst len already in Z_R1. 4387 // Prep src reg pair. 4388 z_lgr(tmp2_reg, src_reg); // src addr 4389 z_lgr(tmp1_reg, Z_R1); // Src len same as dst len. 4390 4391 // Do the copy. 4392 move_long_ext(Z_R0, tmp2_reg, 0xb0); // Bypass cache. 4393 z_bru(done); // All done. 4394 4395 bind(MVC_template); // Just some data (not more than 256 bytes). 4396 z_mvc(0, 0, dst_reg, 0, src_reg); 4397 4398 bind(doMVC); 4399 4400 if (VM_Version::has_ExecuteExtensions()) { 4401 add2reg(Z_R1, -1); 4402 } else { 4403 add2reg(tmp1_reg, -1, Z_R1); 4404 z_larl(Z_R1, MVC_template); 4405 } 4406 4407 if (VM_Version::has_Prefetch()) { 4408 z_pfd(1, 0,Z_R0,src_reg); 4409 z_pfd(2, 0,Z_R0,dst_reg); 4410 // z_pfd(1,256,Z_R0,src_reg); // Assume very short copy. 4411 // z_pfd(2,256,Z_R0,dst_reg); 4412 } 4413 4414 if (VM_Version::has_ExecuteExtensions()) { 4415 z_exrl(Z_R1, MVC_template); 4416 } else { 4417 z_ex(tmp1_reg, 0, Z_R0, Z_R1); 4418 } 4419 4420 bind(done); 4421 4422 BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint"); 4423 4424 int block_end = offset(); 4425 return block_end - block_start; 4426 } 4427 4428 //------------------------------------------------- 4429 // Constants (scalar and oop) in constant pool 4430 //------------------------------------------------- 4431 4432 // Add a non-relocated constant to the CP. 4433 int MacroAssembler::store_const_in_toc(AddressLiteral& val) { 4434 long value = val.value(); 4435 address tocPos = long_constant(value); 4436 4437 if (tocPos != NULL) { 4438 int tocOffset = (int)(tocPos - code()->consts()->start()); 4439 return tocOffset; 4440 } 4441 // Address_constant returned NULL, so no constant entry has been created. 4442 // In that case, we return a "fatal" offset, just in case that subsequently 4443 // generated access code is executed. 4444 return -1; 4445 } 4446 4447 // Returns the TOC offset where the address is stored. 4448 // Add a relocated constant to the CP. 
4449 int MacroAssembler::store_oop_in_toc(AddressLiteral& oop) { 4450 // Use RelocationHolder::none for the constant pool entry. 4451 // Otherwise we will end up with a failing NativeCall::verify(x), 4452 // where x is the address of the constant pool entry. 4453 address tocPos = address_constant((address)oop.value(), RelocationHolder::none); 4454 4455 if (tocPos != NULL) { 4456 int tocOffset = (int)(tocPos - code()->consts()->start()); 4457 RelocationHolder rsp = oop.rspec(); 4458 Relocation *rel = rsp.reloc(); 4459 4460 // Store toc_offset in relocation, used by call_far_patchable. 4461 if ((relocInfo::relocType)rel->type() == relocInfo::runtime_call_w_cp_type) { 4462 ((runtime_call_w_cp_Relocation *)(rel))->set_constant_pool_offset(tocOffset); 4463 } 4464 // Relocate at the load's pc. 4465 relocate(rsp); 4466 4467 return tocOffset; 4468 } 4469 // Address_constant returned NULL, so no constant entry has been created 4470 // in that case, we return a "fatal" offset, just in case that subsequently 4471 // generated access code is executed. 4472 return -1; 4473 } 4474 4475 bool MacroAssembler::load_const_from_toc(Register dst, AddressLiteral& a, Register Rtoc) { 4476 int tocOffset = store_const_in_toc(a); 4477 if (tocOffset == -1) return false; 4478 address tocPos = tocOffset + code()->consts()->start(); 4479 assert((address)code()->consts()->start() != NULL, "Please add CP address"); 4480 relocate(a.rspec()); 4481 load_long_pcrelative(dst, tocPos); 4482 return true; 4483 } 4484 4485 bool MacroAssembler::load_oop_from_toc(Register dst, AddressLiteral& a, Register Rtoc) { 4486 int tocOffset = store_oop_in_toc(a); 4487 if (tocOffset == -1) return false; 4488 address tocPos = tocOffset + code()->consts()->start(); 4489 assert((address)code()->consts()->start() != NULL, "Please add CP address"); 4490 4491 load_addr_pcrelative(dst, tocPos); 4492 return true; 4493 } 4494 4495 // If the instruction sequence at the given pc is a load_const_from_toc 4496 // sequence, return the value currently stored at the referenced position 4497 // in the TOC. 4498 intptr_t MacroAssembler::get_const_from_toc(address pc) { 4499 4500 assert(is_load_const_from_toc(pc), "must be load_const_from_pool"); 4501 4502 long offset = get_load_const_from_toc_offset(pc); 4503 address dataLoc = NULL; 4504 if (is_load_const_from_toc_pcrelative(pc)) { 4505 dataLoc = pc + offset; 4506 } else { 4507 CodeBlob* cb = CodeCache::find_blob(pc); 4508 assert(cb && cb->is_nmethod(), "sanity"); 4509 nmethod* nm = (nmethod*)cb; 4510 dataLoc = nm->ctable_begin() + offset; 4511 } 4512 return *(intptr_t *)dataLoc; 4513 } 4514 4515 // If the instruction sequence at the given pc is a load_const_from_toc 4516 // sequence, copy the passed-in new_data value into the referenced 4517 // position in the TOC. 4518 void MacroAssembler::set_const_in_toc(address pc, unsigned long new_data, CodeBlob *cb) { 4519 assert(is_load_const_from_toc(pc), "must be load_const_from_pool"); 4520 4521 long offset = MacroAssembler::get_load_const_from_toc_offset(pc); 4522 address dataLoc = NULL; 4523 if (is_load_const_from_toc_pcrelative(pc)) { 4524 dataLoc = pc+offset; 4525 } else { 4526 nmethod* nm = CodeCache::find_nmethod(pc); 4527 assert((cb == NULL) || (nm == (nmethod*)cb), "instruction address should be in CodeBlob"); 4528 dataLoc = nm->ctable_begin() + offset; 4529 } 4530 if (*(unsigned long *)dataLoc != new_data) { // Prevent cache invalidation: update only if necessary. 4531 *(unsigned long *)dataLoc = new_data; 4532 } 4533 } 4534 4535 // Dynamic TOC. 
Getter must only be called if "a" is a load_const_from_toc
4536 // site. Verify by calling is_load_const_from_toc() before!!
4537 // Offset is +/- 2**32 -> use long.
4538 long MacroAssembler::get_load_const_from_toc_offset(address a) {
4539 assert(is_load_const_from_toc_pcrelative(a), "expected pc relative load");
4540 // expected code sequence:
4541 // z_lgrl(t, simm32); len = 6
4542 unsigned long inst;
4543 unsigned int len = get_instruction(a, &inst);
4544 return get_pcrel_offset(inst);
4545 }
4546
4547 //**********************************************************************************
4548 // inspection of generated instruction sequences for a particular pattern
4549 //**********************************************************************************
4550
4551 bool MacroAssembler::is_load_const_from_toc_pcrelative(address a) {
4552 #ifdef ASSERT
4553 unsigned long inst;
4554 unsigned int len = get_instruction(a+2, &inst);
4555 if ((len == 6) && is_load_pcrelative_long(a) && is_call_pcrelative_long(inst)) {
4556 const int range = 128;
4557 Assembler::dump_code_range(tty, a, range, "instr(a) == z_lgrl && instr(a+2) == z_brasl");
4558 VM_Version::z_SIGSEGV();
4559 }
4560 #endif
4561 // expected code sequence:
4562 // z_lgrl(t, relAddr32); len = 6
4563 //TODO: verify accessed data is in CP, if possible.
4564 return is_load_pcrelative_long(a); // TODO: might be too general. Currently, only lgrl is used.
4565 }
4566
4567 bool MacroAssembler::is_load_const_from_toc_call(address a) {
4568 return is_load_const_from_toc(a) && is_call_byregister(a + load_const_from_toc_size());
4569 }
4570
4571 bool MacroAssembler::is_load_const_call(address a) {
4572 return is_load_const(a) && is_call_byregister(a + load_const_size());
4573 }
4574
4575 //-------------------------------------------------
4576 // Emitters for some really CISC instructions
4577 //-------------------------------------------------
4578
4579 void MacroAssembler::move_long_ext(Register dst, Register src, unsigned int pad) {
4580 assert(dst->encoding()%2==0, "must be an even/odd register pair");
4581 assert(src->encoding()%2==0, "must be an even/odd register pair");
4582 assert(pad<256, "must be a padding BYTE");
4583
4584 Label retry;
4585 bind(retry);
4586 Assembler::z_mvcle(dst, src, pad);
4587 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
4588 }
4589
4590 void MacroAssembler::compare_long_ext(Register left, Register right, unsigned int pad) {
4591 assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
4592 assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
4593 assert(pad<256, "must be a padding BYTE");
4594
4595 Label retry;
4596 bind(retry);
4597 Assembler::z_clcle(left, right, pad, Z_R0);
4598 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
4599 }
4600
4601 void MacroAssembler::compare_long_uni(Register left, Register right, unsigned int pad) {
4602 assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
4603 assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
4604 assert(pad<=0xfff, "must be a padding HALFWORD");
4605 assert(VM_Version::has_ETF2(), "instruction must be available");
4606
4607 Label retry;
4608 bind(retry);
4609 Assembler::z_clclu(left, right, pad, Z_R0);
4610 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
4611 }
4612
4613 void MacroAssembler::search_string(Register end, Register start) {
4614 assert(end->encoding() != 0, "end address must not be in R0");
4615
assert(start->encoding() != 0, "start address must not be in R0"); 4616 4617 Label retry; 4618 bind(retry); 4619 Assembler::z_srst(end, start); 4620 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4621 } 4622 4623 void MacroAssembler::search_string_uni(Register end, Register start) { 4624 assert(end->encoding() != 0, "end address must not be in R0"); 4625 assert(start->encoding() != 0, "start address must not be in R0"); 4626 assert(VM_Version::has_ETF3(), "instruction must be available"); 4627 4628 Label retry; 4629 bind(retry); 4630 Assembler::z_srstu(end, start); 4631 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4632 } 4633 4634 void MacroAssembler::kmac(Register srcBuff) { 4635 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4636 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 4637 4638 Label retry; 4639 bind(retry); 4640 Assembler::z_kmac(Z_R0, srcBuff); 4641 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4642 } 4643 4644 void MacroAssembler::kimd(Register srcBuff) { 4645 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4646 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 4647 4648 Label retry; 4649 bind(retry); 4650 Assembler::z_kimd(Z_R0, srcBuff); 4651 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4652 } 4653 4654 void MacroAssembler::klmd(Register srcBuff) { 4655 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4656 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 4657 4658 Label retry; 4659 bind(retry); 4660 Assembler::z_klmd(Z_R0, srcBuff); 4661 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4662 } 4663 4664 void MacroAssembler::km(Register dstBuff, Register srcBuff) { 4665 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 4666 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 4667 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4668 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 4669 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4670 4671 Label retry; 4672 bind(retry); 4673 Assembler::z_km(dstBuff, srcBuff); 4674 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4675 } 4676 4677 void MacroAssembler::kmc(Register dstBuff, Register srcBuff) { 4678 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 4679 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 4680 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4681 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 4682 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4683 4684 Label retry; 4685 bind(retry); 4686 Assembler::z_kmc(dstBuff, srcBuff); 4687 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4688 } 4689 4690 void MacroAssembler::kmctr(Register dstBuff, Register ctrBuff, Register srcBuff) { 4691 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 
4692 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 4693 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4694 assert(dstBuff->encoding() != 0, "dst buffer address can't be in Z_R0"); 4695 assert(ctrBuff->encoding() != 0, "ctr buffer address can't be in Z_R0"); 4696 assert(ctrBuff->encoding() % 2 == 0, "ctr buffer addr must be an even register"); 4697 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 4698 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4699 4700 Label retry; 4701 bind(retry); 4702 Assembler::z_kmctr(dstBuff, ctrBuff, srcBuff); 4703 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4704 } 4705 4706 void MacroAssembler::cksm(Register crcBuff, Register srcBuff) { 4707 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4708 4709 Label retry; 4710 bind(retry); 4711 Assembler::z_cksm(crcBuff, srcBuff); 4712 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4713 } 4714 4715 void MacroAssembler::translate_oo(Register r1, Register r2, uint m3) { 4716 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4717 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4718 4719 Label retry; 4720 bind(retry); 4721 Assembler::z_troo(r1, r2, m3); 4722 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4723 } 4724 4725 void MacroAssembler::translate_ot(Register r1, Register r2, uint m3) { 4726 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4727 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4728 4729 Label retry; 4730 bind(retry); 4731 Assembler::z_trot(r1, r2, m3); 4732 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4733 } 4734 4735 void MacroAssembler::translate_to(Register r1, Register r2, uint m3) { 4736 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4737 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4738 4739 Label retry; 4740 bind(retry); 4741 Assembler::z_trto(r1, r2, m3); 4742 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4743 } 4744 4745 void MacroAssembler::translate_tt(Register r1, Register r2, uint m3) { 4746 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4747 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4748 4749 Label retry; 4750 bind(retry); 4751 Assembler::z_trtt(r1, r2, m3); 4752 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4753 } 4754 4755 //--------------------------------------- 4756 // Helpers for Intrinsic Emitters 4757 //--------------------------------------- 4758 4759 /** 4760 * uint32_t crc; 4761 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); 4762 */ 4763 void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, Register tmp) { 4764 assert_different_registers(crc, table, tmp); 4765 assert_different_registers(val, table); 4766 if (crc == val) { // Must rotate first to use the unmodified value. 4767 rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. 4768 z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits. 4769 } else { 4770 z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits. 
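    // The rotate_then_insert() below computes (val & 0xff) << 2, i.e. the low byte of val
    // scaled to the 4-byte entries of the CRC lookup table addressed by the z_x() afterwards.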
4771 rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. 4772 } 4773 z_x(crc, Address(table, tmp, 0)); 4774 } 4775 4776 /** 4777 * uint32_t crc; 4778 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); 4779 */ 4780 void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) { 4781 fold_byte_crc32(crc, crc, table, tmp); 4782 } 4783 4784 /** 4785 * Emits code to update CRC-32 with a byte value according to constants in table. 4786 * 4787 * @param [in,out]crc Register containing the crc. 4788 * @param [in]val Register containing the byte to fold into the CRC. 4789 * @param [in]table Register containing the table of crc constants. 4790 * 4791 * uint32_t crc; 4792 * val = crc_table[(val ^ crc) & 0xFF]; 4793 * crc = val ^ (crc >> 8); 4794 */ 4795 void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { 4796 z_xr(val, crc); 4797 fold_byte_crc32(crc, val, table, val); 4798 } 4799 4800 4801 /** 4802 * @param crc register containing existing CRC (32-bit) 4803 * @param buf register pointing to input byte buffer (byte*) 4804 * @param len register containing number of bytes 4805 * @param table register pointing to CRC table 4806 */ 4807 void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, Register data) { 4808 assert_different_registers(crc, buf, len, table, data); 4809 4810 Label L_mainLoop, L_done; 4811 const int mainLoop_stepping = 1; 4812 4813 // Process all bytes in a single-byte loop. 4814 z_ltr(len, len); 4815 z_brnh(L_done); 4816 4817 bind(L_mainLoop); 4818 z_llgc(data, Address(buf, (intptr_t)0));// Current byte of input buffer (zero extended). Avoids garbage in upper half of register. 4819 add2reg(buf, mainLoop_stepping); // Advance buffer position. 4820 update_byte_crc32(crc, data, table); 4821 z_brct(len, L_mainLoop); // Iterate. 4822 4823 bind(L_done); 4824 } 4825 4826 /** 4827 * Emits code to update CRC-32 with a 4-byte value according to constants in table. 4828 * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c. 4829 * 4830 */ 4831 void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc, 4832 Register t0, Register t1, Register t2, Register t3) { 4833 // This is what we implement (the DOBIG4 part): 4834 // 4835 // #define DOBIG4 c ^= *++buf4; \ 4836 // c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ 4837 // crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] 4838 // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 4839 // Pre-calculate (constant) column offsets, use columns 4..7 for big-endian. 4840 const int ix0 = 4*(4*CRC32_COLUMN_SIZE); 4841 const int ix1 = 5*(4*CRC32_COLUMN_SIZE); 4842 const int ix2 = 6*(4*CRC32_COLUMN_SIZE); 4843 const int ix3 = 7*(4*CRC32_COLUMN_SIZE); 4844 4845 // XOR crc with next four bytes of buffer. 4846 lgr_if_needed(t0, crc); 4847 z_x(t0, Address(buf, bufDisp)); 4848 if (bufInc != 0) { 4849 add2reg(buf, bufInc); 4850 } 4851 4852 // Chop crc into 4 single-byte pieces, shifted left 2 bits, to form the table indices. 
4853 rotate_then_insert(t3, t0, 56-2, 63-2, 2, true); // ((c >> 0) & 0xff) << 2 4854 rotate_then_insert(t2, t0, 56-2, 63-2, 2-8, true); // ((c >> 8) & 0xff) << 2 4855 rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true); // ((c >> 16) & 0xff) << 2 4856 rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true); // ((c >> 24) & 0xff) << 2 4857 4858 // XOR indexed table values to calculate updated crc. 4859 z_ly(t2, Address(table, t2, (intptr_t)ix1)); 4860 z_ly(t0, Address(table, t0, (intptr_t)ix3)); 4861 z_xy(t2, Address(table, t3, (intptr_t)ix0)); 4862 z_xy(t0, Address(table, t1, (intptr_t)ix2)); 4863 z_xr(t0, t2); // Now t0 contains the updated CRC value. 4864 lgr_if_needed(crc, t0); 4865 } 4866 4867 /** 4868 * @param crc register containing existing CRC (32-bit) 4869 * @param buf register pointing to input byte buffer (byte*) 4870 * @param len register containing number of bytes 4871 * @param table register pointing to CRC table 4872 * 4873 * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller! 4874 */ 4875 void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table, 4876 Register t0, Register t1, Register t2, Register t3, 4877 bool invertCRC) { 4878 assert_different_registers(crc, buf, len, table); 4879 4880 Label L_mainLoop, L_tail; 4881 Register data = t0; 4882 Register ctr = Z_R0; 4883 const int mainLoop_stepping = 4; 4884 const int log_stepping = exact_log2(mainLoop_stepping); 4885 4886 // Don't test for len <= 0 here. This pathological case should not occur anyway. 4887 // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles. 4888 // The situation itself is detected and handled correctly by the conditional branches 4889 // following aghi(len, -stepping) and aghi(len, +stepping). 4890 4891 if (invertCRC) { 4892 not_(crc, noreg, false); // 1s complement of crc 4893 } 4894 4895 // Check for short (<4 bytes) buffer. 4896 z_srag(ctr, len, log_stepping); 4897 z_brnh(L_tail); 4898 4899 z_lrvr(crc, crc); // Revert byte order because we are dealing with big-endian data. 4900 rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop 4901 4902 BIND(L_mainLoop); 4903 update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3); 4904 z_brct(ctr, L_mainLoop); // Iterate. 4905 4906 z_lrvr(crc, crc); // Revert byte order back to original. 4907 4908 // Process last few (<8) bytes of buffer. 
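  // On the fall-through path len was reduced to len & (mainLoop_stepping-1) by the
  // rotate_then_insert() above; buffers shorter than one step branch here with their
  // full length. Either way the byte loop below finishes the remainder.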
4909 BIND(L_tail); 4910 update_byteLoop_crc32(crc, buf, len, table, data); 4911 4912 if (invertCRC) { 4913 not_(crc, noreg, false); // 1s complement of crc 4914 } 4915 } 4916 4917 /** 4918 * @param crc register containing existing CRC (32-bit) 4919 * @param buf register pointing to input byte buffer (byte*) 4920 * @param len register containing number of bytes 4921 * @param table register pointing to CRC table 4922 */ 4923 void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table, 4924 Register t0, Register t1, Register t2, Register t3, 4925 bool invertCRC) { 4926 assert_different_registers(crc, buf, len, table); 4927 Register data = t0; 4928 4929 if (invertCRC) { 4930 not_(crc, noreg, false); // 1s complement of crc 4931 } 4932 4933 update_byteLoop_crc32(crc, buf, len, table, data); 4934 4935 if (invertCRC) { 4936 not_(crc, noreg, false); // 1s complement of crc 4937 } 4938 } 4939 4940 void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp, 4941 bool invertCRC) { 4942 assert_different_registers(crc, buf, len, table, tmp); 4943 4944 if (invertCRC) { 4945 not_(crc, noreg, false); // 1s complement of crc 4946 } 4947 4948 z_llgc(tmp, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register. 4949 update_byte_crc32(crc, tmp, table); 4950 4951 if (invertCRC) { 4952 not_(crc, noreg, false); // 1s complement of crc 4953 } 4954 } 4955 4956 void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table, 4957 bool invertCRC) { 4958 assert_different_registers(crc, val, table); 4959 4960 if (invertCRC) { 4961 not_(crc, noreg, false); // 1s complement of crc 4962 } 4963 4964 update_byte_crc32(crc, val, table); 4965 4966 if (invertCRC) { 4967 not_(crc, noreg, false); // 1s complement of crc 4968 } 4969 } 4970 4971 // 4972 // Code for BigInteger::multiplyToLen() intrinsic. 4973 // 4974 4975 // dest_lo += src1 + src2 4976 // dest_hi += carry1 + carry2 4977 // Z_R7 is destroyed ! 4978 void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo, 4979 Register src1, Register src2) { 4980 clear_reg(Z_R7); 4981 z_algr(dest_lo, src1); 4982 z_alcgr(dest_hi, Z_R7); 4983 z_algr(dest_lo, src2); 4984 z_alcgr(dest_hi, Z_R7); 4985 } 4986 4987 // Multiply 64 bit by 64 bit first loop. 4988 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, 4989 Register x_xstart, 4990 Register y, Register y_idx, 4991 Register z, 4992 Register carry, 4993 Register product, 4994 Register idx, Register kdx) { 4995 // jlong carry, x[], y[], z[]; 4996 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 4997 // huge_128 product = y[idx] * x[xstart] + carry; 4998 // z[kdx] = (jlong)product; 4999 // carry = (jlong)(product >>> 64); 5000 // } 5001 // z[xstart] = carry; 5002 5003 Label L_first_loop, L_first_loop_exit; 5004 Label L_one_x, L_one_y, L_multiply; 5005 5006 z_aghi(xstart, -1); 5007 z_brl(L_one_x); // Special case: length of x is 1. 5008 5009 // Load next two integers of x. 5010 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5011 mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0)); 5012 5013 5014 bind(L_first_loop); 5015 5016 z_aghi(idx, -1); 5017 z_brl(L_first_loop_exit); 5018 z_aghi(idx, -1); 5019 z_brl(L_one_y); 5020 5021 // Load next two integers of y. 
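  // Two adjacent 32-bit digits of y are fetched as one 64-bit operand, which serves as
  // the multiplier for the 64x64->128 bit multiply in L_multiply.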
5022 z_sllg(Z_R1_scratch, idx, LogBytesPerInt); 5023 mem2reg_opt(y_idx, Address(y, Z_R1_scratch, 0)); 5024 5025 5026 bind(L_multiply); 5027 5028 Register multiplicand = product->successor(); 5029 Register product_low = multiplicand; 5030 5031 lgr_if_needed(multiplicand, x_xstart); 5032 z_mlgr(product, y_idx); // multiplicand * y_idx -> product::multiplicand 5033 clear_reg(Z_R7); 5034 z_algr(product_low, carry); // Add carry to result. 5035 z_alcgr(product, Z_R7); // Add carry of the last addition. 5036 add2reg(kdx, -2); 5037 5038 // Store result. 5039 z_sllg(Z_R7, kdx, LogBytesPerInt); 5040 reg2mem_opt(product_low, Address(z, Z_R7, 0)); 5041 lgr_if_needed(carry, product); 5042 z_bru(L_first_loop); 5043 5044 5045 bind(L_one_y); // Load one 32 bit portion of y as (0,value). 5046 5047 clear_reg(y_idx); 5048 mem2reg_opt(y_idx, Address(y, (intptr_t) 0), false); 5049 z_bru(L_multiply); 5050 5051 5052 bind(L_one_x); // Load one 32 bit portion of x as (0,value). 5053 5054 clear_reg(x_xstart); 5055 mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false); 5056 z_bru(L_first_loop); 5057 5058 bind(L_first_loop_exit); 5059 } 5060 5061 // Multiply 64 bit by 64 bit and add 128 bit. 5062 void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y, 5063 Register z, 5064 Register yz_idx, Register idx, 5065 Register carry, Register product, 5066 int offset) { 5067 // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry; 5068 // z[kdx] = (jlong)product; 5069 5070 Register multiplicand = product->successor(); 5071 Register product_low = multiplicand; 5072 5073 z_sllg(Z_R7, idx, LogBytesPerInt); 5074 mem2reg_opt(yz_idx, Address(y, Z_R7, offset)); 5075 5076 lgr_if_needed(multiplicand, x_xstart); 5077 z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand 5078 mem2reg_opt(yz_idx, Address(z, Z_R7, offset)); 5079 5080 add2_with_carry(product, product_low, carry, yz_idx); 5081 5082 z_sllg(Z_R7, idx, LogBytesPerInt); 5083 reg2mem_opt(product_low, Address(z, Z_R7, offset)); 5084 5085 } 5086 5087 // Multiply 128 bit by 128 bit. Unrolled inner loop. 
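// As in the first loop, z_mlgr() expects an even/odd register pair: the odd register
// (product->successor()) supplies the multiplicand and receives the low 64 bits, while
// the even register receives the high 64 bits of the 128-bit product.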
5088 void MacroAssembler::multiply_128_x_128_loop(Register x_xstart, 5089 Register y, Register z, 5090 Register yz_idx, Register idx, 5091 Register jdx, 5092 Register carry, Register product, 5093 Register carry2) { 5094 // jlong carry, x[], y[], z[]; 5095 // int kdx = ystart+1; 5096 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop 5097 // huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry; 5098 // z[kdx+idx+1] = (jlong)product; 5099 // jlong carry2 = (jlong)(product >>> 64); 5100 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry2; 5101 // z[kdx+idx] = (jlong)product; 5102 // carry = (jlong)(product >>> 64); 5103 // } 5104 // idx += 2; 5105 // if (idx > 0) { 5106 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry; 5107 // z[kdx+idx] = (jlong)product; 5108 // carry = (jlong)(product >>> 64); 5109 // } 5110 5111 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; 5112 5113 // scale the index 5114 lgr_if_needed(jdx, idx); 5115 and_imm(jdx, 0xfffffffffffffffcL); 5116 rshift(jdx, 2); 5117 5118 5119 bind(L_third_loop); 5120 5121 z_aghi(jdx, -1); 5122 z_brl(L_third_loop_exit); 5123 add2reg(idx, -4); 5124 5125 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8); 5126 lgr_if_needed(carry2, product); 5127 5128 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0); 5129 lgr_if_needed(carry, product); 5130 z_bru(L_third_loop); 5131 5132 5133 bind(L_third_loop_exit); // Handle any left-over operand parts. 5134 5135 and_imm(idx, 0x3); 5136 z_brz(L_post_third_loop_done); 5137 5138 Label L_check_1; 5139 5140 z_aghi(idx, -2); 5141 z_brl(L_check_1); 5142 5143 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0); 5144 lgr_if_needed(carry, product); 5145 5146 5147 bind(L_check_1); 5148 5149 add2reg(idx, 0x2); 5150 and_imm(idx, 0x1); 5151 z_aghi(idx, -1); 5152 z_brl(L_post_third_loop_done); 5153 5154 Register multiplicand = product->successor(); 5155 Register product_low = multiplicand; 5156 5157 z_sllg(Z_R7, idx, LogBytesPerInt); 5158 clear_reg(yz_idx); 5159 mem2reg_opt(yz_idx, Address(y, Z_R7, 0), false); 5160 lgr_if_needed(multiplicand, x_xstart); 5161 z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand 5162 clear_reg(yz_idx); 5163 mem2reg_opt(yz_idx, Address(z, Z_R7, 0), false); 5164 5165 add2_with_carry(product, product_low, yz_idx, carry); 5166 5167 z_sllg(Z_R7, idx, LogBytesPerInt); 5168 reg2mem_opt(product_low, Address(z, Z_R7, 0), false); 5169 rshift(product_low, 32); 5170 5171 lshift(product, 32); 5172 z_ogr(product_low, product); 5173 lgr_if_needed(carry, product_low); 5174 5175 bind(L_post_third_loop_done); 5176 } 5177 5178 void MacroAssembler::multiply_to_len(Register x, Register xlen, 5179 Register y, Register ylen, 5180 Register z, 5181 Register tmp1, Register tmp2, 5182 Register tmp3, Register tmp4, 5183 Register tmp5) { 5184 ShortBranchVerifier sbv(this); 5185 5186 assert_different_registers(x, xlen, y, ylen, z, 5187 tmp1, tmp2, tmp3, tmp4, tmp5, Z_R1_scratch, Z_R7); 5188 assert_different_registers(x, xlen, y, ylen, z, 5189 tmp1, tmp2, tmp3, tmp4, tmp5, Z_R8); 5190 5191 z_stmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP); 5192 5193 // In openJdk, we store the argument as 32-bit value to slot. 5194 Address zlen(Z_SP, _z_abi(remaining_cargs)); // Int in long on big endian. 
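  // Assumption for illustration: with the z/Architecture C calling convention only five
  // integer arguments travel in registers (Z_ARG1..Z_ARG5), so zlen, the sixth stub
  // argument, is taken from the caller's overflow argument area and zero-extended below.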
5195 5196 const Register idx = tmp1; 5197 const Register kdx = tmp2; 5198 const Register xstart = tmp3; 5199 5200 const Register y_idx = tmp4; 5201 const Register carry = tmp5; 5202 const Register product = Z_R0_scratch; 5203 const Register x_xstart = Z_R8; 5204 5205 // First Loop. 5206 // 5207 // final static long LONG_MASK = 0xffffffffL; 5208 // int xstart = xlen - 1; 5209 // int ystart = ylen - 1; 5210 // long carry = 0; 5211 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx-, kdx--) { 5212 // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry; 5213 // z[kdx] = (int)product; 5214 // carry = product >>> 32; 5215 // } 5216 // z[xstart] = (int)carry; 5217 // 5218 5219 lgr_if_needed(idx, ylen); // idx = ylen 5220 z_llgf(kdx, zlen); // C2 does not respect int to long conversion for stub calls, thus load zero-extended. 5221 clear_reg(carry); // carry = 0 5222 5223 Label L_done; 5224 5225 lgr_if_needed(xstart, xlen); 5226 z_aghi(xstart, -1); 5227 z_brl(L_done); 5228 5229 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 5230 5231 NearLabel L_second_loop; 5232 compare64_and_branch(kdx, RegisterOrConstant((intptr_t) 0), bcondEqual, L_second_loop); 5233 5234 NearLabel L_carry; 5235 z_aghi(kdx, -1); 5236 z_brz(L_carry); 5237 5238 // Store lower 32 bits of carry. 5239 z_sllg(Z_R1_scratch, kdx, LogBytesPerInt); 5240 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5241 rshift(carry, 32); 5242 z_aghi(kdx, -1); 5243 5244 5245 bind(L_carry); 5246 5247 // Store upper 32 bits of carry. 5248 z_sllg(Z_R1_scratch, kdx, LogBytesPerInt); 5249 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5250 5251 // Second and third (nested) loops. 5252 // 5253 // for (int i = xstart-1; i >= 0; i--) { // Second loop 5254 // carry = 0; 5255 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop 5256 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + 5257 // (z[k] & LONG_MASK) + carry; 5258 // z[k] = (int)product; 5259 // carry = product >>> 32; 5260 // } 5261 // z[i] = (int)carry; 5262 // } 5263 // 5264 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx 5265 5266 const Register jdx = tmp1; 5267 5268 bind(L_second_loop); 5269 5270 clear_reg(carry); // carry = 0; 5271 lgr_if_needed(jdx, ylen); // j = ystart+1 5272 5273 z_aghi(xstart, -1); // i = xstart-1; 5274 z_brl(L_done); 5275 5276 // Use free slots in the current stackframe instead of push/pop. 5277 Address zsave(Z_SP, _z_abi(carg_1)); 5278 reg2mem_opt(z, zsave); 5279 5280 5281 Label L_last_x; 5282 5283 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5284 load_address(z, Address(z, Z_R1_scratch, 4)); // z = z + k - j 5285 z_aghi(xstart, -1); // i = xstart-1; 5286 z_brl(L_last_x); 5287 5288 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5289 mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0)); 5290 5291 5292 Label L_third_loop_prologue; 5293 5294 bind(L_third_loop_prologue); 5295 5296 Address xsave(Z_SP, _z_abi(carg_2)); 5297 Address xlensave(Z_SP, _z_abi(carg_3)); 5298 Address ylensave(Z_SP, _z_abi(carg_4)); 5299 5300 reg2mem_opt(x, xsave); 5301 reg2mem_opt(xstart, xlensave); 5302 reg2mem_opt(ylen, ylensave); 5303 5304 5305 multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x); 5306 5307 mem2reg_opt(z, zsave); 5308 mem2reg_opt(x, xsave); 5309 mem2reg_opt(xlen, xlensave); // This is the decrement of the loop counter! 
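  // (xstart was saved to xlensave after it had already been decremented; reloading that
  //  value into xlen is what advances the outer loop towards termination.)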
5310 mem2reg_opt(ylen, ylensave); 5311 5312 add2reg(tmp3, 1, xlen); 5313 z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt); 5314 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5315 z_aghi(tmp3, -1); 5316 z_brl(L_done); 5317 5318 rshift(carry, 32); 5319 z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt); 5320 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5321 z_bru(L_second_loop); 5322 5323 // Next infrequent code is moved outside loops. 5324 bind(L_last_x); 5325 5326 clear_reg(x_xstart); 5327 mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false); 5328 z_bru(L_third_loop_prologue); 5329 5330 bind(L_done); 5331 5332 z_lmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP); 5333 } 5334 5335 #ifndef PRODUCT 5336 // Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false). 5337 void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) { 5338 Label ok; 5339 if (check_equal) { 5340 z_bre(ok); 5341 } else { 5342 z_brne(ok); 5343 } 5344 stop(msg, id); 5345 bind(ok); 5346 } 5347 5348 // Assert if CC indicates "low". 5349 void MacroAssembler::asm_assert_low(const char *msg, int id) { 5350 Label ok; 5351 z_brnl(ok); 5352 stop(msg, id); 5353 bind(ok); 5354 } 5355 5356 // Assert if CC indicates "high". 5357 void MacroAssembler::asm_assert_high(const char *msg, int id) { 5358 Label ok; 5359 z_brnh(ok); 5360 stop(msg, id); 5361 bind(ok); 5362 } 5363 5364 // Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false) 5365 // generate non-relocatable code. 5366 void MacroAssembler::asm_assert_static(bool check_equal, const char *msg, int id) { 5367 Label ok; 5368 if (check_equal) { z_bre(ok); } 5369 else { z_brne(ok); } 5370 stop_static(msg, id); 5371 bind(ok); 5372 } 5373 5374 void MacroAssembler::asm_assert_mems_zero(bool check_equal, bool allow_relocation, int size, int64_t mem_offset, 5375 Register mem_base, const char* msg, int id) { 5376 switch (size) { 5377 case 4: 5378 load_and_test_int(Z_R0, Address(mem_base, mem_offset)); 5379 break; 5380 case 8: 5381 load_and_test_long(Z_R0, Address(mem_base, mem_offset)); 5382 break; 5383 default: 5384 ShouldNotReachHere(); 5385 } 5386 if (allow_relocation) { asm_assert(check_equal, msg, id); } 5387 else { asm_assert_static(check_equal, msg, id); } 5388 } 5389 5390 // Check the condition 5391 // expected_size == FP - SP 5392 // after transformation: 5393 // expected_size - FP + SP == 0 5394 // Destroys Register expected_size if no tmp register is passed. 5395 void MacroAssembler::asm_assert_frame_size(Register expected_size, Register tmp, const char* msg, int id) { 5396 if (tmp == noreg) { 5397 tmp = expected_size; 5398 } else { 5399 if (tmp != expected_size) { 5400 z_lgr(tmp, expected_size); 5401 } 5402 z_algr(tmp, Z_SP); 5403 z_slg(tmp, 0, Z_R0, Z_SP); 5404 asm_assert_eq(msg, id); 5405 } 5406 } 5407 #endif // !PRODUCT 5408 5409 // Save and restore functions: Exclude Z_R0. 
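// Illustrative pairing, as used by verify_oop() below (the optional flag word follows
// the eight FP save slots):
//
//   unsigned int nbytes_save = (5 + 8 + 1) * BytesPerWord;       // GPRs + FPRs + flags
//   // ... push a frame providing nbytes_save bytes above the ABI area ...
//   save_volatile_regs(Z_SP, frame::z_abi_160_size, true, true);
//   // ... code that clobbers volatile registers ...
//   restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, true);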
5410 void MacroAssembler::save_volatile_regs(Register dst, int offset, bool include_fp, bool include_flags) { 5411 z_stmg(Z_R1, Z_R5, offset, dst); offset += 5 * BytesPerWord; 5412 if (include_fp) { 5413 z_std(Z_F0, Address(dst, offset)); offset += BytesPerWord; 5414 z_std(Z_F1, Address(dst, offset)); offset += BytesPerWord; 5415 z_std(Z_F2, Address(dst, offset)); offset += BytesPerWord; 5416 z_std(Z_F3, Address(dst, offset)); offset += BytesPerWord; 5417 z_std(Z_F4, Address(dst, offset)); offset += BytesPerWord; 5418 z_std(Z_F5, Address(dst, offset)); offset += BytesPerWord; 5419 z_std(Z_F6, Address(dst, offset)); offset += BytesPerWord; 5420 z_std(Z_F7, Address(dst, offset)); offset += BytesPerWord; 5421 } 5422 if (include_flags) { 5423 Label done; 5424 z_mvi(Address(dst, offset), 2); // encoding: equal 5425 z_bre(done); 5426 z_mvi(Address(dst, offset), 4); // encoding: higher 5427 z_brh(done); 5428 z_mvi(Address(dst, offset), 1); // encoding: lower 5429 bind(done); 5430 } 5431 } 5432 void MacroAssembler::restore_volatile_regs(Register src, int offset, bool include_fp, bool include_flags) { 5433 z_lmg(Z_R1, Z_R5, offset, src); offset += 5 * BytesPerWord; 5434 if (include_fp) { 5435 z_ld(Z_F0, Address(src, offset)); offset += BytesPerWord; 5436 z_ld(Z_F1, Address(src, offset)); offset += BytesPerWord; 5437 z_ld(Z_F2, Address(src, offset)); offset += BytesPerWord; 5438 z_ld(Z_F3, Address(src, offset)); offset += BytesPerWord; 5439 z_ld(Z_F4, Address(src, offset)); offset += BytesPerWord; 5440 z_ld(Z_F5, Address(src, offset)); offset += BytesPerWord; 5441 z_ld(Z_F6, Address(src, offset)); offset += BytesPerWord; 5442 z_ld(Z_F7, Address(src, offset)); offset += BytesPerWord; 5443 } 5444 if (include_flags) { 5445 z_cli(Address(src, offset), 2); // see encoding above 5446 } 5447 } 5448 5449 // Plausibility check for oops. 
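// The check itself lives in the shared verify_oop subroutine; only the address of the
// variable holding its entry point is known when this code is emitted, hence the extra
// z_lg() indirection before call_c() below.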
5450 void MacroAssembler::verify_oop(Register oop, const char* msg) { 5451 if (!VerifyOops) return; 5452 5453 BLOCK_COMMENT("verify_oop {"); 5454 unsigned int nbytes_save = (5 + 8 + 1) * BytesPerWord; 5455 address entry_addr = StubRoutines::verify_oop_subroutine_entry_address(); 5456 5457 save_return_pc(); 5458 5459 // Push frame, but preserve flags 5460 z_lgr(Z_R0, Z_SP); 5461 z_lay(Z_SP, -((int64_t)nbytes_save + frame::z_abi_160_size), Z_SP); 5462 z_stg(Z_R0, _z_abi(callers_sp), Z_SP); 5463 5464 save_volatile_regs(Z_SP, frame::z_abi_160_size, true, true); 5465 5466 lgr_if_needed(Z_ARG2, oop); 5467 load_const_optimized(Z_ARG1, (address)msg); 5468 load_const_optimized(Z_R1, entry_addr); 5469 z_lg(Z_R1, 0, Z_R1); 5470 call_c(Z_R1); 5471 5472 restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, true); 5473 pop_frame(); 5474 restore_return_pc(); 5475 5476 BLOCK_COMMENT("} verify_oop "); 5477 } 5478 5479 void MacroAssembler::verify_oop_addr(Address addr, const char* msg) { 5480 if (!VerifyOops) return; 5481 5482 BLOCK_COMMENT("verify_oop {"); 5483 unsigned int nbytes_save = (5 + 8) * BytesPerWord; 5484 address entry_addr = StubRoutines::verify_oop_subroutine_entry_address(); 5485 5486 save_return_pc(); 5487 unsigned int frame_size = push_frame_abi160(nbytes_save); // kills Z_R0 5488 save_volatile_regs(Z_SP, frame::z_abi_160_size, true, false); 5489 5490 z_lg(Z_ARG2, addr.plus_disp(frame_size)); 5491 load_const_optimized(Z_ARG1, (address)msg); 5492 load_const_optimized(Z_R1, entry_addr); 5493 z_lg(Z_R1, 0, Z_R1); 5494 call_c(Z_R1); 5495 5496 restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, false); 5497 pop_frame(); 5498 restore_return_pc(); 5499 5500 BLOCK_COMMENT("} verify_oop "); 5501 } 5502 5503 const char* MacroAssembler::stop_types[] = { 5504 "stop", 5505 "untested", 5506 "unimplemented", 5507 "shouldnotreachhere" 5508 }; 5509 5510 static void stop_on_request(const char* tp, const char* msg) { 5511 tty->print("Z assembly code requires stop: (%s) %s\n", tp, msg); 5512 guarantee(false, "Z assembly code requires stop: %s", msg); 5513 } 5514 5515 void MacroAssembler::stop(int type, const char* msg, int id) { 5516 BLOCK_COMMENT(err_msg("stop: %s {", msg)); 5517 5518 // Setup arguments. 5519 load_const(Z_ARG1, (void*) stop_types[type%stop_end]); 5520 load_const(Z_ARG2, (void*) msg); 5521 get_PC(Z_R14); // Following code pushes a frame without entering a new function. Use current pc as return address. 5522 save_return_pc(); // Saves return pc Z_R14. 5523 push_frame_abi160(0); 5524 call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2); 5525 // The plain disassembler does not recognize illtrap. It instead displays 5526 // a 32-bit value. Issuing two illtraps assures the disassembler finds 5527 // the proper beginning of the next instruction. 5528 z_illtrap(); // Illegal instruction. 5529 z_illtrap(); // Illegal instruction. 5530 5531 BLOCK_COMMENT(" } stop"); 5532 } 5533 5534 // Special version of stop() for code size reduction. 5535 // Reuses the previously generated call sequence, if any. 5536 // Generates the call sequence on its own, if necessary. 5537 // Note: This code will work only in non-relocatable code! 5538 // The relative address of the data elements (arg1, arg2) must not change. 5539 // The reentry point must not move relative to it's users. This prerequisite 5540 // should be given for "hand-written" code, if all chain calls are in the same code blob. 5541 // Generated code must not undergo any transformation, e.g. ShortenBranches, to be safe. 
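// Minimal (hypothetical) usage sketch: only the first call emits the full save/push/call
// sequence, subsequent calls merely reload their arguments and branch to its reentry point:
//
//   address reentry = NULL;
//   reentry = stop_chain(reentry, 0 /* stop */, "check 1 failed", 0, false);
//   reentry = stop_chain(reentry, 0 /* stop */, "check 2 failed", 0, false);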
5542 address MacroAssembler::stop_chain(address reentry, int type, const char* msg, int id, bool allow_relocation) { 5543 BLOCK_COMMENT(err_msg("stop_chain(%s,%s): %s {", reentry==NULL?"init":"cont", allow_relocation?"reloc ":"static", msg)); 5544 5545 // Setup arguments. 5546 if (allow_relocation) { 5547 // Relocatable version (for comparison purposes). Remove after some time. 5548 load_const(Z_ARG1, (void*) stop_types[type%stop_end]); 5549 load_const(Z_ARG2, (void*) msg); 5550 } else { 5551 load_absolute_address(Z_ARG1, (address)stop_types[type%stop_end]); 5552 load_absolute_address(Z_ARG2, (address)msg); 5553 } 5554 if ((reentry != NULL) && RelAddr::is_in_range_of_RelAddr16(reentry, pc())) { 5555 BLOCK_COMMENT("branch to reentry point:"); 5556 z_brc(bcondAlways, reentry); 5557 } else { 5558 BLOCK_COMMENT("reentry point:"); 5559 reentry = pc(); // Re-entry point for subsequent stop calls. 5560 save_return_pc(); // Saves return pc Z_R14. 5561 push_frame_abi160(0); 5562 if (allow_relocation) { 5563 reentry = NULL; // Prevent reentry if code relocation is allowed. 5564 call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2); 5565 } else { 5566 call_VM_leaf_static(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2); 5567 } 5568 z_illtrap(); // Illegal instruction as emergency stop, should the above call return. 5569 } 5570 BLOCK_COMMENT(" } stop_chain"); 5571 5572 return reentry; 5573 } 5574 5575 // Special version of stop() for code size reduction. 5576 // Assumes constant relative addresses for data and runtime call. 5577 void MacroAssembler::stop_static(int type, const char* msg, int id) { 5578 stop_chain(NULL, type, msg, id, false); 5579 } 5580 5581 void MacroAssembler::stop_subroutine() { 5582 unimplemented("stop_subroutine", 710); 5583 } 5584 5585 // Prints msg to stdout from within generated code.. 5586 void MacroAssembler::warn(const char* msg) { 5587 RegisterSaver::save_live_registers(this, RegisterSaver::all_registers, Z_R14); 5588 load_absolute_address(Z_R1, (address) warning); 5589 load_absolute_address(Z_ARG1, (address) msg); 5590 (void) call(Z_R1); 5591 RegisterSaver::restore_live_registers(this, RegisterSaver::all_registers); 5592 } 5593 5594 #ifndef PRODUCT 5595 5596 // Write pattern 0x0101010101010101 in region [low-before, high+after]. 
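// 'before' and 'after' are given in words: the zapped region spans
// [low - before*BytesPerWord, high + after*BytesPerWord], written as 8-byte stores.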
5597 void MacroAssembler::zap_from_to(Register low, Register high, Register val, Register addr, int before, int after) { 5598 if (!ZapEmptyStackFields) return; 5599 BLOCK_COMMENT("zap memory region {"); 5600 load_const_optimized(val, 0x0101010101010101); 5601 int size = before + after; 5602 if (low == high && size < 5 && size > 0) { 5603 int offset = -before*BytesPerWord; 5604 for (int i = 0; i < size; ++i) { 5605 z_stg(val, Address(low, offset)); 5606 offset +=(1*BytesPerWord); 5607 } 5608 } else { 5609 add2reg(addr, -before*BytesPerWord, low); 5610 if (after) { 5611 #ifdef ASSERT 5612 jlong check = after * BytesPerWord; 5613 assert(Immediate::is_simm32(check) && Immediate::is_simm32(-check), "value not encodable !"); 5614 #endif 5615 add2reg(high, after * BytesPerWord); 5616 } 5617 NearLabel loop; 5618 bind(loop); 5619 z_stg(val, Address(addr)); 5620 add2reg(addr, 8); 5621 compare64_and_branch(addr, high, bcondNotHigh, loop); 5622 if (after) { 5623 add2reg(high, -after * BytesPerWord); 5624 } 5625 } 5626 BLOCK_COMMENT("} zap memory region"); 5627 } 5628 #endif // !PRODUCT 5629 5630 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value, Register _rscratch) { 5631 _masm = masm; 5632 _masm->load_absolute_address(_rscratch, (address)flag_addr); 5633 _masm->load_and_test_int(_rscratch, Address(_rscratch)); 5634 if (value) { 5635 _masm->z_brne(_label); // Skip if true, i.e. != 0. 5636 } else { 5637 _masm->z_bre(_label); // Skip if false, i.e. == 0. 5638 } 5639 } 5640 5641 SkipIfEqual::~SkipIfEqual() { 5642 _masm->bind(_label); 5643 }
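// Typical use of SkipIfEqual (illustrative; DTraceMethodProbes stands in for any bool flag):
//
//   {
//     SkipIfEqual skip_if_off(_masm, &DTraceMethodProbes, false, Z_R1_scratch);
//     // This code is only executed while the flag is true; the destructor binds the
//     // skip label right behind the block.
//   }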