/*
 * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2019 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/codeBuffer.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "oops/accessDecorators.hpp"
#include "oops/compressedKlass.inline.hpp"
#include "oops/compressedOops.inline.hpp"
#include "oops/klass.inline.hpp"
#include "prims/methodHandles.hpp"
#include "registerSaver_s390.hpp"
#include "runtime/icache.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/events.hpp"
#include "utilities/macros.hpp"
#include "utilities/powerOfTwo.hpp"

#include <ucontext.h>

#define BLOCK_COMMENT(str) block_comment(str)
#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")

// Move 32-bit register if destination and source are different.
void MacroAssembler::lr_if_needed(Register rd, Register rs) {
  if (rs != rd) { z_lr(rd, rs); }
}

// Move register if destination and source are different.
void MacroAssembler::lgr_if_needed(Register rd, Register rs) {
  if (rs != rd) { z_lgr(rd, rs); }
}

// Zero-extend 32-bit register into 64-bit register if destination and source are different.
void MacroAssembler::llgfr_if_needed(Register rd, Register rs) {
  if (rs != rd) { z_llgfr(rd, rs); }
}

// Move float register if destination and source are different.
void MacroAssembler::ldr_if_needed(FloatRegister rd, FloatRegister rs) {
  if (rs != rd) { z_ldr(rd, rs); }
}

// Move integer register if destination and source are different.
// It is assumed that shorter-than-int types are already
// appropriately sign-extended.
void MacroAssembler::move_reg_if_needed(Register dst, BasicType dst_type, Register src,
                                        BasicType src_type) {
  assert((dst_type != T_FLOAT) && (dst_type != T_DOUBLE), "use move_freg for float types");
  assert((src_type != T_FLOAT) && (src_type != T_DOUBLE), "use move_freg for float types");

  if (dst_type == src_type) {
    lgr_if_needed(dst, src); // Just move all 64 bits.
    return;
  }

  switch (dst_type) {
    // Do not support these types for now.
    // case T_BOOLEAN:
    case T_BYTE:  // signed byte
      switch (src_type) {
        case T_INT:
          z_lgbr(dst, src);
          break;
        default:
          ShouldNotReachHere();
      }
      return;

    case T_CHAR:
    case T_SHORT:
      switch (src_type) {
        case T_INT:
          if (dst_type == T_CHAR) {
            z_llghr(dst, src);
          } else {
            z_lghr(dst, src);
          }
          break;
        default:
          ShouldNotReachHere();
      }
      return;

    case T_INT:
      switch (src_type) {
        case T_BOOLEAN:
        case T_BYTE:
        case T_CHAR:
        case T_SHORT:
        case T_INT:
        case T_LONG:
        case T_OBJECT:
        case T_ARRAY:
        case T_VOID:
        case T_ADDRESS:
          lr_if_needed(dst, src);
          // llgfr_if_needed(dst, src); // zero-extend (in case we need to find a bug).
          return;

        default:
          assert(false, "non-integer src type");
          return;
      }
    case T_LONG:
      switch (src_type) {
        case T_BOOLEAN:
        case T_BYTE:
        case T_CHAR:
        case T_SHORT:
        case T_INT:
          z_lgfr(dst, src); // sign extension
          return;

        case T_LONG:
        case T_OBJECT:
        case T_ARRAY:
        case T_VOID:
        case T_ADDRESS:
          lgr_if_needed(dst, src);
          return;

        default:
          assert(false, "non-integer src type");
          return;
      }
      return;
    case T_OBJECT:
    case T_ARRAY:
    case T_VOID:
    case T_ADDRESS:
      switch (src_type) {
        // These types don't make sense to be converted to pointers:
        // case T_BOOLEAN:
        // case T_BYTE:
        // case T_CHAR:
        // case T_SHORT:

        case T_INT:
          z_llgfr(dst, src); // zero extension
          return;

        case T_LONG:
        case T_OBJECT:
        case T_ARRAY:
        case T_VOID:
        case T_ADDRESS:
          lgr_if_needed(dst, src);
          return;

        default:
          assert(false, "non-integer src type");
          return;
      }
      return;
    default:
      assert(false, "non-integer dst type");
      return;
  }
}
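// Illustrative examples (register choice arbitrary):
//   move_reg_if_needed(Z_R2, T_LONG, Z_R3, T_INT) sign-extends via LGFR, while
//   move_reg_if_needed(Z_R2, T_INT, Z_R3, T_LONG) just copies the low 32 bits (LR).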

// Move float register if destination and source are different.
void MacroAssembler::move_freg_if_needed(FloatRegister dst, BasicType dst_type,
                                         FloatRegister src, BasicType src_type) {
  assert((dst_type == T_FLOAT) || (dst_type == T_DOUBLE), "use move_reg for int types");
  assert((src_type == T_FLOAT) || (src_type == T_DOUBLE), "use move_reg for int types");
  if (dst_type == src_type) {
    ldr_if_needed(dst, src); // Just move all 64 bits.
  } else {
    switch (dst_type) {
      case T_FLOAT:
        assert(src_type == T_DOUBLE, "invalid float type combination");
        z_ledbr(dst, src);
        return;
      case T_DOUBLE:
        assert(src_type == T_FLOAT, "invalid float type combination");
        z_ldebr(dst, src);
        return;
      default:
        assert(false, "non-float dst type");
        return;
    }
  }
}

// Optimized emitter for reg to mem operations.
// Uses modern instructions if running on modern hardware, classic instructions
// otherwise. Prefers (usually shorter) classic instructions if applicable.
// Data register (reg) cannot be used as work register.
//
// Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
void MacroAssembler::freg2mem_opt(FloatRegister reg,
                                  int64_t       disp,
                                  Register      index,
                                  Register      base,
                                  void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
                                  void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
                                  Register      scratch) {
  index = (index == noreg) ? Z_R0 : index;
  if (Displacement::is_shortDisp(disp)) {
    (this->*classic)(reg, disp, index, base);
  } else {
    if (Displacement::is_validDisp(disp)) {
      (this->*modern)(reg, disp, index, base);
    } else {
      if (scratch != Z_R0 && scratch != Z_R1) {
        (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
      } else {
        if (scratch != Z_R0) { // scratch == Z_R1
          if ((scratch == index) || (index == base)) {
            (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
          } else {
            add2reg(scratch, disp, base);
            (this->*classic)(reg, 0, index, scratch);
            if (base == scratch) {
              add2reg(base, -disp); // Restore base.
            }
          }
        } else { // scratch == Z_R0
          z_lgr(scratch, base);
          add2reg(base, disp);
          (this->*classic)(reg, 0, index, base);
          z_lgr(base, scratch); // Restore base.
        }
      }
    }
  }
}

void MacroAssembler::freg2mem_opt(FloatRegister reg, const Address &a, bool is_double) {
  if (is_double) {
    freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stdy), CLASSIC_FFUN(z_std));
  } else {
    freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stey), CLASSIC_FFUN(z_ste));
  }
}

// Optimized emitter for mem to reg operations.
// Uses modern instructions if running on modern hardware, classic instructions
// otherwise. Prefers (usually shorter) classic instructions if applicable.
// data register (reg) cannot be used as work register.
//
// Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
void MacroAssembler::mem2freg_opt(FloatRegister reg,
                                  int64_t       disp,
                                  Register      index,
                                  Register      base,
                                  void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
                                  void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
                                  Register      scratch) {
  index = (index == noreg) ? Z_R0 : index;
  if (Displacement::is_shortDisp(disp)) {
    (this->*classic)(reg, disp, index, base);
  } else {
    if (Displacement::is_validDisp(disp)) {
      (this->*modern)(reg, disp, index, base);
    } else {
      if (scratch != Z_R0 && scratch != Z_R1) {
        (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
      } else {
        if (scratch != Z_R0) { // scratch == Z_R1
          if ((scratch == index) || (index == base)) {
            (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
          } else {
            add2reg(scratch, disp, base);
            (this->*classic)(reg, 0, index, scratch);
            if (base == scratch) {
              add2reg(base, -disp); // Restore base.
            }
          }
        } else { // scratch == Z_R0
          z_lgr(scratch, base);
          add2reg(base, disp);
          (this->*classic)(reg, 0, index, base);
          z_lgr(base, scratch); // Restore base.
        }
      }
    }
  }
}

void MacroAssembler::mem2freg_opt(FloatRegister reg, const Address &a, bool is_double) {
  if (is_double) {
    mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ldy), CLASSIC_FFUN(z_ld));
  } else {
    mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ley), CLASSIC_FFUN(z_le));
  }
}

// Optimized emitter for reg to mem operations.
// Uses modern instructions if running on modern hardware, classic instructions
// otherwise. Prefers (usually shorter) classic instructions if applicable.
// Data register (reg) cannot be used as work register.
//
// Don't rely on register locking, instead pass a scratch register
// (Z_R0 by default)
// CAUTION! passing registers >= Z_R2 may produce bad results on old CPUs!
void MacroAssembler::reg2mem_opt(Register reg,
                                 int64_t  disp,
                                 Register index,
                                 Register base,
                                 void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
                                 void (MacroAssembler::*classic)(Register, int64_t, Register, Register),
                                 Register scratch) {
  index = (index == noreg) ? Z_R0 : index;
  if (Displacement::is_shortDisp(disp)) {
    (this->*classic)(reg, disp, index, base);
  } else {
    if (Displacement::is_validDisp(disp)) {
      (this->*modern)(reg, disp, index, base);
    } else {
      if (scratch != Z_R0 && scratch != Z_R1) {
        (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
      } else {
        if (scratch != Z_R0) { // scratch == Z_R1
          if ((scratch == index) || (index == base)) {
            (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
          } else {
            add2reg(scratch, disp, base);
            (this->*classic)(reg, 0, index, scratch);
            if (base == scratch) {
              add2reg(base, -disp); // Restore base.
            }
          }
        } else { // scratch == Z_R0
          if ((scratch == reg) || (scratch == base) || (reg == base)) {
            (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
          } else {
            z_lgr(scratch, base);
            add2reg(base, disp);
            (this->*classic)(reg, 0, index, base);
            z_lgr(base, scratch); // Restore base.
          }
        }
      }
    }
  }
}

int MacroAssembler::reg2mem_opt(Register reg, const Address &a, bool is_double) {
  int store_offset = offset();
  if (is_double) {
    reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_stg), CLASSIC_IFUN(z_stg));
  } else {
    reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_sty), CLASSIC_IFUN(z_st));
  }
  return store_offset;
}

// Optimized emitter for mem to reg operations.
// Uses modern instructions if running on modern hardware, classic instructions
// otherwise. Prefers (usually shorter) classic instructions if applicable.
// Data register (reg) will be used as work register where possible.
void MacroAssembler::mem2reg_opt(Register reg,
                                 int64_t  disp,
                                 Register index,
                                 Register base,
                                 void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
                                 void (MacroAssembler::*classic)(Register, int64_t, Register, Register)) {
  index = (index == noreg) ? Z_R0 : index;
  if (Displacement::is_shortDisp(disp)) {
    (this->*classic)(reg, disp, index, base);
  } else {
    if (Displacement::is_validDisp(disp)) {
      (this->*modern)(reg, disp, index, base);
    } else {
      if ((reg == index) && (reg == base)) {
        z_sllg(reg, reg, 1);
        add2reg(reg, disp);
        (this->*classic)(reg, 0, noreg, reg);
      } else if ((reg == index) && (reg != Z_R0)) {
        add2reg(reg, disp);
        (this->*classic)(reg, 0, reg, base);
      } else if (reg == base) {
        add2reg(reg, disp);
        (this->*classic)(reg, 0, index, reg);
      } else if (reg != Z_R0) {
        add2reg(reg, disp, base);
        (this->*classic)(reg, 0, index, reg);
      } else { // reg == Z_R0 && reg != base here
        add2reg(base, disp);
        (this->*classic)(reg, 0, index, base);
        add2reg(base, -disp);
      }
    }
  }
}

void MacroAssembler::mem2reg_opt(Register reg, const Address &a, bool is_double) {
  if (is_double) {
    z_lg(reg, a);
  } else {
    mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_ly), CLASSIC_IFUN(z_l));
  }
}

void MacroAssembler::mem2reg_signed_opt(Register reg, const Address &a) {
  mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_lgf), CLASSIC_IFUN(z_lgf));
}

void MacroAssembler::and_imm(Register r, long mask,
                             Register tmp /* = Z_R0 */,
                             bool wide /* = false */) {
  assert(wide || Immediate::is_simm32(mask), "mask value too large");

  if (!wide) {
    z_nilf(r, mask);
    return;
  }

  assert(r != tmp, " need a different temporary register !");
  load_const_optimized(tmp, mask);
  z_ngr(r, tmp);
}

// Calculate the 1's complement.
// Note: The condition code is neither preserved nor correctly set by this code!!!
// Note: (wide == false) does not protect the high order half of the target register
//       from alteration. It only serves as optimization hint for 32-bit results.
void MacroAssembler::not_(Register r1, Register r2, bool wide) {

  if ((r2 == noreg) || (r2 == r1)) { // Calc 1's complement in place.
    z_xilf(r1, -1);
    if (wide) {
      z_xihf(r1, -1);
    }
  } else { // Distinct src and dst registers.
    load_const_optimized(r1, -1);
    z_xgr(r1, r2);
  }
}

unsigned long MacroAssembler::create_mask(int lBitPos, int rBitPos) {
  assert(lBitPos >= 0, "zero is leftmost bit position");
  assert(rBitPos <= 63, "63 is rightmost bit position");
  assert(lBitPos <= rBitPos, "inverted selection interval");
  return (lBitPos == 0 ? (unsigned long)(-1L) : ((1UL<<(63-lBitPos+1))-1)) & (~((1UL<<(63-rBitPos))-1));
}
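// Illustrative example (bit 0 is the leftmost, i.e. most significant, bit):
//   create_mask(8, 55) selects bits 8..55 and yields 0x00ffffffffffff00.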

// Helper function for the "Rotate_then_<logicalOP>" emitters.
// Rotate src, then mask register contents such that only bits in range survive.
// For oneBits == false, all bits not in range are set to 0. Useful for deleting all bits outside range.
// For oneBits == true,  all bits not in range are set to 1. Useful for preserving all bits outside range.
// The caller must ensure that the selected range only contains bits with defined value.
void MacroAssembler::rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos,
                                      int nRotate, bool src32bit, bool dst32bit, bool oneBits) {
  assert(!(dst32bit && lBitPos < 32), "selection interval out of range for int destination");
  bool sll4rll = (nRotate >= 0) && (nRotate <= (63-rBitPos)); // Substitute SLL(G) for RLL(G).
  bool srl4rll = (nRotate < 0) && (-nRotate <= lBitPos);      // Substitute SRL(G) for RLL(G).
  // Pre-determine which parts of dst will be zero after shift/rotate.
  bool llZero = sll4rll && (nRotate >= 16);
  bool lhZero = (sll4rll && (nRotate >= 32)) || (srl4rll && (nRotate <= -48));
  bool lfZero = llZero && lhZero;
  bool hlZero = (sll4rll && (nRotate >= 48)) || (srl4rll && (nRotate <= -32));
  bool hhZero = (srl4rll && (nRotate <= -16));
  bool hfZero = hlZero && hhZero;

  // rotate then mask src operand.
  // if oneBits == true,  all bits outside selected range are 1s.
  // if oneBits == false, all bits outside selected range are 0s.
  if (src32bit) { // There might be garbage in the upper 32 bits which will get masked away.
    if (dst32bit) {
      z_rll(dst, src, nRotate); // Copy and rotate, upper half of reg remains undisturbed.
    } else {
      if      (sll4rll) { z_sllg(dst, src,  nRotate); }
      else if (srl4rll) { z_srlg(dst, src, -nRotate); }
      else              { z_rllg(dst, src,  nRotate); }
    }
  } else {
    if      (sll4rll) { z_sllg(dst, src,  nRotate); }
    else if (srl4rll) { z_srlg(dst, src, -nRotate); }
    else              { z_rllg(dst, src,  nRotate); }
  }

  unsigned long range_mask = create_mask(lBitPos, rBitPos);
  unsigned int range_mask_h = (unsigned int)(range_mask >> 32);
  unsigned int range_mask_l = (unsigned int)range_mask;
  unsigned short range_mask_hh = (unsigned short)(range_mask >> 48);
  unsigned short range_mask_hl = (unsigned short)(range_mask >> 32);
  unsigned short range_mask_lh = (unsigned short)(range_mask >> 16);
  unsigned short range_mask_ll = (unsigned short)range_mask;
  // Works for z9 and newer H/W.
  if (oneBits) {
    if ((~range_mask_l) != 0)                { z_oilf(dst, ~range_mask_l); } // All bits outside range become 1s.
    if (((~range_mask_h) != 0) && !dst32bit) { z_oihf(dst, ~range_mask_h); }
  } else {
    // All bits outside range become 0s.
    if (((~range_mask_l) != 0) && !lfZero) {
      z_nilf(dst, range_mask_l);
    }
    if (((~range_mask_h) != 0) && !dst32bit && !hfZero) {
      z_nihf(dst, range_mask_h);
    }
  }
}

// Rotate src, then insert selected range from rotated src into dst.
// Clear dst before, if requested.
void MacroAssembler::rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos,
                                        int nRotate, bool clear_dst) {
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
  z_risbg(dst, src, lBitPos, rBitPos, nRotate, clear_dst); // Rotate, then insert selected, clear the rest.
}
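// Illustrative use (bit 0 is the leftmost bit): rotate_then_insert(dst, src, 48, 63, 32, true)
// rotates src left by 32, so source bits 16..31 land in positions 48..63, inserts them
// there, and clears the remaining bits of dst.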

// Rotate src, then and selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos,
                                     int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
  z_rnsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then and selected.
}

// Rotate src, then or selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_or(Register dst, Register src, int lBitPos, int rBitPos,
                                    int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
  z_rosbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then or selected.
}

// Rotate src, then xor selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_xor(Register dst, Register src, int lBitPos, int rBitPos,
                                     int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
  z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected.
}

void MacroAssembler::add64(Register r1, RegisterOrConstant inc) {
  if (inc.is_register()) {
    z_agr(r1, inc.as_register());
  } else { // constant
    intptr_t imm = inc.as_constant();
    add2reg(r1, imm);
  }
}

// Helper function to multiply the 64bit contents of a register by a 16bit constant.
// The optimization tries to avoid the mghi instruction, since it uses the FPU for
// calculation and is thus rather slow.
//
// There is no handling for special cases, e.g. cval==0 or cval==1.
//
// Returns len of generated code block.
unsigned int MacroAssembler::mul_reg64_const16(Register rval, Register work, int cval) {
  int block_start = offset();

  bool sign_flip = cval < 0;
  cval = sign_flip ? -cval : cval;

  BLOCK_COMMENT("Reg64*Con16 {");

  int bit1 = cval & -cval;
  if (bit1 == cval) {
    z_sllg(rval, rval, exact_log2(bit1));
    if (sign_flip) { z_lcgr(rval, rval); }
  } else {
    int bit2 = (cval-bit1) & -(cval-bit1);
    if ((bit1+bit2) == cval) {
      z_sllg(work, rval, exact_log2(bit1));
      z_sllg(rval, rval, exact_log2(bit2));
      z_agr(rval, work);
      if (sign_flip) { z_lcgr(rval, rval); }
    } else {
      if (sign_flip) { z_mghi(rval, -cval); }
      else           { z_mghi(rval,  cval); }
    }
  }
  BLOCK_COMMENT("} Reg64*Con16");

  int block_end = offset();
  return block_end - block_start;
}
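// For example, cval == 10 decomposes as 8 + 2: the emitter generates
//   SLLG work,rval,1 ; SLLG rval,rval,3 ; AGR rval,work
// i.e. rval*10 == rval*8 + rval*2. A value like 7 (three set bits) falls back to MGHI.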

// Generic operation r1 := r2 + imm.
//
// Should produce the best code for each supported CPU version.
// r2 == noreg yields r1 := r1 + imm
// imm == 0 emits either no instruction or r1 := r2 !
// NOTES: 1) Don't use this function where fixed sized
//           instruction sequences are required!!!
//        2) Don't use this function if condition code
//           setting is required!
//        3) Despite being declared as int64_t, the parameter imm
//           must be a simm_32 value (= signed 32-bit integer).
void MacroAssembler::add2reg(Register r1, int64_t imm, Register r2) {
  assert(Immediate::is_simm32(imm), "probably an implicit conversion went wrong");

  if (r2 == noreg) { r2 = r1; }

  // Handle special case imm == 0.
  if (imm == 0) {
    lgr_if_needed(r1, r2);
    // Nothing else to do.
    return;
  }

  if (!PreferLAoverADD || (r2 == Z_R0)) {
    bool distinctOpnds = VM_Version::has_DistinctOpnds();

    // Can we encode imm in 16 bits signed?
    if (Immediate::is_simm16(imm)) {
      if (r1 == r2) {
        z_aghi(r1, imm);
        return;
      }
      if (distinctOpnds) {
        z_aghik(r1, r2, imm);
        return;
      }
      z_lgr(r1, r2);
      z_aghi(r1, imm);
      return;
    }
  } else {
    // Can we encode imm in 12 bits unsigned?
    if (Displacement::is_shortDisp(imm)) {
      z_la(r1, imm, r2);
      return;
    }
    // Can we encode imm in 20 bits signed?
    if (Displacement::is_validDisp(imm)) {
      // Always use LAY instruction, so we don't need the tmp register.
      z_lay(r1, imm, r2);
      return;
    }

  }

  // Can handle it (all possible values) with long immediates.
  lgr_if_needed(r1, r2);
  z_agfi(r1, imm);
}

// Generic operation r := b + x + d
//
// Addition of several operands with address generation semantics - sort of:
//  - no restriction on the registers. Any register will do for any operand.
//  - x == noreg: operand will be disregarded.
//  - b == noreg: will use (contents of) result reg as operand (r := r + d).
//  - x == Z_R0:  just disregard
//  - b == Z_R0:  use as operand. This is not address generation semantics!!!
//
// The same restrictions as on add2reg() are valid!!!
void MacroAssembler::add2reg_with_index(Register r, int64_t d, Register x, Register b) {
  assert(Immediate::is_simm32(d), "probably an implicit conversion went wrong");

  if (x == noreg) { x = Z_R0; }
  if (b == noreg) { b = r; }

  // Handle special case x == R0.
  if (x == Z_R0) {
    // Can simply add the immediate value to the base register.
    add2reg(r, d, b);
    return;
  }

  if (!PreferLAoverADD || (b == Z_R0)) {
    bool distinctOpnds = VM_Version::has_DistinctOpnds();
    // Handle special case d == 0.
    if (d == 0) {
      if (b == x)        { z_sllg(r, b, 1); return; }
      if (r == x)        { z_agr(r, b);     return; }
      if (r == b)        { z_agr(r, x);     return; }
      if (distinctOpnds) { z_agrk(r, x, b); return; }
      z_lgr(r, b);
      z_agr(r, x);
    } else {
      if (x == b)             { z_sllg(r, x, 1); }
      else if (r == x)        { z_agr(r, b); }
      else if (r == b)        { z_agr(r, x); }
      else if (distinctOpnds) { z_agrk(r, x, b); }
      else {
        z_lgr(r, b);
        z_agr(r, x);
      }
      add2reg(r, d);
    }
  } else {
    // Can we encode imm in 12 bits unsigned?
    if (Displacement::is_shortDisp(d)) {
      z_la(r, d, x, b);
      return;
    }
    // Can we encode imm in 20 bits signed?
    if (Displacement::is_validDisp(d)) {
      z_lay(r, d, x, b);
      return;
    }
    z_la(r, 0, x, b);
    add2reg(r, d);
  }
}
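// For example (illustrative operands): with PreferLAoverADD and r2 != Z_R0,
// add2reg(r1, 8, r2) emits LA r1,8(r2) and add2reg(r1, -4, r2) emits LAY r1,-4(r2);
// on the AGHI path, add2reg(r1, 8) with r1 == r2 becomes AGHI r1,8.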

// Generic emitter (32bit) for direct memory increment.
// For optimal code, do not specify Z_R0 as temp register.
void MacroAssembler::add2mem_32(const Address &a, int64_t imm, Register tmp) {
  if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) {
    z_asi(a, imm);
  } else {
    z_lgf(tmp, a);
    add2reg(tmp, imm);
    z_st(tmp, a);
  }
}

void MacroAssembler::add2mem_64(const Address &a, int64_t imm, Register tmp) {
  if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) {
    z_agsi(a, imm);
  } else {
    z_lg(tmp, a);
    add2reg(tmp, imm);
    z_stg(tmp, a);
  }
}

void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
  switch (size_in_bytes) {
    case 8: z_lg(dst, src); break;
    case 4: is_signed ? z_lgf(dst, src) : z_llgf(dst, src); break;
    case 2: is_signed ? z_lgh(dst, src) : z_llgh(dst, src); break;
    case 1: is_signed ? z_lgb(dst, src) : z_llgc(dst, src); break;
    default: ShouldNotReachHere();
  }
}

void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
  switch (size_in_bytes) {
    case 8: z_stg(src, dst); break;
    case 4: z_st(src, dst); break;
    case 2: z_sth(src, dst); break;
    case 1: z_stc(src, dst); break;
    default: ShouldNotReachHere();
  }
}

// Split a si20 offset (20bit, signed) into an ui12 offset (12bit, unsigned) and
// a high-order summand in register tmp.
//
// return value: < 0: No split required, si20 actually has property uimm12.
//               >= 0: Split performed. Use return value as uimm12 displacement and
//                     tmp as index register.
int MacroAssembler::split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate) {
  assert(Immediate::is_simm20(si20_offset), "sanity");
  int lg_off = (int)si20_offset &  0x0fff; // Punch out low-order 12 bits, always positive.
  int ll_off = (int)si20_offset & ~0x0fff; // Force low-order 12 bits to zero.
  assert((Displacement::is_shortDisp(si20_offset) && (ll_off == 0)) ||
         !Displacement::is_shortDisp(si20_offset), "unexpected offset values");
  assert((lg_off+ll_off) == si20_offset, "offset splitup error");

  Register work = accumulate ? Z_R0 : tmp;

  if (fixed_codelen) {          // Len of code = 10 = 4 + 6.
    z_lghi(work, ll_off>>12);   // Implicit sign extension.
    z_slag(work, work, 12);
  } else {                      // Len of code = 0..10.
    if (ll_off == 0) { return -1; }
    // ll_off has 8 significant bits (at most) plus sign.
    if ((ll_off & 0x0000f000) == 0) {    // Non-zero bits only in upper halfbyte.
      z_llilh(work, ll_off >> 16);
      if (ll_off < 0) {                  // Sign-extension required.
        z_lgfr(work, work);
      }
    } else {
      if ((ll_off & 0x000f0000) == 0) {  // Non-zero bits only in lower halfbyte.
        z_llill(work, ll_off);
      } else {                           // Non-zero bits in both halfbytes.
        z_lghi(work, ll_off>>12);        // Implicit sign extension.
        z_slag(work, work, 12);
      }
    }
  }
  if (accumulate) { z_algr(tmp, work); } // len of code += 4
  return lg_off;
}
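// For example, si20_offset == 0x12345 splits into ll_off == 0x12000 (materialized in the
// work register) and a returned uimm12 displacement of 0x345, so the caller can address
// base + work + 0x345 with a short-displacement instruction.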

void MacroAssembler::load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
  if (Displacement::is_validDisp(si20)) {
    z_ley(t, si20, a);
  } else {
    // Fixed_codelen = true is a simple way to ensure that the size of load_float_largeoffset
    // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
    // pool loads).
    bool accumulate    = true;
    bool fixed_codelen = true;
    Register work;

    if (fixed_codelen) {
      z_lgr(tmp, a); // Lgr_if_needed not applicable due to fixed_codelen.
    } else {
      accumulate = (a == tmp);
    }
    work = tmp;

    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
    if (disp12 < 0) {
      z_le(t, si20, work);
    } else {
      if (accumulate) {
        z_le(t, disp12, work);
      } else {
        z_le(t, disp12, work, a);
      }
    }
  }
}

void MacroAssembler::load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
  if (Displacement::is_validDisp(si20)) {
    z_ldy(t, si20, a);
  } else {
    // Fixed_codelen = true is a simple way to ensure that the size of load_double_largeoffset
    // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
    // pool loads).
    bool accumulate    = true;
    bool fixed_codelen = true;
    Register work;

    if (fixed_codelen) {
      z_lgr(tmp, a); // Lgr_if_needed not applicable due to fixed_codelen.
    } else {
      accumulate = (a == tmp);
    }
    work = tmp;

    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
    if (disp12 < 0) {
      z_ld(t, si20, work);
    } else {
      if (accumulate) {
        z_ld(t, disp12, work);
      } else {
        z_ld(t, disp12, work, a);
      }
    }
  }
}

// PCrelative TOC access.
// Returns distance (in bytes) from current position to start of consts section.
// Returns 0 (zero) if no consts section exists or if it has size zero.
long MacroAssembler::toc_distance() {
  CodeSection* cs = code()->consts();
  return (long)((cs != NULL) ? cs->start()-pc() : 0);
}

// Implementation on x86/sparc assumes that constant and instruction section are
// adjacent, but this doesn't hold. Two special situations may occur, that we must
// be able to handle:
//   1. const section may be located apart from the inst section.
//   2. const section may be empty.
// In both cases, we use the const section's start address to compute the "TOC";
// this seems to occur only temporarily; in the final step we always seem to end up
// with the pc-relative variant.
//
// PC-relative offset could be +/-2**32 -> use long for disp
// Furthermore: makes no sense to have special code for
// adjacent const and inst sections.
void MacroAssembler::load_toc(Register Rtoc) {
  // Simply use distance from start of const section (should be patched in the end).
  long disp = toc_distance();

  RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp);
  relocate(rspec);
  z_larl(Rtoc, RelAddr::pcrel_off32(disp)); // Offset is in halfwords.
}

// PCrelative TOC access.
// Load from anywhere pcrelative (with relocation of load instr).
void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) {
  address pc = this->pc();
  ptrdiff_t total_distance = dataLocation - pc;
  RelocationHolder rspec = internal_word_Relocation::spec(dataLocation);

  assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
  assert(total_distance != 0, "sanity");

  // Some extra safety net.
  if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) {
    guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance);
  }

  (this)->relocate(rspec, relocInfo::pcrel_addr_format);
  z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));
}

// PCrelative TOC access.
// Load from anywhere pcrelative (with relocation of load instr);
// loaded addr has to be relocated when added to constant pool.
void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) {
  address pc = this->pc();
  ptrdiff_t total_distance = addrLocation - pc;
  RelocationHolder rspec = internal_word_Relocation::spec(addrLocation);

  assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");

  // Some extra safety net.
  if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) {
    guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_addr_pcrelative can't handle distance " INTPTR_FORMAT, total_distance);
  }

  (this)->relocate(rspec, relocInfo::pcrel_addr_format);
  z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));
}

// Generic operation: load a value from memory and test.
// CondCode indicates the sign (<0, ==0, >0) of the loaded value.
void MacroAssembler::load_and_test_byte(Register dst, const Address &a) {
  z_lb(dst, a);
  z_ltr(dst, dst);
}

void MacroAssembler::load_and_test_short(Register dst, const Address &a) {
  int64_t disp = a.disp20();
  if (Displacement::is_shortDisp(disp)) {
    z_lh(dst, a);
  } else if (Displacement::is_longDisp(disp)) {
    z_lhy(dst, a);
  } else {
    guarantee(false, "displacement out of range");
  }
  z_ltr(dst, dst);
}

void MacroAssembler::load_and_test_int(Register dst, const Address &a) {
  z_lt(dst, a);
}

void MacroAssembler::load_and_test_int2long(Register dst, const Address &a) {
  z_ltgf(dst, a);
}

void MacroAssembler::load_and_test_long(Register dst, const Address &a) {
  z_ltg(dst, a);
}

// Test a bit in memory.
void MacroAssembler::testbit(const Address &a, unsigned int bit) {
  assert(a.index() == noreg, "no index reg allowed in testbit");
  if (bit <= 7) {
    z_tm(a.disp() + 3, a.base(), 1 << bit);
  } else if (bit <= 15) {
    z_tm(a.disp() + 2, a.base(), 1 << (bit - 8));
  } else if (bit <= 23) {
    z_tm(a.disp() + 1, a.base(), 1 << (bit - 16));
  } else if (bit <= 31) {
    z_tm(a.disp() + 0, a.base(), 1 << (bit - 24));
  } else {
    ShouldNotReachHere();
  }
}

// Test a bit in a register. Result is reflected in CC.
void MacroAssembler::testbit(Register r, unsigned int bitPos) {
  if (bitPos < 16) {
    z_tmll(r, 1U<<bitPos);
  } else if (bitPos < 32) {
    z_tmlh(r, 1U<<(bitPos-16));
  } else if (bitPos < 48) {
    z_tmhl(r, 1U<<(bitPos-32));
  } else if (bitPos < 64) {
    z_tmhh(r, 1U<<(bitPos-48));
  } else {
    ShouldNotReachHere();
  }
}
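// For example, testbit(r, 35) emits TMHL r,0x0008: bit positions count from the least
// significant bit (bit 0), so bit 35 lies in the high-low halfword at mask 1<<3.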

void MacroAssembler::prefetch_read(Address a) {
  z_pfd(1, a.disp20(), a.indexOrR0(), a.base());
}
void MacroAssembler::prefetch_update(Address a) {
  z_pfd(2, a.disp20(), a.indexOrR0(), a.base());
}

// Clear a register, i.e. load const zero into reg.
// Return len (in bytes) of generated instruction(s).
// whole_reg: Clear 64 bits if true, 32 bits otherwise.
// set_cc:    Use instruction that sets the condition code, if true.
int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) {
  unsigned int start_off = offset();
  if (whole_reg) {
    set_cc ? z_xgr(r, r) : z_laz(r, 0, Z_R0);
  } else { // Only 32bit register.
    set_cc ? z_xr(r, r) : z_lhi(r, 0);
  }
  return offset() - start_off;
}

#ifdef ASSERT
int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) {
  switch (pattern_len) {
    case 1:
      pattern = (pattern & 0x000000ff)  | ((pattern & 0x000000ff)<<8);
    case 2:
      pattern = (pattern & 0x0000ffff)  | ((pattern & 0x0000ffff)<<16);
    case 4:
      pattern = (pattern & 0xffffffffL) | ((pattern & 0xffffffffL)<<32);
    case 8:
      return load_const_optimized_rtn_len(r, pattern, true);
      break;
    default:
      guarantee(false, "preset_reg: bad len");
  }
  return 0;
}
#endif

// addr: Address descriptor of memory to clear. Index register will not be used!
// size: Number of bytes to clear.
//    !!! DO NOT USE THEM FOR ATOMIC MEMORY CLEARING !!!
//    !!! Use store_const() instead                  !!!
void MacroAssembler::clear_mem(const Address& addr, unsigned size) {
  guarantee(size <= 256, "MacroAssembler::clear_mem: size too large");

  if (size == 1) {
    z_mvi(addr, 0);
    return;
  }

  switch (size) {
    case 2: z_mvhhi(addr, 0);
      return;
    case 4: z_mvhi(addr, 0);
      return;
    case 8: z_mvghi(addr, 0);
      return;
    default: ; // Fallthru to xc.
  }

  z_xc(addr, size, addr);
}

void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) z_nop();
}

// Special version for non-relocatable code if required alignment
// is larger than CodeEntryAlignment.
void MacroAssembler::align_address(int modulus) {
  while ((uintptr_t)pc() % modulus != 0) z_nop();
}

Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         Register temp_reg,
                                         int64_t extra_slot_offset) {
  // On Z, we can have index and disp in an Address. So don't call argument_offset,
  // which issues an unnecessary add instruction.
  int stackElementSize = Interpreter::stackElementSize;
  int64_t offset = extra_slot_offset * stackElementSize;
  const Register argbase = Z_esp;
  if (arg_slot.is_constant()) {
    offset += arg_slot.as_constant() * stackElementSize;
    return Address(argbase, offset);
  }
  // else
  assert(temp_reg != noreg, "must specify");
  assert(temp_reg != Z_ARG1, "base and index are conflicting");
  z_sllg(temp_reg, arg_slot.as_register(), exact_log2(stackElementSize)); // tempreg = arg_slot << 3
  return Address(argbase, temp_reg, offset);
}

//===================================================================
//===       START   C O N S T A N T S   I N   C O D E   S T R E A M  ===
//===================================================================
//===             P A T C H A B L E   C O N S T A N T S              ===
//===================================================================

//---------------------------------------------------
//  Load (patchable) constant into register
//---------------------------------------------------

// Load absolute address (and try to optimize).
//   Note: This method is usable only for position-fixed code,
//         referring to a position-fixed target location.
//         If not so, relocations and patching must be used.
void MacroAssembler::load_absolute_address(Register d, address addr) {
  assert(addr != NULL, "should not happen");
  BLOCK_COMMENT("load_absolute_address:");
  if (addr == NULL) {
    z_larl(d, pc()); // Dummy emit for size calc.
    return;
  }

  if (RelAddr::is_in_range_of_RelAddr32(addr, pc())) {
    z_larl(d, addr);
    return;
  }

  load_const_optimized(d, (long)addr);
}

// Load a 64bit constant.
// Patchable code sequence, but not atomically patchable.
// Make sure to keep code size constant -> no value-dependent optimizations.
// Do not kill condition code.
void MacroAssembler::load_const(Register t, long x) {
  // Note: Right shift is only cleanly defined for unsigned types
  //       or for signed types with nonnegative values.
  Assembler::z_iihf(t, (long)((unsigned long)x >> 32));
  Assembler::z_iilf(t, (long)((unsigned long)x & 0xffffffffUL));
}
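// For example, load_const(t, 0x1122334455667788) emits IIHF t,0x11223344 followed by
// IILF t,0x55667788; the sequence is always 12 bytes, so the constant can be patched
// later without changing the code size.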

// Load a 32bit constant into a 64bit register, sign-extend or zero-extend.
// Patchable code sequence, but not atomically patchable.
// Make sure to keep code size constant -> no value-dependent optimizations.
// Do not kill condition code.
void MacroAssembler::load_const_32to64(Register t, int64_t x, bool sign_extend) {
  if (sign_extend) { Assembler::z_lgfi(t, x); }
  else             { Assembler::z_llilf(t, x); }
}

// Load narrow oop constant, no decompression.
void MacroAssembler::load_narrow_oop(Register t, narrowOop a) {
  assert(UseCompressedOops, "must be on to call this method");
  load_const_32to64(t, CompressedOops::narrow_oop_value(a), false /*sign_extend*/);
}

// Load narrow klass constant, compression required.
void MacroAssembler::load_narrow_klass(Register t, Klass* k) {
  assert(UseCompressedClassPointers, "must be on to call this method");
  narrowKlass encoded_k = CompressedKlassPointers::encode(k);
  load_const_32to64(t, encoded_k, false /*sign_extend*/);
}

//------------------------------------------------------
//  Compare (patchable) constant with register.
//------------------------------------------------------

// Compare narrow oop in reg with narrow oop constant, no decompression.
void MacroAssembler::compare_immediate_narrow_oop(Register oop1, narrowOop oop2) {
  assert(UseCompressedOops, "must be on to call this method");

  Assembler::z_clfi(oop1, CompressedOops::narrow_oop_value(oop2));
}

// Compare narrow klass in reg with narrow klass constant, no decompression.
void MacroAssembler::compare_immediate_narrow_klass(Register klass1, Klass* klass2) {
  assert(UseCompressedClassPointers, "must be on to call this method");
  narrowKlass encoded_k = CompressedKlassPointers::encode(klass2);

  Assembler::z_clfi(klass1, encoded_k);
}

//----------------------------------------------------------
//  Check which kind of load_constant we have here.
//----------------------------------------------------------

// Detection of CPU version dependent load_const sequence.
// The detection is valid only for code sequences generated by load_const,
// not load_const_optimized.
bool MacroAssembler::is_load_const(address a) {
  unsigned long inst1, inst2;
  unsigned int  len1,  len2;

  len1 = get_instruction(a, &inst1);
  len2 = get_instruction(a + len1, &inst2);

  return is_z_iihf(inst1) && is_z_iilf(inst2);
}

// Detection of CPU version dependent load_const_32to64 sequence.
// Mostly used for narrow oops and narrow Klass pointers.
// The detection is valid only for code sequences generated by load_const_32to64.
bool MacroAssembler::is_load_const_32to64(address pos) {
  unsigned long inst1, inst2;
  unsigned int  len1;

  len1 = get_instruction(pos, &inst1);
  return is_z_llilf(inst1);
}

// Detection of compare_immediate_narrow sequence.
// The detection is valid only for code sequences generated by compare_immediate_narrow_oop.
bool MacroAssembler::is_compare_immediate32(address pos) {
  return is_equal(pos, CLFI_ZOPC, RIL_MASK);
}

// Detection of compare_immediate_narrow sequence.
// The detection is valid only for code sequences generated by compare_immediate_narrow_oop.
bool MacroAssembler::is_compare_immediate_narrow_oop(address pos) {
  return is_compare_immediate32(pos);
}

// Detection of compare_immediate_narrow sequence.
// The detection is valid only for code sequences generated by compare_immediate_narrow_klass.
bool MacroAssembler::is_compare_immediate_narrow_klass(address pos) {
  return is_compare_immediate32(pos);
}

//-----------------------------------
//  patch the load_constant
//-----------------------------------

// CPU-version dependent patching of load_const.
void MacroAssembler::patch_const(address a, long x) {
  assert(is_load_const(a), "not a load of a constant");
  // Note: Right shift is only cleanly defined for unsigned types
  //       or for signed types with nonnegative values.
  set_imm32((address)a, (long)((unsigned long)x >> 32));
  set_imm32((address)(a + 6), (long)((unsigned long)x & 0xffffffffUL));
}

// Patching the value of CPU version dependent load_const_32to64 sequence.
// The passed ptr MUST be in compressed format!
int MacroAssembler::patch_load_const_32to64(address pos, int64_t np) {
  assert(is_load_const_32to64(pos), "not a load of a narrow ptr (oop or klass)");

  set_imm32(pos, np);
  return 6;
}

// Patching the value of CPU version dependent compare_immediate_narrow sequence.
// The passed ptr MUST be in compressed format!
int MacroAssembler::patch_compare_immediate_32(address pos, int64_t np) {
  assert(is_compare_immediate32(pos), "not a compressed ptr compare");

  set_imm32(pos, np);
  return 6;
}

// Patching the immediate value of CPU version dependent load_narrow_oop sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_load_narrow_oop(address pos, oop o) {
  assert(UseCompressedOops, "Can only patch compressed oops");
  return patch_load_const_32to64(pos, CompressedOops::narrow_oop_value(o));
}

// Patching the immediate value of CPU version dependent load_narrow_klass sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_load_narrow_klass(address pos, Klass* k) {
  assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");

  narrowKlass nk = CompressedKlassPointers::encode(k);
  return patch_load_const_32to64(pos, nk);
}

// Patching the immediate value of CPU version dependent compare_immediate_narrow_oop sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_compare_immediate_narrow_oop(address pos, oop o) {
  assert(UseCompressedOops, "Can only patch compressed oops");
  return patch_compare_immediate_32(pos, CompressedOops::narrow_oop_value(o));
}

// Patching the immediate value of CPU version dependent compare_immediate_narrow_klass sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_compare_immediate_narrow_klass(address pos, Klass* k) {
  assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");

  narrowKlass nk = CompressedKlassPointers::encode(k);
  return patch_compare_immediate_32(pos, nk);
}

//------------------------------------------------------------------------
//  Extract the constant from a load_constant instruction stream.
//------------------------------------------------------------------------

// Get constant from a load_const sequence.
long MacroAssembler::get_const(address a) {
  assert(is_load_const(a), "not a load of a constant");
  unsigned long x;
  x =  (((unsigned long) (get_imm32(a,0) & 0xffffffff)) << 32);
  x |= (((unsigned long) (get_imm32(a,1) & 0xffffffff)));
  return (long) x;
}

//--------------------------------------
//  Store a constant in memory.
//--------------------------------------

// General emitter to move a constant to memory.
// The store is atomic.
//  o Address must be given in RS format (no index register)
//  o Displacement should be 12bit unsigned for efficiency. 20bit signed also supported.
//  o Constant can be 1, 2, 4, or 8 bytes, signed or unsigned.
//  o Memory slot can be 1, 2, 4, or 8 bytes, signed or unsigned.
//  o Memory slot must be at least as wide as constant, will assert otherwise.
//  o Signed constants will sign-extend, unsigned constants will zero-extend to slot width.
int MacroAssembler::store_const(const Address &dest, long imm,
                                unsigned int lm, unsigned int lc,
                                Register scratch) {
  int64_t  disp = dest.disp();
  Register base = dest.base();
  assert(!dest.has_index(), "not supported");
  assert((lm==1)||(lm==2)||(lm==4)||(lm==8), "memory length not supported");
  assert((lc==1)||(lc==2)||(lc==4)||(lc==8), "constant length not supported");
  assert(lm>=lc, "memory slot too small");
  assert(lc==8 || Immediate::is_simm(imm, lc*8), "const out of range");
  assert(Displacement::is_validDisp(disp), "displacement out of range");

  bool is_shortDisp = Displacement::is_shortDisp(disp);
  int store_offset = -1;

  // For target len == 1 it's easy.
  if (lm == 1) {
    store_offset = offset();
    if (is_shortDisp) {
      z_mvi(disp, base, imm);
      return store_offset;
    } else {
      z_mviy(disp, base, imm);
      return store_offset;
    }
  }

  // All the "good stuff" takes an unsigned displacement.
  if (is_shortDisp) {
    // NOTE: Cannot use clear_mem for imm==0, because it is not atomic.

    store_offset = offset();
    switch (lm) {
      case 2: // Lc == 1 handled correctly here, even for unsigned. Instruction does no widening.
        z_mvhhi(disp, base, imm);
        return store_offset;
      case 4:
        if (Immediate::is_simm16(imm)) {
          z_mvhi(disp, base, imm);
          return store_offset;
        }
        break;
      case 8:
        if (Immediate::is_simm16(imm)) {
          z_mvghi(disp, base, imm);
          return store_offset;
        }
        break;
      default:
        ShouldNotReachHere();
        break;
    }
  }

  // Can't optimize, so load value and store it.
  guarantee(scratch != noreg, " need a scratch register here !");
  if (imm != 0) {
    load_const_optimized(scratch, imm); // Preserves CC anyway.
  } else {
    // Leave CC alone!!
    (void) clear_reg(scratch, true, false); // Indicate unused result.
  }

  store_offset = offset();
  if (is_shortDisp) {
    switch (lm) {
      case 2:
        z_sth(scratch, disp, Z_R0, base);
        return store_offset;
      case 4:
        z_st(scratch, disp, Z_R0, base);
        return store_offset;
      case 8:
        z_stg(scratch, disp, Z_R0, base);
        return store_offset;
      default:
        ShouldNotReachHere();
        break;
    }
  } else {
    switch (lm) {
      case 2:
        z_sthy(scratch, disp, Z_R0, base);
        return store_offset;
      case 4:
        z_sty(scratch, disp, Z_R0, base);
        return store_offset;
      case 8:
        z_stg(scratch, disp, Z_R0, base);
        return store_offset;
      default:
        ShouldNotReachHere();
        break;
    }
  }
  return -1; // should not reach here
}
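// For example, store_const(Address(base, disp), 0, 8, 8, scratch) emits a single MVGHI
// when disp fits in 12 bits unsigned; otherwise the constant is loaded into scratch and
// stored with STG. Either way the store itself is a single instruction and thus atomic.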

//===================================================================
//===          N O T   P A T C H A B L E   C O N S T A N T S       ===
//===================================================================

// Load constant x into register t with a fast instruction sequence
// depending on the bits in x. Preserves CC under all circumstances.
int MacroAssembler::load_const_optimized_rtn_len(Register t, long x, bool emit) {
  if (x == 0) {
    int len;
    if (emit) {
      len = clear_reg(t, true, false);
    } else {
      len = 4;
    }
    return len;
  }

  if (Immediate::is_simm16(x)) {
    if (emit) { z_lghi(t, x); }
    return 4;
  }

  // 64 bit value: | part1 | part2 | part3 | part4 |
  // At least one part is not zero!
  // Note: Right shift is only cleanly defined for unsigned types
  //       or for signed types with nonnegative values.
  int part1 = (int)((unsigned long)x >> 48) & 0x0000ffff;
  int part2 = (int)((unsigned long)x >> 32) & 0x0000ffff;
  int part3 = (int)((unsigned long)x >> 16) & 0x0000ffff;
  int part4 = (int)x & 0x0000ffff;
  int part12 = (int)((unsigned long)x >> 32);
  int part34 = (int)x;

  // Lower word only (unsigned).
  if (part12 == 0) {
    if (part3 == 0) {
      if (emit) z_llill(t, part4);
      return 4;
    }
    if (part4 == 0) {
      if (emit) z_llilh(t, part3);
      return 4;
    }
    if (emit) z_llilf(t, part34);
    return 6;
  }

  // Upper word only.
  if (part34 == 0) {
    if (part1 == 0) {
      if (emit) z_llihl(t, part2);
      return 4;
    }
    if (part2 == 0) {
      if (emit) z_llihh(t, part1);
      return 4;
    }
    if (emit) z_llihf(t, part12);
    return 6;
  }

  // Lower word only (signed).
  if ((part1 == 0x0000ffff) && (part2 == 0x0000ffff) && ((part3 & 0x00008000) != 0)) {
    if (emit) z_lgfi(t, part34);
    return 6;
  }

  int len = 0;

  if ((part1 == 0) || (part2 == 0)) {
    if (part1 == 0) {
      if (emit) z_llihl(t, part2);
      len += 4;
    } else {
      if (emit) z_llihh(t, part1);
      len += 4;
    }
  } else {
    if (emit) z_llihf(t, part12);
    len += 6;
  }

  if ((part3 == 0) || (part4 == 0)) {
    if (part3 == 0) {
      if (emit) z_iill(t, part4);
      len += 4;
    } else {
      if (emit) z_iilh(t, part3);
      len += 4;
    }
  } else {
    if (emit) z_iilf(t, part34);
    len += 6;
  }
  return len;
}
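// For example, x == 0x0000123400005678 is emitted as LLIHL t,0x1234 followed by
// IILL t,0x5678 (8 bytes), while x == 0x7fff collapses to a single LGHI t,0x7fff (4 bytes).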
1587 // 1588 // This version of branch_optimized is good for cases where the target address is known 1589 // and constant, i.e. is never changed (no relocation, no patching). 1590 void MacroAssembler::branch_optimized(Assembler::branch_condition cond, address branch_addr) { 1591 address branch_origin = pc(); 1592 1593 if (RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) { 1594 z_brc(cond, branch_addr); 1595 } else if (RelAddr::is_in_range_of_RelAddr32(branch_addr, branch_origin)) { 1596 z_brcl(cond, branch_addr); 1597 } else { 1598 load_const_optimized(Z_R1, branch_addr); // CC must not get killed by load_const_optimized. 1599 z_bcr(cond, Z_R1); 1600 } 1601 } 1602 1603 // This version of branch_optimized is good for cases where the target address 1604 // is potentially not yet known at the time the code is emitted. 1605 // 1606 // One very common case is a branch to an unbound label which is handled here. 1607 // The caller might know (or hope) that the branch distance is short enough 1608 // to be encoded in a 16bit relative address. In this case he will pass a 1609 // NearLabel branch_target. 1610 // Care must be taken with unbound labels. Each call to target(label) creates 1611 // an entry in the patch queue for that label to patch all references of the label 1612 // once it gets bound. Those recorded patch locations must be patchable. Otherwise, 1613 // an assertion fires at patch time. 1614 void MacroAssembler::branch_optimized(Assembler::branch_condition cond, Label& branch_target) { 1615 if (branch_target.is_bound()) { 1616 address branch_addr = target(branch_target); 1617 branch_optimized(cond, branch_addr); 1618 } else if (branch_target.is_near()) { 1619 z_brc(cond, branch_target); // Caller assures that the target will be in range for z_brc. 1620 } else { 1621 z_brcl(cond, branch_target); // Let's hope target is in range. Otherwise, we will abort at patch time. 1622 } 1623 } 1624 1625 // Generate an optimal compare and branch to the branch target. 1626 // Optimal means that a relative branch (clgrj, brc or brcl) is used if the 1627 // branch distance is short enough. Loading the target address into a 1628 // register and branching via reg is used as fallback only. 1629 // 1630 // Input: 1631 // r1 - left compare operand 1632 // r2 - right compare operand 1633 void MacroAssembler::compare_and_branch_optimized(Register r1, 1634 Register r2, 1635 Assembler::branch_condition cond, 1636 address branch_addr, 1637 bool len64, 1638 bool has_sign) { 1639 unsigned int casenum = (len64?2:0)+(has_sign?0:1); 1640 1641 address branch_origin = pc(); 1642 if (VM_Version::has_CompareBranch() && RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) { 1643 switch (casenum) { 1644 case 0: z_crj( r1, r2, cond, branch_addr); break; 1645 case 1: z_clrj (r1, r2, cond, branch_addr); break; 1646 case 2: z_cgrj(r1, r2, cond, branch_addr); break; 1647 case 3: z_clgrj(r1, r2, cond, branch_addr); break; 1648 default: ShouldNotReachHere(); break; 1649 } 1650 } else { 1651 switch (casenum) { 1652 case 0: z_cr( r1, r2); break; 1653 case 1: z_clr(r1, r2); break; 1654 case 2: z_cgr(r1, r2); break; 1655 case 3: z_clgr(r1, r2); break; 1656 default: ShouldNotReachHere(); break; 1657 } 1658 branch_optimized(cond, branch_addr); 1659 } 1660 } 1661 1662 // Generate an optimal compare and branch to the branch target. 1663 // Optimal means that a relative branch (clgij, brc or brcl) is used if the 1664 // branch distance is short enough. 
Loading the target address into a 1665 // register and branching via reg is used as fallback only. 1666 // 1667 // Input: 1668 // r1 - left compare operand (in register) 1669 // x2 - right compare operand (immediate) 1670 void MacroAssembler::compare_and_branch_optimized(Register r1, 1671 jlong x2, 1672 Assembler::branch_condition cond, 1673 Label& branch_target, 1674 bool len64, 1675 bool has_sign) { 1676 address branch_origin = pc(); 1677 bool x2_imm8 = (has_sign && Immediate::is_simm8(x2)) || (!has_sign && Immediate::is_uimm8(x2)); 1678 bool is_RelAddr16 = branch_target.is_near() || 1679 (branch_target.is_bound() && 1680 RelAddr::is_in_range_of_RelAddr16(target(branch_target), branch_origin)); 1681 unsigned int casenum = (len64?2:0)+(has_sign?0:1); 1682 1683 if (VM_Version::has_CompareBranch() && is_RelAddr16 && x2_imm8) { 1684 switch (casenum) { 1685 case 0: z_cij( r1, x2, cond, branch_target); break; 1686 case 1: z_clij(r1, x2, cond, branch_target); break; 1687 case 2: z_cgij(r1, x2, cond, branch_target); break; 1688 case 3: z_clgij(r1, x2, cond, branch_target); break; 1689 default: ShouldNotReachHere(); break; 1690 } 1691 return; 1692 } 1693 1694 if (x2 == 0) { 1695 switch (casenum) { 1696 case 0: z_ltr(r1, r1); break; 1697 case 1: z_ltr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication! 1698 case 2: z_ltgr(r1, r1); break; 1699 case 3: z_ltgr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication! 1700 default: ShouldNotReachHere(); break; 1701 } 1702 } else { 1703 if ((has_sign && Immediate::is_simm16(x2)) || (!has_sign && Immediate::is_uimm(x2, 15))) { 1704 switch (casenum) { 1705 case 0: z_chi(r1, x2); break; 1706 case 1: z_chi(r1, x2); break; // positive immediate < 2**15 1707 case 2: z_cghi(r1, x2); break; 1708 case 3: z_cghi(r1, x2); break; // positive immediate < 2**15 1709 default: break; 1710 } 1711 } else if ( (has_sign && Immediate::is_simm32(x2)) || (!has_sign && Immediate::is_uimm32(x2)) ) { 1712 switch (casenum) { 1713 case 0: z_cfi( r1, x2); break; 1714 case 1: z_clfi(r1, x2); break; 1715 case 2: z_cgfi(r1, x2); break; 1716 case 3: z_clgfi(r1, x2); break; 1717 default: ShouldNotReachHere(); break; 1718 } 1719 } else { 1720 // No instruction with immediate operand possible, so load into register. 1721 Register scratch = (r1 != Z_R0) ? Z_R0 : Z_R1; 1722 load_const_optimized(scratch, x2); 1723 switch (casenum) { 1724 case 0: z_cr( r1, scratch); break; 1725 case 1: z_clr(r1, scratch); break; 1726 case 2: z_cgr(r1, scratch); break; 1727 case 3: z_clgr(r1, scratch); break; 1728 default: ShouldNotReachHere(); break; 1729 } 1730 } 1731 } 1732 branch_optimized(cond, branch_target); 1733 } 1734 1735 // Generate an optimal compare and branch to the branch target. 1736 // Optimal means that a relative branch (clgrj, brc or brcl) is used if the 1737 // branch distance is short enough. Loading the target address into a 1738 // register and branching via reg is used as fallback only. 1739 // 1740 // Input: 1741 // r1 - left compare operand 1742 // r2 - right compare operand 1743 void MacroAssembler::compare_and_branch_optimized(Register r1, 1744 Register r2, 1745 Assembler::branch_condition cond, 1746 Label& branch_target, 1747 bool len64, 1748 bool has_sign) { 1749 unsigned int casenum = (len64 ? 2 : 0) + (has_sign ? 
0 : 1); 1750 1751 if (branch_target.is_bound()) { 1752 address branch_addr = target(branch_target); 1753 compare_and_branch_optimized(r1, r2, cond, branch_addr, len64, has_sign); 1754 } else { 1755 if (VM_Version::has_CompareBranch() && branch_target.is_near()) { 1756 switch (casenum) { 1757 case 0: z_crj( r1, r2, cond, branch_target); break; 1758 case 1: z_clrj( r1, r2, cond, branch_target); break; 1759 case 2: z_cgrj( r1, r2, cond, branch_target); break; 1760 case 3: z_clgrj(r1, r2, cond, branch_target); break; 1761 default: ShouldNotReachHere(); break; 1762 } 1763 } else { 1764 switch (casenum) { 1765 case 0: z_cr( r1, r2); break; 1766 case 1: z_clr(r1, r2); break; 1767 case 2: z_cgr(r1, r2); break; 1768 case 3: z_clgr(r1, r2); break; 1769 default: ShouldNotReachHere(); break; 1770 } 1771 branch_optimized(cond, branch_target); 1772 } 1773 } 1774 } 1775 1776 //=========================================================================== 1777 //=== END H I G H E R L E V E L B R A N C H E M I T T E R S === 1778 //=========================================================================== 1779 1780 AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) { 1781 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 1782 int index = oop_recorder()->allocate_metadata_index(obj); 1783 RelocationHolder rspec = metadata_Relocation::spec(index); 1784 return AddressLiteral((address)obj, rspec); 1785 } 1786 1787 AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) { 1788 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 1789 int index = oop_recorder()->find_index(obj); 1790 RelocationHolder rspec = metadata_Relocation::spec(index); 1791 return AddressLiteral((address)obj, rspec); 1792 } 1793 1794 AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) { 1795 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 1796 int oop_index = oop_recorder()->allocate_oop_index(obj); 1797 return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); 1798 } 1799 1800 AddressLiteral MacroAssembler::constant_oop_address(jobject obj) { 1801 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 1802 int oop_index = oop_recorder()->find_index(obj); 1803 return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); 1804 } 1805 1806 // NOTE: destroys r 1807 void MacroAssembler::c2bool(Register r, Register t) { 1808 z_lcr(t, r); // t = -r 1809 z_or(r, t); // r = -r OR r 1810 z_srl(r, 31); // Yields 0 if r was 0, 1 otherwise. 1811 } 1812 1813 // Patch instruction `inst' at offset `inst_pos' to refer to `dest_pos' 1814 // and return the resulting instruction. 1815 // Dest_pos and inst_pos are 32 bit only. These parms can only designate 1816 // relative positions. 1817 // Use correct argument types. Do not pre-calculate distance. 
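//
// Offset fields rewritten below, grouped by instruction format:
//   4-byte pc-relative branches            : 16-bit halfword offset in bits 16..31,
//   6-byte compare-and-branch variants     : 16-bit halfword offset in bits 16..31,
//   6-byte brasl/brcl and other long forms : 32-bit halfword offset in bits 16..47.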
1818 unsigned long MacroAssembler::patched_branch(address dest_pos, unsigned long inst, address inst_pos) { 1819 int c = 0; 1820 unsigned long patched_inst = 0; 1821 if (is_call_pcrelative_short(inst) || 1822 is_branch_pcrelative_short(inst) || 1823 is_branchoncount_pcrelative_short(inst) || 1824 is_branchonindex32_pcrelative_short(inst)) { 1825 c = 1; 1826 int m = fmask(15, 0); // simm16(-1, 16, 32); 1827 int v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 32); 1828 patched_inst = (inst & ~m) | v; 1829 } else if (is_compareandbranch_pcrelative_short(inst)) { 1830 c = 2; 1831 long m = fmask(31, 16); // simm16(-1, 16, 48); 1832 long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48); 1833 patched_inst = (inst & ~m) | v; 1834 } else if (is_branchonindex64_pcrelative_short(inst)) { 1835 c = 3; 1836 long m = fmask(31, 16); // simm16(-1, 16, 48); 1837 long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48); 1838 patched_inst = (inst & ~m) | v; 1839 } else if (is_call_pcrelative_long(inst) || is_branch_pcrelative_long(inst)) { 1840 c = 4; 1841 long m = fmask(31, 0); // simm32(-1, 16, 48); 1842 long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48); 1843 patched_inst = (inst & ~m) | v; 1844 } else if (is_pcrelative_long(inst)) { // These are the non-branch pc-relative instructions. 1845 c = 5; 1846 long m = fmask(31, 0); // simm32(-1, 16, 48); 1847 long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48); 1848 patched_inst = (inst & ~m) | v; 1849 } else { 1850 print_dbg_msg(tty, inst, "not a relative branch", 0); 1851 dump_code_range(tty, inst_pos, 32, "not a pcrelative branch"); 1852 ShouldNotReachHere(); 1853 } 1854 1855 long new_off = get_pcrel_offset(patched_inst); 1856 if (new_off != (dest_pos-inst_pos)) { 1857 tty->print_cr("case %d: dest_pos = %p, inst_pos = %p, disp = %ld(%12.12lx)", c, dest_pos, inst_pos, new_off, new_off); 1858 print_dbg_msg(tty, inst, "<- original instruction: branch patching error", 0); 1859 print_dbg_msg(tty, patched_inst, "<- patched instruction: branch patching error", 0); 1860 #ifdef LUCY_DBG 1861 VM_Version::z_SIGSEGV(); 1862 #endif 1863 ShouldNotReachHere(); 1864 } 1865 return patched_inst; 1866 } 1867 1868 // Only called when binding labels (share/vm/asm/assembler.cpp) 1869 // Pass arguments as intended. Do not pre-calculate distance. 1870 void MacroAssembler::pd_patch_instruction(address branch, address target, const char* file, int line) { 1871 unsigned long stub_inst; 1872 int inst_len = get_instruction(branch, &stub_inst); 1873 1874 set_instruction(branch, patched_branch(target, stub_inst, branch), inst_len); 1875 } 1876 1877 1878 // Extract relative address (aka offset). 1879 // inv_simm16 works for 4-byte instructions only. 1880 // compare and branch instructions are 6-byte and have a 16bit offset "in the middle". 
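// get_instruction() delivers the instruction right-aligned in a 64-bit word, so a
// 4-byte instruction leaves the upper 32 bits zero. The first test below relies on
// that: if the upper 32 bits are zero, the offset is extracted with inv_simm16
// (4-byte forms), otherwise with inv_simm16_48 (6-byte forms).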
long MacroAssembler::get_pcrel_offset(unsigned long inst) {

  if (MacroAssembler::is_pcrelative_short(inst)) {
    if (((inst&0xFFFFffff00000000UL) == 0) && ((inst&0x00000000FFFF0000UL) != 0)) {
      return RelAddr::inv_pcrel_off16(inv_simm16(inst));
    } else {
      return RelAddr::inv_pcrel_off16(inv_simm16_48(inst));
    }
  }

  if (MacroAssembler::is_pcrelative_long(inst)) {
    return RelAddr::inv_pcrel_off32(inv_simm32(inst));
  }

  print_dbg_msg(tty, inst, "not a pcrelative instruction", 6);
#ifdef LUCY_DBG
  VM_Version::z_SIGSEGV();
#else
  ShouldNotReachHere();
#endif
  return -1;
}

long MacroAssembler::get_pcrel_offset(address pc) {
  unsigned long inst;
  unsigned int  len = get_instruction(pc, &inst);

#ifdef ASSERT
  long offset;
  if (MacroAssembler::is_pcrelative_short(inst) || MacroAssembler::is_pcrelative_long(inst)) {
    offset = get_pcrel_offset(inst);
  } else {
    offset = -1;
  }

  if (offset == -1) {
    dump_code_range(tty, pc, 32, "not a pcrelative instruction");
#ifdef LUCY_DBG
    VM_Version::z_SIGSEGV();
#else
    ShouldNotReachHere();
#endif
  }
  return offset;
#else
  return get_pcrel_offset(inst);
#endif // ASSERT
}

// Get target address from pc-relative instructions.
address MacroAssembler::get_target_addr_pcrel(address pc) {
  assert(is_pcrelative_long(pc), "not a pcrelative instruction");
  return pc + get_pcrel_offset(pc);
}

// Patch pc relative load address.
void MacroAssembler::patch_target_addr_pcrel(address pc, address con) {
  unsigned long inst;
  // Offset is +/- 2**32 -> use long.
  ptrdiff_t distance = con - pc;

  get_instruction(pc, &inst);

  if (is_pcrelative_short(inst)) {
    *(short *)(pc+2) = RelAddr::pcrel_off16(con, pc); // Instructions are at least 2-byte aligned, no test required.

    // Some extra safety net.
    if (!RelAddr::is_in_range_of_RelAddr16(distance)) {
      print_dbg_msg(tty, inst, "distance out of range (16bit)", 4);
      dump_code_range(tty, pc, 32, "distance out of range (16bit)");
      guarantee(RelAddr::is_in_range_of_RelAddr16(distance), "too far away (more than +/- 2**16)");
    }
    return;
  }

  if (is_pcrelative_long(inst)) {
    *(int *)(pc+2) = RelAddr::pcrel_off32(con, pc);

    // Some extra safety net.
    if (!RelAddr::is_in_range_of_RelAddr32(distance)) {
      print_dbg_msg(tty, inst, "distance out of range (32bit)", 6);
      dump_code_range(tty, pc, 32, "distance out of range (32bit)");
      guarantee(RelAddr::is_in_range_of_RelAddr32(distance), "too far away (more than +/- 2**32)");
    }
    return;
  }

  guarantee(false, "not a pcrelative instruction to patch!");
}

// "Current PC" here means the address just behind the basr instruction.
address MacroAssembler::get_PC(Register result) {
  z_basr(result, Z_R0); // Don't branch, just save next instruction address in result.
  return pc();
}

// Get current PC + offset.
// Offset given in bytes, must be even!
// "Current PC" here means the address of the larl instruction plus the given offset.
address MacroAssembler::get_PC(Register result, int64_t offset) {
  address here = pc();
  z_larl(result, offset/2); // Save target instruction address in result.
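  // LARL encodes its immediate as a halfword offset, hence offset/2 for the byte
  // offset passed in (which is why the offset must be even).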
1983 return here + offset; 1984 } 1985 1986 void MacroAssembler::instr_size(Register size, Register pc) { 1987 // Extract 2 most significant bits of current instruction. 1988 z_llgc(size, Address(pc)); 1989 z_srl(size, 6); 1990 // Compute (x+3)&6 which translates 0->2, 1->4, 2->4, 3->6. 1991 z_ahi(size, 3); 1992 z_nill(size, 6); 1993 } 1994 1995 // Resize_frame with SP(new) = SP(old) - [offset]. 1996 void MacroAssembler::resize_frame_sub(Register offset, Register fp, bool load_fp) 1997 { 1998 assert_different_registers(offset, fp, Z_SP); 1999 if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); } 2000 2001 z_sgr(Z_SP, offset); 2002 z_stg(fp, _z_abi(callers_sp), Z_SP); 2003 } 2004 2005 // Resize_frame with SP(new) = [newSP] + offset. 2006 // This emitter is useful if we already have calculated a pointer 2007 // into the to-be-allocated stack space, e.g. with special alignment properties, 2008 // but need some additional space, e.g. for spilling. 2009 // newSP is the pre-calculated pointer. It must not be modified. 2010 // fp holds, or is filled with, the frame pointer. 2011 // offset is the additional increment which is added to addr to form the new SP. 2012 // Note: specify a negative value to reserve more space! 2013 // load_fp == true only indicates that fp is not pre-filled with the frame pointer. 2014 // It does not guarantee that fp contains the frame pointer at the end. 2015 void MacroAssembler::resize_frame_abs_with_offset(Register newSP, Register fp, int offset, bool load_fp) { 2016 assert_different_registers(newSP, fp, Z_SP); 2017 2018 if (load_fp) { 2019 z_lg(fp, _z_abi(callers_sp), Z_SP); 2020 } 2021 2022 add2reg(Z_SP, offset, newSP); 2023 z_stg(fp, _z_abi(callers_sp), Z_SP); 2024 } 2025 2026 // Resize_frame with SP(new) = [newSP]. 2027 // load_fp == true only indicates that fp is not pre-filled with the frame pointer. 2028 // It does not guarantee that fp contains the frame pointer at the end. 2029 void MacroAssembler::resize_frame_absolute(Register newSP, Register fp, bool load_fp) { 2030 assert_different_registers(newSP, fp, Z_SP); 2031 2032 if (load_fp) { 2033 z_lg(fp, _z_abi(callers_sp), Z_SP); // need to use load/store. 2034 } 2035 2036 z_lgr(Z_SP, newSP); 2037 if (newSP != Z_R0) { // make sure we generate correct code, no matter what register newSP uses. 2038 z_stg(fp, _z_abi(callers_sp), newSP); 2039 } else { 2040 z_stg(fp, _z_abi(callers_sp), Z_SP); 2041 } 2042 } 2043 2044 // Resize_frame with SP(new) = SP(old) + offset. 2045 void MacroAssembler::resize_frame(RegisterOrConstant offset, Register fp, bool load_fp) { 2046 assert_different_registers(fp, Z_SP); 2047 2048 if (load_fp) { 2049 z_lg(fp, _z_abi(callers_sp), Z_SP); 2050 } 2051 add64(Z_SP, offset); 2052 z_stg(fp, _z_abi(callers_sp), Z_SP); 2053 } 2054 2055 void MacroAssembler::push_frame(Register bytes, Register old_sp, bool copy_sp, bool bytes_with_inverted_sign) { 2056 #ifdef ASSERT 2057 assert_different_registers(bytes, old_sp, Z_SP); 2058 if (!copy_sp) { 2059 z_cgr(old_sp, Z_SP); 2060 asm_assert_eq("[old_sp]!=[Z_SP]", 0x211); 2061 } 2062 #endif 2063 if (copy_sp) { z_lgr(old_sp, Z_SP); } 2064 if (bytes_with_inverted_sign) { 2065 z_agr(Z_SP, bytes); 2066 } else { 2067 z_sgr(Z_SP, bytes); // Z_sgfr sufficient, but probably not faster. 
2068 } 2069 z_stg(old_sp, _z_abi(callers_sp), Z_SP); 2070 } 2071 2072 unsigned int MacroAssembler::push_frame(unsigned int bytes, Register scratch) { 2073 long offset = Assembler::align(bytes, frame::alignment_in_bytes); 2074 assert(offset > 0, "should push a frame with positive size, size = %ld.", offset); 2075 assert(Displacement::is_validDisp(-offset), "frame size out of range, size = %ld", offset); 2076 2077 // We must not write outside the current stack bounds (given by Z_SP). 2078 // Thus, we have to first update Z_SP and then store the previous SP as stack linkage. 2079 // We rely on Z_R0 by default to be available as scratch. 2080 z_lgr(scratch, Z_SP); 2081 add2reg(Z_SP, -offset); 2082 z_stg(scratch, _z_abi(callers_sp), Z_SP); 2083 #ifdef ASSERT 2084 // Just make sure nobody uses the value in the default scratch register. 2085 // When another register is used, the caller might rely on it containing the frame pointer. 2086 if (scratch == Z_R0) { 2087 z_iihf(scratch, 0xbaadbabe); 2088 z_iilf(scratch, 0xdeadbeef); 2089 } 2090 #endif 2091 return offset; 2092 } 2093 2094 // Push a frame of size `bytes' plus abi160 on top. 2095 unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) { 2096 BLOCK_COMMENT("push_frame_abi160 {"); 2097 unsigned int res = push_frame(bytes + frame::z_abi_160_size); 2098 BLOCK_COMMENT("} push_frame_abi160"); 2099 return res; 2100 } 2101 2102 // Pop current C frame. 2103 void MacroAssembler::pop_frame() { 2104 BLOCK_COMMENT("pop_frame:"); 2105 Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP); 2106 } 2107 2108 // Pop current C frame and restore return PC register (Z_R14). 2109 void MacroAssembler::pop_frame_restore_retPC(int frame_size_in_bytes) { 2110 BLOCK_COMMENT("pop_frame_restore_retPC:"); 2111 int retPC_offset = _z_abi16(return_pc) + frame_size_in_bytes; 2112 // If possible, pop frame by add instead of load (a penny saved is a penny got :-). 2113 if (Displacement::is_validDisp(retPC_offset)) { 2114 z_lg(Z_R14, retPC_offset, Z_SP); 2115 add2reg(Z_SP, frame_size_in_bytes); 2116 } else { 2117 add2reg(Z_SP, frame_size_in_bytes); 2118 restore_return_pc(); 2119 } 2120 } 2121 2122 void MacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) { 2123 if (allow_relocation) { 2124 call_c(entry_point); 2125 } else { 2126 call_c_static(entry_point); 2127 } 2128 } 2129 2130 void MacroAssembler::call_VM_leaf_base(address entry_point) { 2131 bool allow_relocation = true; 2132 call_VM_leaf_base(entry_point, allow_relocation); 2133 } 2134 2135 void MacroAssembler::call_VM_base(Register oop_result, 2136 Register last_java_sp, 2137 address entry_point, 2138 bool allow_relocation, 2139 bool check_exceptions) { // Defaults to true. 2140 // Allow_relocation indicates, if true, that the generated code shall 2141 // be fit for code relocation or referenced data relocation. In other 2142 // words: all addresses must be considered variable. PC-relative addressing 2143 // is not possible then. 2144 // On the other hand, if (allow_relocation == false), addresses and offsets 2145 // may be considered stable, enabling us to take advantage of some PC-relative 2146 // addressing tweaks. These might improve performance and reduce code size. 2147 2148 // Determine last_java_sp register. 2149 if (!last_java_sp->is_valid()) { 2150 last_java_sp = Z_SP; // Load Z_SP as SP. 2151 } 2152 2153 set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, Z_R1, allow_relocation); 2154 2155 // ARG1 must hold thread address. 
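  // The VM entry points reached via call_VM_base() take the current thread as their
  // first C argument. The call_VM() wrappers below therefore pass the Java-level
  // arguments in Z_ARG2..Z_ARG4 and leave Z_ARG1 to be loaded with Z_thread here.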
2156 z_lgr(Z_ARG1, Z_thread); 2157 2158 address return_pc = NULL; 2159 if (allow_relocation) { 2160 return_pc = call_c(entry_point); 2161 } else { 2162 return_pc = call_c_static(entry_point); 2163 } 2164 2165 reset_last_Java_frame(allow_relocation); 2166 2167 // C++ interp handles this in the interpreter. 2168 check_and_handle_popframe(Z_thread); 2169 check_and_handle_earlyret(Z_thread); 2170 2171 // Check for pending exceptions. 2172 if (check_exceptions) { 2173 // Check for pending exceptions (java_thread is set upon return). 2174 load_and_test_long(Z_R0_scratch, Address(Z_thread, Thread::pending_exception_offset())); 2175 2176 // This used to conditionally jump to forward_exception however it is 2177 // possible if we relocate that the branch will not reach. So we must jump 2178 // around so we can always reach. 2179 2180 Label ok; 2181 z_bre(ok); // Bcondequal is the same as bcondZero. 2182 call_stub(StubRoutines::forward_exception_entry()); 2183 bind(ok); 2184 } 2185 2186 // Get oop result if there is one and reset the value in the thread. 2187 if (oop_result->is_valid()) { 2188 get_vm_result(oop_result); 2189 } 2190 2191 _last_calls_return_pc = return_pc; // Wipe out other (error handling) calls. 2192 } 2193 2194 void MacroAssembler::call_VM_base(Register oop_result, 2195 Register last_java_sp, 2196 address entry_point, 2197 bool check_exceptions) { // Defaults to true. 2198 bool allow_relocation = true; 2199 call_VM_base(oop_result, last_java_sp, entry_point, allow_relocation, check_exceptions); 2200 } 2201 2202 // VM calls without explicit last_java_sp. 2203 2204 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) { 2205 // Call takes possible detour via InterpreterMacroAssembler. 2206 call_VM_base(oop_result, noreg, entry_point, true, check_exceptions); 2207 } 2208 2209 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) { 2210 // Z_ARG1 is reserved for the thread. 2211 lgr_if_needed(Z_ARG2, arg_1); 2212 call_VM(oop_result, entry_point, check_exceptions); 2213 } 2214 2215 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { 2216 // Z_ARG1 is reserved for the thread. 2217 lgr_if_needed(Z_ARG2, arg_1); 2218 assert(arg_2 != Z_ARG2, "smashed argument"); 2219 lgr_if_needed(Z_ARG3, arg_2); 2220 call_VM(oop_result, entry_point, check_exceptions); 2221 } 2222 2223 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, 2224 Register arg_3, bool check_exceptions) { 2225 // Z_ARG1 is reserved for the thread. 2226 lgr_if_needed(Z_ARG2, arg_1); 2227 assert(arg_2 != Z_ARG2, "smashed argument"); 2228 lgr_if_needed(Z_ARG3, arg_2); 2229 assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument"); 2230 lgr_if_needed(Z_ARG4, arg_3); 2231 call_VM(oop_result, entry_point, check_exceptions); 2232 } 2233 2234 // VM static calls without explicit last_java_sp. 2235 2236 void MacroAssembler::call_VM_static(Register oop_result, address entry_point, bool check_exceptions) { 2237 // Call takes possible detour via InterpreterMacroAssembler. 2238 call_VM_base(oop_result, noreg, entry_point, false, check_exceptions); 2239 } 2240 2241 void MacroAssembler::call_VM_static(Register oop_result, address entry_point, Register arg_1, Register arg_2, 2242 Register arg_3, bool check_exceptions) { 2243 // Z_ARG1 is reserved for the thread. 
2244 lgr_if_needed(Z_ARG2, arg_1); 2245 assert(arg_2 != Z_ARG2, "smashed argument"); 2246 lgr_if_needed(Z_ARG3, arg_2); 2247 assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument"); 2248 lgr_if_needed(Z_ARG4, arg_3); 2249 call_VM_static(oop_result, entry_point, check_exceptions); 2250 } 2251 2252 // VM calls with explicit last_java_sp. 2253 2254 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions) { 2255 // Call takes possible detour via InterpreterMacroAssembler. 2256 call_VM_base(oop_result, last_java_sp, entry_point, true, check_exceptions); 2257 } 2258 2259 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) { 2260 // Z_ARG1 is reserved for the thread. 2261 lgr_if_needed(Z_ARG2, arg_1); 2262 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2263 } 2264 2265 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, 2266 Register arg_2, bool check_exceptions) { 2267 // Z_ARG1 is reserved for the thread. 2268 lgr_if_needed(Z_ARG2, arg_1); 2269 assert(arg_2 != Z_ARG2, "smashed argument"); 2270 lgr_if_needed(Z_ARG3, arg_2); 2271 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2272 } 2273 2274 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, 2275 Register arg_2, Register arg_3, bool check_exceptions) { 2276 // Z_ARG1 is reserved for the thread. 2277 lgr_if_needed(Z_ARG2, arg_1); 2278 assert(arg_2 != Z_ARG2, "smashed argument"); 2279 lgr_if_needed(Z_ARG3, arg_2); 2280 assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument"); 2281 lgr_if_needed(Z_ARG4, arg_3); 2282 call_VM(oop_result, last_java_sp, entry_point, check_exceptions); 2283 } 2284 2285 // VM leaf calls. 2286 2287 void MacroAssembler::call_VM_leaf(address entry_point) { 2288 // Call takes possible detour via InterpreterMacroAssembler. 2289 call_VM_leaf_base(entry_point, true); 2290 } 2291 2292 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) { 2293 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2294 call_VM_leaf(entry_point); 2295 } 2296 2297 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) { 2298 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2299 assert(arg_2 != Z_ARG1, "smashed argument"); 2300 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2301 call_VM_leaf(entry_point); 2302 } 2303 2304 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) { 2305 if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); 2306 assert(arg_2 != Z_ARG1, "smashed argument"); 2307 if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); 2308 assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument"); 2309 if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3); 2310 call_VM_leaf(entry_point); 2311 } 2312 2313 // Static VM leaf calls. 2314 // Really static VM leaf calls are never patched. 2315 2316 void MacroAssembler::call_VM_leaf_static(address entry_point) { 2317 // Call takes possible detour via InterpreterMacroAssembler. 
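  // "Static" means the target is reached via call_c_static(), i.e. through an
  // absolute address, so the generated code must never be relocated or patched.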
  call_VM_leaf_base(entry_point, false);
}

void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1) {
  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
  call_VM_leaf_static(entry_point);
}

void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2) {
  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
  assert(arg_2 != Z_ARG1, "smashed argument");
  if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
  call_VM_leaf_static(entry_point);
}

void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
  assert(arg_2 != Z_ARG1, "smashed argument");
  if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
  assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument");
  if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3);
  call_VM_leaf_static(entry_point);
}

// Don't use detour via call_c(reg).
address MacroAssembler::call_c(address function_entry) {
  load_const(Z_R1, function_entry);
  return call(Z_R1);
}

// Variant for really static (non-relocatable) calls which are never patched.
address MacroAssembler::call_c_static(address function_entry) {
  load_absolute_address(Z_R1, function_entry);
#if 0 // def ASSERT
  // Verify that call site did not move.
  load_const_optimized(Z_R0, function_entry);
  z_cgr(Z_R1, Z_R0);
  z_brc(bcondEqual, 3);
  z_illtrap(0xba);
#endif
  return call(Z_R1);
}

address MacroAssembler::call_c_opt(address function_entry) {
  bool success = call_far_patchable(function_entry, -2 /* emit relocation + constant */);
  _last_calls_return_pc = success ? pc() : NULL;
  return _last_calls_return_pc;
}

// Identify a call_far_patchable instruction: LARL + LG + BASR
//
//    nop                  ; optionally, if required for alignment
//    lgrl rx,A(TOC entry) ; PC-relative access into constant pool
//    basr Z_R14,rx        ; end of this instruction must be aligned to a word boundary
//
// Code pattern will eventually get patched into variant2 (see below for detection code).
//
bool MacroAssembler::is_call_far_patchable_variant0_at(address instruction_addr) {
  address iaddr = instruction_addr;

  // Check for the actual load instruction.
  if (!is_load_const_from_toc(iaddr)) { return false; }
  iaddr += load_const_from_toc_size();

  // Check for the call (BASR) instruction, finally.
  assert(iaddr-instruction_addr+call_byregister_size() == call_far_patchable_size(), "size mismatch");
  return is_call_byregister(iaddr);
}

// Identify a call_far_patchable instruction: BRASL
//
// Code pattern that suits atomic patching:
//    nop                   ; Optionally, if required for alignment.
//    nop ...               ; Multiple filler nops to compensate for size difference (variant0 is longer).
//    nop                   ; For code pattern detection: Prepend each BRASL with a nop.
//    brasl Z_R14,<reladdr> ; End of code must be 4-byte aligned !
bool MacroAssembler::is_call_far_patchable_variant2_at(address instruction_addr) {
  const address call_addr = (address)((intptr_t)instruction_addr + call_far_patchable_size() - call_far_pcrelative_size());

  // Check for correct number of leading nops.
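  // Variant2 (nop + brasl) is shorter than variant0, so the emitter pads with leading
  // nops until both variants occupy call_far_patchable_size() bytes; every one of those
  // filler positions must contain a nop for this pattern to match.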
2398 address iaddr; 2399 for (iaddr = instruction_addr; iaddr < call_addr; iaddr += nop_size()) { 2400 if (!is_z_nop(iaddr)) { return false; } 2401 } 2402 assert(iaddr == call_addr, "sanity"); 2403 2404 // --> Check for call instruction. 2405 if (is_call_far_pcrelative(call_addr)) { 2406 assert(call_addr-instruction_addr+call_far_pcrelative_size() == call_far_patchable_size(), "size mismatch"); 2407 return true; 2408 } 2409 2410 return false; 2411 } 2412 2413 // Emit a NOT mt-safely patchable 64 bit absolute call. 2414 // If toc_offset == -2, then the destination of the call (= target) is emitted 2415 // to the constant pool and a runtime_call relocation is added 2416 // to the code buffer. 2417 // If toc_offset != -2, target must already be in the constant pool at 2418 // _ctableStart+toc_offset (a caller can retrieve toc_offset 2419 // from the runtime_call relocation). 2420 // Special handling of emitting to scratch buffer when there is no constant pool. 2421 // Slightly changed code pattern. We emit an additional nop if we would 2422 // not end emitting at a word aligned address. This is to ensure 2423 // an atomically patchable displacement in brasl instructions. 2424 // 2425 // A call_far_patchable comes in different flavors: 2426 // - LARL(CP) / LG(CP) / BR (address in constant pool, access via CP register) 2427 // - LGRL(CP) / BR (address in constant pool, pc-relative access) 2428 // - BRASL (relative address of call target coded in instruction) 2429 // All flavors occupy the same amount of space. Length differences are compensated 2430 // by leading nops, such that the instruction sequence always ends at the same 2431 // byte offset. This is required to keep the return offset constant. 2432 // Furthermore, the return address (the end of the instruction sequence) is forced 2433 // to be on a 4-byte boundary. This is required for atomic patching, should we ever 2434 // need to patch the call target of the BRASL flavor. 2435 // RETURN value: false, if no constant pool entry could be allocated, true otherwise. 2436 bool MacroAssembler::call_far_patchable(address target, int64_t tocOffset) { 2437 // Get current pc and ensure word alignment for end of instr sequence. 2438 const address start_pc = pc(); 2439 const intptr_t start_off = offset(); 2440 assert(!call_far_patchable_requires_alignment_nop(start_pc), "call_far_patchable requires aligned address"); 2441 const ptrdiff_t dist = (ptrdiff_t)(target - (start_pc + 2)); // Prepend each BRASL with a nop. 2442 const bool emit_target_to_pool = (tocOffset == -2) && !code_section()->scratch_emit(); 2443 const bool emit_relative_call = !emit_target_to_pool && 2444 RelAddr::is_in_range_of_RelAddr32(dist) && 2445 ReoptimizeCallSequences && 2446 !code_section()->scratch_emit(); 2447 2448 if (emit_relative_call) { 2449 // Add padding to get the same size as below. 2450 const unsigned int padding = call_far_patchable_size() - call_far_pcrelative_size(); 2451 unsigned int current_padding; 2452 for (current_padding = 0; current_padding < padding; current_padding += nop_size()) { z_nop(); } 2453 assert(current_padding == padding, "sanity"); 2454 2455 // relative call: len = 2(nop) + 6 (brasl) 2456 // CodeBlob resize cannot occur in this case because 2457 // this call is emitted into pre-existing space. 2458 z_nop(); // Prepend each BRASL with a nop. 2459 z_brasl(Z_R14, target); 2460 } else { 2461 // absolute call: Get address from TOC. 
2462 // len = (load TOC){6|0} + (load from TOC){6} + (basr){2} = {14|8} 2463 if (emit_target_to_pool) { 2464 // When emitting the call for the first time, we do not need to use 2465 // the pc-relative version. It will be patched anyway, when the code 2466 // buffer is copied. 2467 // Relocation is not needed when !ReoptimizeCallSequences. 2468 relocInfo::relocType rt = ReoptimizeCallSequences ? relocInfo::runtime_call_w_cp_type : relocInfo::none; 2469 AddressLiteral dest(target, rt); 2470 // Store_oop_in_toc() adds dest to the constant table. As side effect, this kills 2471 // inst_mark(). Reset if possible. 2472 bool reset_mark = (inst_mark() == pc()); 2473 tocOffset = store_oop_in_toc(dest); 2474 if (reset_mark) { set_inst_mark(); } 2475 if (tocOffset == -1) { 2476 return false; // Couldn't create constant pool entry. 2477 } 2478 } 2479 assert(offset() == start_off, "emit no code before this point!"); 2480 2481 address tocPos = pc() + tocOffset; 2482 if (emit_target_to_pool) { 2483 tocPos = code()->consts()->start() + tocOffset; 2484 } 2485 load_long_pcrelative(Z_R14, tocPos); 2486 z_basr(Z_R14, Z_R14); 2487 } 2488 2489 #ifdef ASSERT 2490 // Assert that we can identify the emitted call. 2491 assert(is_call_far_patchable_at(addr_at(start_off)), "can't identify emitted call"); 2492 assert(offset() == start_off+call_far_patchable_size(), "wrong size"); 2493 2494 if (emit_target_to_pool) { 2495 assert(get_dest_of_call_far_patchable_at(addr_at(start_off), code()->consts()->start()) == target, 2496 "wrong encoding of dest address"); 2497 } 2498 #endif 2499 return true; // success 2500 } 2501 2502 // Identify a call_far_patchable instruction. 2503 // For more detailed information see header comment of call_far_patchable. 2504 bool MacroAssembler::is_call_far_patchable_at(address instruction_addr) { 2505 return is_call_far_patchable_variant2_at(instruction_addr) || // short version: BRASL 2506 is_call_far_patchable_variant0_at(instruction_addr); // long version LARL + LG + BASR 2507 } 2508 2509 // Does the call_far_patchable instruction use a pc-relative encoding 2510 // of the call destination? 2511 bool MacroAssembler::is_call_far_patchable_pcrelative_at(address instruction_addr) { 2512 // Variant 2 is pc-relative. 2513 return is_call_far_patchable_variant2_at(instruction_addr); 2514 } 2515 2516 bool MacroAssembler::is_call_far_pcrelative(address instruction_addr) { 2517 // Prepend each BRASL with a nop. 2518 return is_z_nop(instruction_addr) && is_z_brasl(instruction_addr + nop_size()); // Match at position after one nop required. 2519 } 2520 2521 // Set destination address of a call_far_patchable instruction. 2522 void MacroAssembler::set_dest_of_call_far_patchable_at(address instruction_addr, address dest, int64_t tocOffset) { 2523 ResourceMark rm; 2524 2525 // Now that CP entry is verified, patch call to a pc-relative call (if circumstances permit). 2526 int code_size = MacroAssembler::call_far_patchable_size(); 2527 CodeBuffer buf(instruction_addr, code_size); 2528 MacroAssembler masm(&buf); 2529 masm.call_far_patchable(dest, tocOffset); 2530 ICache::invalidate_range(instruction_addr, code_size); // Empty on z. 2531 } 2532 2533 // Get dest address of a call_far_patchable instruction. 2534 address MacroAssembler::get_dest_of_call_far_patchable_at(address instruction_addr, address ctable) { 2535 // Dynamic TOC: absolute address in constant pool. 2536 // Check variant2 first, it is more frequent. 2537 2538 // Relative address encoded in call instruction. 
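  // Variant2: the target is recovered from the 32-bit relative offset of the BRASL
  // that follows the leading nop. Variant0 (below): the absolute target is read back
  // from the TOC slot addressed by the load-from-TOC instruction.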
2539 if (is_call_far_patchable_variant2_at(instruction_addr)) { 2540 return MacroAssembler::get_target_addr_pcrel(instruction_addr + nop_size()); // Prepend each BRASL with a nop. 2541 2542 // Absolute address in constant pool. 2543 } else if (is_call_far_patchable_variant0_at(instruction_addr)) { 2544 address iaddr = instruction_addr; 2545 2546 long tocOffset = get_load_const_from_toc_offset(iaddr); 2547 address tocLoc = iaddr + tocOffset; 2548 return *(address *)(tocLoc); 2549 } else { 2550 fprintf(stderr, "MacroAssembler::get_dest_of_call_far_patchable_at has a problem at %p:\n", instruction_addr); 2551 fprintf(stderr, "not a call_far_patchable: %16.16lx %16.16lx, len = %d\n", 2552 *(unsigned long*)instruction_addr, 2553 *(unsigned long*)(instruction_addr+8), 2554 call_far_patchable_size()); 2555 Disassembler::decode(instruction_addr, instruction_addr+call_far_patchable_size()); 2556 ShouldNotReachHere(); 2557 return NULL; 2558 } 2559 } 2560 2561 void MacroAssembler::align_call_far_patchable(address pc) { 2562 if (call_far_patchable_requires_alignment_nop(pc)) { z_nop(); } 2563 } 2564 2565 void MacroAssembler::check_and_handle_earlyret(Register java_thread) { 2566 } 2567 2568 void MacroAssembler::check_and_handle_popframe(Register java_thread) { 2569 } 2570 2571 // Read from the polling page. 2572 // Use TM or TMY instruction, depending on read offset. 2573 // offset = 0: Use TM, safepoint polling. 2574 // offset < 0: Use TMY, profiling safepoint polling. 2575 void MacroAssembler::load_from_polling_page(Register polling_page_address, int64_t offset) { 2576 if (Immediate::is_uimm12(offset)) { 2577 z_tm(offset, polling_page_address, mask_safepoint); 2578 } else { 2579 z_tmy(offset, polling_page_address, mask_profiling); 2580 } 2581 } 2582 2583 // Check whether z_instruction is a read access to the polling page 2584 // which was emitted by load_from_polling_page(..). 2585 bool MacroAssembler::is_load_from_polling_page(address instr_loc) { 2586 unsigned long z_instruction; 2587 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2588 2589 if (ilen == 2) { return false; } // It's none of the allowed instructions. 2590 2591 if (ilen == 4) { 2592 if (!is_z_tm(z_instruction)) { return false; } // It's len=4, but not a z_tm. fail. 2593 2594 int ms = inv_mask(z_instruction,8,32); // mask 2595 int ra = inv_reg(z_instruction,16,32); // base register 2596 int ds = inv_uimm12(z_instruction); // displacement 2597 2598 if (!(ds == 0 && ra != 0 && ms == mask_safepoint)) { 2599 return false; // It's not a z_tm(0, ra, mask_safepoint). Fail. 2600 } 2601 2602 } else { /* if (ilen == 6) */ 2603 2604 assert(!is_z_lg(z_instruction), "old form (LG) polling page access. Please fix and use TM(Y)."); 2605 2606 if (!is_z_tmy(z_instruction)) { return false; } // It's len=6, but not a z_tmy. fail. 2607 2608 int ms = inv_mask(z_instruction,8,48); // mask 2609 int ra = inv_reg(z_instruction,16,48); // base register 2610 int ds = inv_simm20(z_instruction); // displacement 2611 } 2612 2613 return true; 2614 } 2615 2616 // Extract poll address from instruction and ucontext. 
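// Typically used from signal handling code: the faulting TM/TMY emitted by
// load_from_polling_page() is decoded, the base register value is taken from the
// saved context (uc_mcontext.gregs[ra]), and the displacement is added to recover
// the address that was polled.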
2617 address MacroAssembler::get_poll_address(address instr_loc, void* ucontext) { 2618 assert(ucontext != NULL, "must have ucontext"); 2619 ucontext_t* uc = (ucontext_t*) ucontext; 2620 unsigned long z_instruction; 2621 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2622 2623 if (ilen == 4 && is_z_tm(z_instruction)) { 2624 int ra = inv_reg(z_instruction, 16, 32); // base register 2625 int ds = inv_uimm12(z_instruction); // displacement 2626 address addr = (address)uc->uc_mcontext.gregs[ra]; 2627 return addr + ds; 2628 } else if (ilen == 6 && is_z_tmy(z_instruction)) { 2629 int ra = inv_reg(z_instruction, 16, 48); // base register 2630 int ds = inv_simm20(z_instruction); // displacement 2631 address addr = (address)uc->uc_mcontext.gregs[ra]; 2632 return addr + ds; 2633 } 2634 2635 ShouldNotReachHere(); 2636 return NULL; 2637 } 2638 2639 // Extract poll register from instruction. 2640 uint MacroAssembler::get_poll_register(address instr_loc) { 2641 unsigned long z_instruction; 2642 unsigned int ilen = get_instruction(instr_loc, &z_instruction); 2643 2644 if (ilen == 4 && is_z_tm(z_instruction)) { 2645 return (uint)inv_reg(z_instruction, 16, 32); // base register 2646 } else if (ilen == 6 && is_z_tmy(z_instruction)) { 2647 return (uint)inv_reg(z_instruction, 16, 48); // base register 2648 } 2649 2650 ShouldNotReachHere(); 2651 return 0; 2652 } 2653 2654 void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) { 2655 const Address poll_byte_addr(Z_thread, in_bytes(JavaThread::polling_word_offset()) + 7 /* Big Endian */); 2656 // Armed page has poll_bit set. 2657 z_tm(poll_byte_addr, SafepointMechanism::poll_bit()); 2658 z_brnaz(slow_path); 2659 } 2660 2661 // Don't rely on register locking, always use Z_R1 as scratch register instead. 2662 void MacroAssembler::bang_stack_with_offset(int offset) { 2663 // Stack grows down, caller passes positive offset. 2664 assert(offset > 0, "must bang with positive offset"); 2665 if (Displacement::is_validDisp(-offset)) { 2666 z_tmy(-offset, Z_SP, mask_stackbang); 2667 } else { 2668 add2reg(Z_R1, -offset, Z_SP); // Do not destroy Z_SP!!! 2669 z_tm(0, Z_R1, mask_stackbang); // Just banging. 2670 } 2671 } 2672 2673 void MacroAssembler::reserved_stack_check(Register return_pc) { 2674 // Test if reserved zone needs to be enabled. 2675 Label no_reserved_zone_enabling; 2676 assert(return_pc == Z_R14, "Return pc must be in R14 before z_br() to StackOverflow stub."); 2677 BLOCK_COMMENT("reserved_stack_check {"); 2678 2679 z_clg(Z_SP, Address(Z_thread, JavaThread::reserved_stack_activation_offset())); 2680 z_brl(no_reserved_zone_enabling); 2681 2682 // Enable reserved zone again, throw stack overflow exception. 2683 save_return_pc(); 2684 push_frame_abi160(0); 2685 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), Z_thread); 2686 pop_frame(); 2687 restore_return_pc(); 2688 2689 load_const_optimized(Z_R1, StubRoutines::throw_delayed_StackOverflowError_entry()); 2690 // Don't use call() or z_basr(), they will invalidate Z_R14 which contains the return pc. 2691 z_br(Z_R1); 2692 2693 should_not_reach_here(); 2694 2695 bind(no_reserved_zone_enabling); 2696 BLOCK_COMMENT("} reserved_stack_check"); 2697 } 2698 2699 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
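// Illustrative use (hypothetical register choices), allocating a fixed-size object and
// falling back to the runtime when the TLAB does not have enough room:
//
//   Label slow_case;
//   tlab_allocate(obj_reg, noreg, instance_size_in_bytes, tmp_reg, slow_case);
//   // ... initialize header and fields of the new object ...
//   bind(slow_case);  // call into the runtime allocator here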
2700 void MacroAssembler::tlab_allocate(Register obj, 2701 Register var_size_in_bytes, 2702 int con_size_in_bytes, 2703 Register t1, 2704 Label& slow_case) { 2705 assert_different_registers(obj, var_size_in_bytes, t1); 2706 Register end = t1; 2707 Register thread = Z_thread; 2708 2709 z_lg(obj, Address(thread, JavaThread::tlab_top_offset())); 2710 if (var_size_in_bytes == noreg) { 2711 z_lay(end, Address(obj, con_size_in_bytes)); 2712 } else { 2713 z_lay(end, Address(obj, var_size_in_bytes)); 2714 } 2715 z_cg(end, Address(thread, JavaThread::tlab_end_offset())); 2716 branch_optimized(bcondHigh, slow_case); 2717 2718 // Update the tlab top pointer. 2719 z_stg(end, Address(thread, JavaThread::tlab_top_offset())); 2720 2721 // Recover var_size_in_bytes if necessary. 2722 if (var_size_in_bytes == end) { 2723 z_sgr(var_size_in_bytes, obj); 2724 } 2725 } 2726 2727 // Emitter for interface method lookup. 2728 // input: recv_klass, intf_klass, itable_index 2729 // output: method_result 2730 // kills: itable_index, temp1_reg, Z_R0, Z_R1 2731 // TODO: Temp2_reg is unused. we may use this emitter also in the itable stubs. 2732 // If the register is still not needed then, remove it. 2733 void MacroAssembler::lookup_interface_method(Register recv_klass, 2734 Register intf_klass, 2735 RegisterOrConstant itable_index, 2736 Register method_result, 2737 Register temp1_reg, 2738 Label& no_such_interface, 2739 bool return_method) { 2740 2741 const Register vtable_len = temp1_reg; // Used to compute itable_entry_addr. 2742 const Register itable_entry_addr = Z_R1_scratch; 2743 const Register itable_interface = Z_R0_scratch; 2744 2745 BLOCK_COMMENT("lookup_interface_method {"); 2746 2747 // Load start of itable entries into itable_entry_addr. 2748 z_llgf(vtable_len, Address(recv_klass, Klass::vtable_length_offset())); 2749 z_sllg(vtable_len, vtable_len, exact_log2(vtableEntry::size_in_bytes())); 2750 2751 // Loop over all itable entries until desired interfaceOop(Rinterface) found. 2752 const int vtable_base_offset = in_bytes(Klass::vtable_start_offset()); 2753 2754 add2reg_with_index(itable_entry_addr, 2755 vtable_base_offset + itableOffsetEntry::interface_offset_in_bytes(), 2756 recv_klass, vtable_len); 2757 2758 const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize; 2759 Label search; 2760 2761 bind(search); 2762 2763 // Handle IncompatibleClassChangeError. 2764 // If the entry is NULL then we've reached the end of the table 2765 // without finding the expected interface, so throw an exception. 2766 load_and_test_long(itable_interface, Address(itable_entry_addr)); 2767 z_bre(no_such_interface); 2768 2769 add2reg(itable_entry_addr, itable_offset_search_inc); 2770 z_cgr(itable_interface, intf_klass); 2771 z_brne(search); 2772 2773 // Entry found and itable_entry_addr points to it, get offset of vtable for interface. 2774 if (return_method) { 2775 const int vtable_offset_offset = (itableOffsetEntry::offset_offset_in_bytes() - 2776 itableOffsetEntry::interface_offset_in_bytes()) - 2777 itable_offset_search_inc; 2778 2779 // Compute itableMethodEntry and get method and entry point 2780 // we use addressing with index and displacement, since the formula 2781 // for computing the entry's offset has a fixed and a dynamic part, 2782 // the latter depending on the matched interface entry and on the case, 2783 // that the itable index has been passed as a register, not a constant value. 
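    // In both cases the effective address formed at the final z_lg below is:
    //   recv_klass + <itable offset read from the matched offset entry>
    //              + itable_index * itableMethodEntry::size() * wordSize
    //              + itableMethodEntry::method_offset_in_bytes()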
2784 int method_offset = itableMethodEntry::method_offset_in_bytes(); 2785 // Fixed part (displacement), common operand. 2786 Register itable_offset = method_result; // Dynamic part (index register). 2787 2788 if (itable_index.is_register()) { 2789 // Compute the method's offset in that register, for the formula, see the 2790 // else-clause below. 2791 z_sllg(itable_offset, itable_index.as_register(), exact_log2(itableMethodEntry::size() * wordSize)); 2792 z_agf(itable_offset, vtable_offset_offset, itable_entry_addr); 2793 } else { 2794 // Displacement increases. 2795 method_offset += itableMethodEntry::size() * wordSize * itable_index.as_constant(); 2796 2797 // Load index from itable. 2798 z_llgf(itable_offset, vtable_offset_offset, itable_entry_addr); 2799 } 2800 2801 // Finally load the method's oop. 2802 z_lg(method_result, method_offset, itable_offset, recv_klass); 2803 } 2804 BLOCK_COMMENT("} lookup_interface_method"); 2805 } 2806 2807 // Lookup for virtual method invocation. 2808 void MacroAssembler::lookup_virtual_method(Register recv_klass, 2809 RegisterOrConstant vtable_index, 2810 Register method_result) { 2811 assert_different_registers(recv_klass, vtable_index.register_or_noreg()); 2812 assert(vtableEntry::size() * wordSize == wordSize, 2813 "else adjust the scaling in the code below"); 2814 2815 BLOCK_COMMENT("lookup_virtual_method {"); 2816 2817 const int base = in_bytes(Klass::vtable_start_offset()); 2818 2819 if (vtable_index.is_constant()) { 2820 // Load with base + disp. 2821 Address vtable_entry_addr(recv_klass, 2822 vtable_index.as_constant() * wordSize + 2823 base + 2824 vtableEntry::method_offset_in_bytes()); 2825 2826 z_lg(method_result, vtable_entry_addr); 2827 } else { 2828 // Shift index properly and load with base + index + disp. 2829 Register vindex = vtable_index.as_register(); 2830 Address vtable_entry_addr(recv_klass, vindex, 2831 base + vtableEntry::method_offset_in_bytes()); 2832 2833 z_sllg(vindex, vindex, exact_log2(wordSize)); 2834 z_lg(method_result, vtable_entry_addr); 2835 } 2836 BLOCK_COMMENT("} lookup_virtual_method"); 2837 } 2838 2839 // Factor out code to call ic_miss_handler. 2840 // Generate code to call the inline cache miss handler. 2841 // 2842 // In most cases, this code will be generated out-of-line. 2843 // The method parameters are intended to provide some variability. 2844 // ICM - Label which has to be bound to the start of useful code (past any traps). 2845 // trapMarker - Marking byte for the generated illtrap instructions (if any). 2846 // Any value except 0x00 is supported. 2847 // = 0x00 - do not generate illtrap instructions. 2848 // use nops to fill unused space. 2849 // requiredSize - required size of the generated code. If the actually 2850 // generated code is smaller, use padding instructions to fill up. 2851 // = 0 - no size requirement, no padding. 2852 // scratch - scratch register to hold branch target address. 2853 // 2854 // The method returns the code offset of the bound label. 2855 unsigned int MacroAssembler::call_ic_miss_handler(Label& ICM, int trapMarker, int requiredSize, Register scratch) { 2856 intptr_t startOffset = offset(); 2857 2858 // Prevent entry at content_begin(). 2859 if (trapMarker != 0) { 2860 z_illtrap(trapMarker); 2861 } 2862 2863 // Load address of inline cache miss code into scratch register 2864 // and branch to cache miss handler. 
2865 BLOCK_COMMENT("IC miss handler {"); 2866 BIND(ICM); 2867 unsigned int labelOffset = offset(); 2868 AddressLiteral icmiss(SharedRuntime::get_ic_miss_stub()); 2869 2870 load_const_optimized(scratch, icmiss); 2871 z_br(scratch); 2872 2873 // Fill unused space. 2874 if (requiredSize > 0) { 2875 while ((offset() - startOffset) < requiredSize) { 2876 if (trapMarker == 0) { 2877 z_nop(); 2878 } else { 2879 z_illtrap(trapMarker); 2880 } 2881 } 2882 } 2883 BLOCK_COMMENT("} IC miss handler"); 2884 return labelOffset; 2885 } 2886 2887 void MacroAssembler::nmethod_UEP(Label& ic_miss) { 2888 Register ic_reg = Z_inline_cache; 2889 int klass_offset = oopDesc::klass_offset_in_bytes(); 2890 if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) { 2891 if (VM_Version::has_CompareBranch()) { 2892 z_cgij(Z_ARG1, 0, Assembler::bcondEqual, ic_miss); 2893 } else { 2894 z_ltgr(Z_ARG1, Z_ARG1); 2895 z_bre(ic_miss); 2896 } 2897 } 2898 // Compare cached class against klass from receiver. 2899 compare_klass_ptr(ic_reg, klass_offset, Z_ARG1, false); 2900 z_brne(ic_miss); 2901 } 2902 2903 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 2904 Register super_klass, 2905 Register temp1_reg, 2906 Label* L_success, 2907 Label* L_failure, 2908 Label* L_slow_path, 2909 RegisterOrConstant super_check_offset) { 2910 2911 const int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 2912 const int sco_offset = in_bytes(Klass::super_check_offset_offset()); 2913 2914 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); 2915 bool need_slow_path = (must_load_sco || 2916 super_check_offset.constant_or_zero() == sc_offset); 2917 2918 // Input registers must not overlap. 2919 assert_different_registers(sub_klass, super_klass, temp1_reg); 2920 if (super_check_offset.is_register()) { 2921 assert_different_registers(sub_klass, super_klass, 2922 super_check_offset.as_register()); 2923 } else if (must_load_sco) { 2924 assert(temp1_reg != noreg, "supply either a temp or a register offset"); 2925 } 2926 2927 const Register Rsuper_check_offset = temp1_reg; 2928 2929 NearLabel L_fallthrough; 2930 int label_nulls = 0; 2931 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 2932 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 2933 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 2934 assert(label_nulls <= 1 || 2935 (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path), 2936 "at most one NULL in the batch, usually"); 2937 2938 BLOCK_COMMENT("check_klass_subtype_fast_path {"); 2939 // If the pointers are equal, we are done (e.g., String[] elements). 2940 // This self-check enables sharing of secondary supertype arrays among 2941 // non-primary types such as array-of-interface. Otherwise, each such 2942 // type would need its own customized SSA. 2943 // We move this check to the front of the fast path because many 2944 // type checks are in fact trivially successful in this manner, 2945 // so we get a nicely predicted branch right at the start of the check. 2946 compare64_and_branch(sub_klass, super_klass, bcondEqual, *L_success); 2947 2948 // Check the supertype display, which is uint. 
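  // super_check_offset selects a slot within sub_klass: one of the primary super
  // display entries or, when it equals sc_offset, the secondary super cache. The
  // compare below tests whether super_klass is recorded in that slot.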
2949 if (must_load_sco) { 2950 z_llgf(Rsuper_check_offset, sco_offset, super_klass); 2951 super_check_offset = RegisterOrConstant(Rsuper_check_offset); 2952 } 2953 Address super_check_addr(sub_klass, super_check_offset, 0); 2954 z_cg(super_klass, super_check_addr); // compare w/ displayed supertype 2955 2956 // This check has worked decisively for primary supers. 2957 // Secondary supers are sought in the super_cache ('super_cache_addr'). 2958 // (Secondary supers are interfaces and very deeply nested subtypes.) 2959 // This works in the same check above because of a tricky aliasing 2960 // between the super_cache and the primary super display elements. 2961 // (The 'super_check_addr' can address either, as the case requires.) 2962 // Note that the cache is updated below if it does not help us find 2963 // what we need immediately. 2964 // So if it was a primary super, we can just fail immediately. 2965 // Otherwise, it's the slow path for us (no success at this point). 2966 2967 // Hacked jmp, which may only be used just before L_fallthrough. 2968 #define final_jmp(label) \ 2969 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 2970 else { branch_optimized(Assembler::bcondAlways, label); } /*omit semicolon*/ 2971 2972 if (super_check_offset.is_register()) { 2973 branch_optimized(Assembler::bcondEqual, *L_success); 2974 z_cfi(super_check_offset.as_register(), sc_offset); 2975 if (L_failure == &L_fallthrough) { 2976 branch_optimized(Assembler::bcondEqual, *L_slow_path); 2977 } else { 2978 branch_optimized(Assembler::bcondNotEqual, *L_failure); 2979 final_jmp(*L_slow_path); 2980 } 2981 } else if (super_check_offset.as_constant() == sc_offset) { 2982 // Need a slow path; fast failure is impossible. 2983 if (L_slow_path == &L_fallthrough) { 2984 branch_optimized(Assembler::bcondEqual, *L_success); 2985 } else { 2986 branch_optimized(Assembler::bcondNotEqual, *L_slow_path); 2987 final_jmp(*L_success); 2988 } 2989 } else { 2990 // No slow path; it's a fast decision. 2991 if (L_failure == &L_fallthrough) { 2992 branch_optimized(Assembler::bcondEqual, *L_success); 2993 } else { 2994 branch_optimized(Assembler::bcondNotEqual, *L_failure); 2995 final_jmp(*L_success); 2996 } 2997 } 2998 2999 bind(L_fallthrough); 3000 #undef local_brc 3001 #undef final_jmp 3002 BLOCK_COMMENT("} check_klass_subtype_fast_path"); 3003 // fallthru (to slow path) 3004 } 3005 3006 void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass, 3007 Register Rsuperklass, 3008 Register Rarray_ptr, // tmp 3009 Register Rlength, // tmp 3010 Label* L_success, 3011 Label* L_failure) { 3012 // Input registers must not overlap. 3013 // Also check for R1 which is explicitly used here. 3014 assert_different_registers(Z_R1, Rsubklass, Rsuperklass, Rarray_ptr, Rlength); 3015 NearLabel L_fallthrough; 3016 int label_nulls = 0; 3017 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 3018 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 3019 assert(label_nulls <= 1, "at most one NULL in the batch"); 3020 3021 const int ss_offset = in_bytes(Klass::secondary_supers_offset()); 3022 const int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3023 3024 const int length_offset = Array<Klass*>::length_offset_in_bytes(); 3025 const int base_offset = Array<Klass*>::base_offset_in_bytes(); 3026 3027 // Hacked jmp, which may only be used just before L_fallthrough. 
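  // If the target is the local L_fallthrough label the branch would only skip to the
  // next instruction, so the macro emits nothing in that case.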
3028 #define final_jmp(label) \ 3029 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 3030 else branch_optimized(Assembler::bcondAlways, label) /*omit semicolon*/ 3031 3032 NearLabel loop_iterate, loop_count, match; 3033 3034 BLOCK_COMMENT("check_klass_subtype_slow_path {"); 3035 z_lg(Rarray_ptr, ss_offset, Rsubklass); 3036 3037 load_and_test_int(Rlength, Address(Rarray_ptr, length_offset)); 3038 branch_optimized(Assembler::bcondZero, *L_failure); 3039 3040 // Oops in table are NO MORE compressed. 3041 z_cg(Rsuperklass, base_offset, Rarray_ptr); // Check array element for match. 3042 z_bre(match); // Shortcut for array length = 1. 3043 3044 // No match yet, so we must walk the array's elements. 3045 z_lngfr(Rlength, Rlength); 3046 z_sllg(Rlength, Rlength, LogBytesPerWord); // -#bytes of cache array 3047 z_llill(Z_R1, BytesPerWord); // Set increment/end index. 3048 add2reg(Rlength, 2 * BytesPerWord); // start index = -(n-2)*BytesPerWord 3049 z_slgr(Rarray_ptr, Rlength); // start addr: += (n-2)*BytesPerWord 3050 z_bru(loop_count); 3051 3052 BIND(loop_iterate); 3053 z_cg(Rsuperklass, base_offset, Rlength, Rarray_ptr); // Check array element for match. 3054 z_bre(match); 3055 BIND(loop_count); 3056 z_brxlg(Rlength, Z_R1, loop_iterate); 3057 3058 // Rsuperklass not found among secondary super classes -> failure. 3059 branch_optimized(Assembler::bcondAlways, *L_failure); 3060 3061 // Got a hit. Return success (zero result). Set cache. 3062 // Cache load doesn't happen here. For speed it is directly emitted by the compiler. 3063 3064 BIND(match); 3065 3066 z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache. 3067 3068 final_jmp(*L_success); 3069 3070 // Exit to the surrounding code. 3071 BIND(L_fallthrough); 3072 #undef local_brc 3073 #undef final_jmp 3074 BLOCK_COMMENT("} check_klass_subtype_slow_path"); 3075 } 3076 3077 // Emitter for combining fast and slow path. 3078 void MacroAssembler::check_klass_subtype(Register sub_klass, 3079 Register super_klass, 3080 Register temp1_reg, 3081 Register temp2_reg, 3082 Label& L_success) { 3083 NearLabel failure; 3084 BLOCK_COMMENT(err_msg("check_klass_subtype(%s subclass of %s) {", sub_klass->name(), super_klass->name())); 3085 check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg, 3086 &L_success, &failure, NULL); 3087 check_klass_subtype_slow_path(sub_klass, super_klass, 3088 temp1_reg, temp2_reg, &L_success, NULL); 3089 BIND(failure); 3090 BLOCK_COMMENT("} check_klass_subtype"); 3091 } 3092 3093 void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) { 3094 assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); 3095 3096 Label L_fallthrough; 3097 if (L_fast_path == NULL) { 3098 L_fast_path = &L_fallthrough; 3099 } else if (L_slow_path == NULL) { 3100 L_slow_path = &L_fallthrough; 3101 } 3102 3103 // Fast path check: class is fully initialized 3104 z_cli(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized); 3105 z_bre(*L_fast_path); 3106 3107 // Fast path check: current thread is initializer thread 3108 z_cg(thread, Address(klass, InstanceKlass::init_thread_offset())); 3109 if (L_slow_path == &L_fallthrough) { 3110 z_bre(*L_fast_path); 3111 } else if (L_fast_path == &L_fallthrough) { 3112 z_brne(*L_slow_path); 3113 } else { 3114 Unimplemented(); 3115 } 3116 3117 bind(L_fallthrough); 3118 } 3119 3120 // Increment a counter at counter_address when the eq condition code is 3121 // set. 
Kills registers tmp1_reg and tmp2_reg and preserves the condition code. 3122 void MacroAssembler::increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg) { 3123 Label l; 3124 z_brne(l); 3125 load_const(tmp1_reg, counter_address); 3126 add2mem_32(Address(tmp1_reg), 1, tmp2_reg); 3127 z_cr(tmp1_reg, tmp1_reg); // Set cc to eq. 3128 bind(l); 3129 } 3130 3131 void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2) { 3132 Register displacedHeader = temp1; 3133 Register currentHeader = temp1; 3134 Register temp = temp2; 3135 NearLabel done, object_has_monitor; 3136 3137 BLOCK_COMMENT("compiler_fast_lock_object {"); 3138 3139 // Load markWord from oop into mark. 3140 z_lg(displacedHeader, 0, oop); 3141 3142 if (DiagnoseSyncOnValueBasedClasses != 0) { 3143 load_klass(Z_R1_scratch, oop); 3144 z_l(Z_R1_scratch, Address(Z_R1_scratch, Klass::access_flags_offset())); 3145 assert((JVM_ACC_IS_VALUE_BASED_CLASS & 0xFFFF) == 0, "or change following instruction"); 3146 z_nilh(Z_R1_scratch, JVM_ACC_IS_VALUE_BASED_CLASS >> 16); 3147 z_brne(done); 3148 } 3149 3150 // Handle existing monitor. 3151 // The object has an existing monitor iff (mark & monitor_value) != 0. 3152 guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word"); 3153 z_lr(temp, displacedHeader); 3154 z_nill(temp, markWord::monitor_value); 3155 z_brne(object_has_monitor); 3156 3157 // Set mark to markWord | markWord::unlocked_value. 3158 z_oill(displacedHeader, markWord::unlocked_value); 3159 3160 // Load Compare Value application register. 3161 3162 // Initialize the box (must happen before we update the object mark). 3163 z_stg(displacedHeader, BasicLock::displaced_header_offset_in_bytes(), box); 3164 3165 // Memory Fence (in cmpxchgd) 3166 // Compare object markWord with mark and if equal exchange scratch1 with object markWord. 3167 3168 // If the compare-and-swap succeeded, then we found an unlocked object and we 3169 // have now locked it. 3170 z_csg(displacedHeader, box, 0, oop); 3171 assert(currentHeader==displacedHeader, "must be same register"); // Identified two registers from z/Architecture. 3172 z_bre(done); 3173 3174 // We did not see an unlocked object so try the fast recursive case. 3175 3176 z_sgr(currentHeader, Z_SP); 3177 load_const_optimized(temp, (~(os::vm_page_size()-1) | markWord::lock_mask_in_place)); 3178 3179 z_ngr(currentHeader, temp); 3180 // z_brne(done); 3181 // z_release(); 3182 z_stg(currentHeader/*==0 or not 0*/, BasicLock::displaced_header_offset_in_bytes(), box); 3183 3184 z_bru(done); 3185 3186 Register zero = temp; 3187 Register monitor_tagged = displacedHeader; // Tagged with markWord::monitor_value. 3188 bind(object_has_monitor); 3189 // The object's monitor m is unlocked iff m->owner == NULL, 3190 // otherwise m->owner may contain a thread or a stack address. 3191 // 3192 // Try to CAS m->owner from NULL to current thread. 3193 z_lghi(zero, 0); 3194 // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ. 3195 z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged); 3196 // Store a non-null value into the box. 3197 z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box); 3198 #ifdef ASSERT 3199 z_brne(done); 3200 // We've acquired the monitor, check some invariants. 3201 // Invariant 1: _recursions should be 0. 
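// (The owner field was CAS'ed from NULL to the current thread just above, so a freshly
// acquired monitor must not report any recursions.)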
3202 asm_assert_mem8_is_zero(OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), monitor_tagged,
3203 "monitor->_recursions should be 0", -1);
3204 z_ltgr(zero, zero); // Set CR=EQ.
3205 #endif
3206 bind(done);
3207
3208 BLOCK_COMMENT("} compiler_fast_lock_object");
3209 // If locking was successful, CR should indicate 'EQ'.
3210 // The compiler or the native wrapper generates a branch to the runtime call
3211 // _complete_monitor_locking_Java.
3212 }
3213
3214 void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2) {
3215 Register displacedHeader = temp1;
3216 Register currentHeader = temp2;
3217 Register temp = temp1;
3218 Register monitor = temp2;
3219
3220 Label done, object_has_monitor;
3221
3222 BLOCK_COMMENT("compiler_fast_unlock_object {");
3223
3224 // Find the lock address and load the displaced header from the stack.
3225 // If the displaced header is zero, we have a recursive unlock.
3226 load_and_test_long(displacedHeader, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3227 z_bre(done);
3228
3229 // Handle existing monitor.
3230 // The object has an existing monitor iff (mark & monitor_value) != 0.
3231 z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);
3232 guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word");
3233 z_nill(currentHeader, markWord::monitor_value);
3234 z_brne(object_has_monitor);
3235
3236 // Check if it is still a lightweight lock; this is true if we see
3237 // the stack address of the basicLock in the markWord of the object.
3238 // Copy box to currentHeader such that csg does not kill it.
3239 z_lgr(currentHeader, box);
3240 z_csg(currentHeader, displacedHeader, 0, oop);
3241 z_bru(done); // Csg sets CR as desired.
3242
3243 // Handle existing monitor.
3244 bind(object_has_monitor);
3245 z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop); // CurrentHeader is tagged with monitor_value set.
3246 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
3247 z_brne(done);
3248 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
3249 z_brne(done);
3250 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
3251 z_brne(done);
3252 load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
3253 z_brne(done);
3254 z_release();
3255 z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
3256
3257 bind(done);
3258
3259 BLOCK_COMMENT("} compiler_fast_unlock_object");
3260 // flag == EQ indicates success
3261 // flag == NE indicates failure
3262 }
3263
3264 void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
3265 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3266 bs->resolve_jobject(this, value, tmp1, tmp2);
3267 }
3268
3269 // Last_Java_sp must comply with the rules in frame_s390.hpp.
3270 void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation) {
3271 BLOCK_COMMENT("set_last_Java_frame {");
3272
3273 // Always set last_Java_pc and flags first because once last_Java_sp
3274 // is visible, has_last_Java_frame is true and users will look at the
3275 // rest of the fields. (Note: flags should always be zero before we
3276 // get here, so they don't need to be set.)
3277
3278 // Verify that last_Java_pc was zeroed on return to Java.
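// (Note: allow_relocation selects between two flavors of the assert below; the _static
// variant is assumed to emit a relocation-free sequence for contexts in which relocation
// records must not be used.)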
3279 if (allow_relocation) {
3280 asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()),
3281 Z_thread,
3282 "last_Java_pc not zeroed before leaving Java",
3283 0x200);
3284 } else {
3285 asm_assert_mem8_is_zero_static(in_bytes(JavaThread::last_Java_pc_offset()),
3286 Z_thread,
3287 "last_Java_pc not zeroed before leaving Java",
3288 0x200);
3289 }
3290
3291 // When returning from calling out from Java mode, the frame anchor's
3292 // last_Java_pc will always be set to NULL. It is set here so that,
3293 // if we are doing a call to native (not VM) code, we capture the
3294 // known pc and don't have to rely on the native call having a
3295 // standard frame linkage where we can find the pc.
3296 if (last_Java_pc!=noreg) {
3297 z_stg(last_Java_pc, Address(Z_thread, JavaThread::last_Java_pc_offset()));
3298 }
3299
3300 // This membar release is not required on z/Architecture, since the sequence of stores
3301 // is maintained. Nevertheless, we leave it in to document the required ordering.
3302 // The implementation of z_release() should be empty.
3303 // z_release();
3304
3305 z_stg(last_Java_sp, Address(Z_thread, JavaThread::last_Java_sp_offset()));
3306 BLOCK_COMMENT("} set_last_Java_frame");
3307 }
3308
3309 void MacroAssembler::reset_last_Java_frame(bool allow_relocation) {
3310 BLOCK_COMMENT("reset_last_Java_frame {");
3311
3312 if (allow_relocation) {
3313 asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()),
3314 Z_thread,
3315 "SP was not set, still zero",
3316 0x202);
3317 } else {
3318 asm_assert_mem8_isnot_zero_static(in_bytes(JavaThread::last_Java_sp_offset()),
3319 Z_thread,
3320 "SP was not set, still zero",
3321 0x202);
3322 }
3323
3324 // _last_Java_sp = 0
3325 // Clearing storage must be atomic here, so don't use clear_mem()!
3326 store_const(Address(Z_thread, JavaThread::last_Java_sp_offset()), 0);
3327
3328 // _last_Java_pc = 0
3329 store_const(Address(Z_thread, JavaThread::last_Java_pc_offset()), 0);
3330
3331 BLOCK_COMMENT("} reset_last_Java_frame");
3332 return;
3333 }
3334
3335 void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, bool allow_relocation) {
3336 assert_different_registers(sp, tmp1);
3337
3338 // We cannot trust that code generated by the C++ compiler saves R14
3339 // to z_abi_160.return_pc, because sometimes it spills R14 using stmg at
3340 // z_abi_160.gpr14 (e.g. InterpreterRuntime::_new()).
3341 // Therefore we load the PC into tmp1 and let set_last_Java_frame() save
3342 // it into the frame anchor.
3343 get_PC(tmp1); 3344 set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1, allow_relocation); 3345 } 3346 3347 void MacroAssembler::set_thread_state(JavaThreadState new_state) { 3348 z_release(); 3349 3350 assert(Immediate::is_uimm16(_thread_max_state), "enum value out of range for instruction"); 3351 assert(sizeof(JavaThreadState) == sizeof(int), "enum value must have base type int"); 3352 store_const(Address(Z_thread, JavaThread::thread_state_offset()), new_state, Z_R0, false); 3353 } 3354 3355 void MacroAssembler::get_vm_result(Register oop_result) { 3356 verify_thread(); 3357 3358 z_lg(oop_result, Address(Z_thread, JavaThread::vm_result_offset())); 3359 clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(void*)); 3360 3361 verify_oop(oop_result, FILE_AND_LINE); 3362 } 3363 3364 void MacroAssembler::get_vm_result_2(Register result) { 3365 verify_thread(); 3366 3367 z_lg(result, Address(Z_thread, JavaThread::vm_result_2_offset())); 3368 clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(void*)); 3369 } 3370 3371 // We require that C code which does not return a value in vm_result will 3372 // leave it undisturbed. 3373 void MacroAssembler::set_vm_result(Register oop_result) { 3374 z_stg(oop_result, Address(Z_thread, JavaThread::vm_result_offset())); 3375 } 3376 3377 // Explicit null checks (used for method handle code). 3378 void MacroAssembler::null_check(Register reg, Register tmp, int64_t offset) { 3379 if (!ImplicitNullChecks) { 3380 NearLabel ok; 3381 3382 compare64_and_branch(reg, (intptr_t) 0, Assembler::bcondNotEqual, ok); 3383 3384 // We just put the address into reg if it was 0 (tmp==Z_R0 is allowed so we can't use it for the address). 3385 address exception_entry = Interpreter::throw_NullPointerException_entry(); 3386 load_absolute_address(reg, exception_entry); 3387 z_br(reg); 3388 3389 bind(ok); 3390 } else { 3391 if (needs_explicit_null_check((intptr_t)offset)) { 3392 // Provoke OS NULL exception if reg = NULL by 3393 // accessing M[reg] w/o changing any registers. 3394 z_lg(tmp, 0, reg); 3395 } 3396 // else 3397 // Nothing to do, (later) access of M[reg + offset] 3398 // will provoke OS NULL exception if reg = NULL. 3399 } 3400 } 3401 3402 //------------------------------------- 3403 // Compressed Klass Pointers 3404 //------------------------------------- 3405 3406 // Klass oop manipulations if compressed. 3407 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 3408 Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided. (dst == src) also possible. 3409 address base = CompressedKlassPointers::base(); 3410 int shift = CompressedKlassPointers::shift(); 3411 bool need_zero_extend = base != 0; 3412 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3413 3414 BLOCK_COMMENT("cKlass encoder {"); 3415 3416 #ifdef ASSERT 3417 Label ok; 3418 z_tmll(current, KlassAlignmentInBytes-1); // Check alignment. 3419 z_brc(Assembler::bcondAllZero, ok); 3420 // The plain disassembler does not recognize illtrap. It instead displays 3421 // a 32-bit value. Issuing two illtraps assures the disassembler finds 3422 // the proper beginning of the next instruction. 3423 z_illtrap(0xee); 3424 z_illtrap(0xee); 3425 bind(ok); 3426 #endif 3427 3428 // Scale down the incoming klass pointer first. 3429 // We then can be sure we calculate an offset that fits into 32 bit. 3430 // More generally speaking: all subsequent calculations are purely 32-bit. 
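// Illustrative example (assumed values, not generated code): with base = 0x0000000800000000
// and shift = 3, the scaled base has base_h = 0x1 and base_l = 0. The (current_h - base_h)
// part then cancels by construction, and the whole encoding below collapses to the shift
// followed by the final zero-extension.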
3431 if (shift != 0) {
3432 assert (LogKlassAlignmentInBytes == shift, "decode alg wrong");
3433 z_srlg(dst, current, shift);
3434 current = dst;
3435 }
3436
3437 if (base != NULL) {
3438 // Use scaled-down base address parts to match scaled-down klass pointer.
3439 unsigned int base_h = ((unsigned long)base)>>(32+shift);
3440 unsigned int base_l = (unsigned int)(((unsigned long)base)>>shift);
3441
3442 // General considerations:
3443 // - when calculating (current_h - base_h), all digits must cancel (become 0).
3444 // Otherwise, we would end up with a compressed klass pointer which doesn't
3445 // fit into 32-bit.
3446 // - Only bit#33 of the difference could potentially be non-zero. For that
3447 // to happen, (current_l < base_l) must hold. In this case, the subtraction
3448 // will create a borrow out of bit#32, nicely killing bit#33.
3449 // - With the above, we only need to consider current_l and base_l to
3450 // calculate the result.
3451 // - Both values are treated as unsigned. The unsigned subtraction is
3452 // replaced by adding (unsigned) the 2's complement of the subtrahend.
3453
3454 if (base_l == 0) {
3455 // - By theory, the calculation to be performed here (current_h - base_h) MUST
3456 // cancel all high-word bits. Otherwise, we would end up with an offset
3457 // (i.e. compressed klass pointer) that does not fit into 32 bit.
3458 // - current_l remains unchanged.
3459 // - Therefore, we can replace all calculation with just a
3460 // zero-extending load 32 to 64 bit.
3461 // - Even that can be replaced with a conditional load if dst != current.
3462 // (this is a local view. The shift step may have requested zero-extension).
3463 } else {
3464 if ((base_h == 0) && is_uimm(base_l, 31)) {
3465 // If we happen to find that (base_h == 0), and that base_l is within the range
3466 // which can be represented by a signed int, then we can use 64bit signed add with
3467 // (-base_l) as 32bit signed immediate operand. The add will take care of the
3468 // upper 32 bits of the result, saving us the need of an extra zero extension.
3469 // For base_l to be in the required range, it must not have the most significant
3470 // bit (aka sign bit) set.
3471 lgr_if_needed(dst, current); // no zero/sign extension in this case!
3472 z_agfi(dst, -(int)base_l); // base_l must be passed as signed.
3473 need_zero_extend = false;
3474 current = dst;
3475 } else {
3476 // To begin with, we may need to copy and/or zero-extend the register operand.
3477 // We have to calculate (current_l - base_l). Because there is no unsigned
3478 // subtract instruction with immediate operand, we add the 2's complement of base_l.
3479 if (need_zero_extend) {
3480 z_llgfr(dst, current);
3481 need_zero_extend = false;
3482 } else {
3483 llgfr_if_needed(dst, current);
3484 }
3485 current = dst;
3486 z_alfi(dst, -base_l);
3487 }
3488 }
3489 }
3490
3491 if (need_zero_extend) {
3492 // We must zero-extend the calculated result. It may have some leftover bits in
3493 // the hi-word because we only did optimized calculations.
3494 z_llgfr(dst, current);
3495 } else {
3496 llgfr_if_needed(dst, current); // zero-extension while copying comes at no extra cost.
3497 }
3498
3499 BLOCK_COMMENT("} cKlass encoder");
3500 }
3501
3502 // This function calculates the size of the code generated by
3503 // decode_klass_not_null(Register dst, Register src)
3504 // when (Universe::heap() != NULL). Hence, if the instructions
3505 // it generates change, then this method needs to be updated.
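// (decode_klass_not_null(Register dst) below cross-checks this value against the actually
// emitted code size with an assert, so the two must be kept in sync.)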
3506 int MacroAssembler::instr_size_for_decode_klass_not_null() { 3507 address base = CompressedKlassPointers::base(); 3508 int shift_size = CompressedKlassPointers::shift() == 0 ? 0 : 6; /* sllg */ 3509 int addbase_size = 0; 3510 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3511 3512 if (base != NULL) { 3513 unsigned int base_h = ((unsigned long)base)>>32; 3514 unsigned int base_l = (unsigned int)((unsigned long)base); 3515 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3516 addbase_size += 6; /* aih */ 3517 } else if ((base_h == 0) && (base_l != 0)) { 3518 addbase_size += 6; /* algfi */ 3519 } else { 3520 addbase_size += load_const_size(); 3521 addbase_size += 4; /* algr */ 3522 } 3523 } 3524 #ifdef ASSERT 3525 addbase_size += 10; 3526 addbase_size += 2; // Extra sigill. 3527 #endif 3528 return addbase_size + shift_size; 3529 } 3530 3531 // !!! If the instructions that get generated here change 3532 // then function instr_size_for_decode_klass_not_null() 3533 // needs to get updated. 3534 // This variant of decode_klass_not_null() must generate predictable code! 3535 // The code must only depend on globally known parameters. 3536 void MacroAssembler::decode_klass_not_null(Register dst) { 3537 address base = CompressedKlassPointers::base(); 3538 int shift = CompressedKlassPointers::shift(); 3539 int beg_off = offset(); 3540 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3541 3542 BLOCK_COMMENT("cKlass decoder (const size) {"); 3543 3544 if (shift != 0) { // Shift required? 3545 z_sllg(dst, dst, shift); 3546 } 3547 if (base != NULL) { 3548 unsigned int base_h = ((unsigned long)base)>>32; 3549 unsigned int base_l = (unsigned int)((unsigned long)base); 3550 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3551 z_aih(dst, base_h); // Base has no set bits in lower half. 3552 } else if ((base_h == 0) && (base_l != 0)) { 3553 z_algfi(dst, base_l); // Base has no set bits in upper half. 3554 } else { 3555 load_const(Z_R0, base); // Base has set bits everywhere. 3556 z_algr(dst, Z_R0); 3557 } 3558 } 3559 3560 #ifdef ASSERT 3561 Label ok; 3562 z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment. 3563 z_brc(Assembler::bcondAllZero, ok); 3564 // The plain disassembler does not recognize illtrap. It instead displays 3565 // a 32-bit value. Issuing two illtraps assures the disassembler finds 3566 // the proper beginning of the next instruction. 3567 z_illtrap(0xd1); 3568 z_illtrap(0xd1); 3569 bind(ok); 3570 #endif 3571 assert(offset() == beg_off + instr_size_for_decode_klass_not_null(), "Code gen mismatch."); 3572 3573 BLOCK_COMMENT("} cKlass decoder (const size)"); 3574 } 3575 3576 // This variant of decode_klass_not_null() is for cases where 3577 // 1) the size of the generated instructions may vary 3578 // 2) the result is (potentially) stored in a register different from the source. 3579 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { 3580 address base = CompressedKlassPointers::base(); 3581 int shift = CompressedKlassPointers::shift(); 3582 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3583 3584 BLOCK_COMMENT("cKlass decoder {"); 3585 3586 if (src == noreg) src = dst; 3587 3588 if (shift != 0) { // Shift or at least move required? 
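// SLLG takes separate source and destination registers, so the shift doubles as the copy into dst.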
3589 z_sllg(dst, src, shift); 3590 } else { 3591 lgr_if_needed(dst, src); 3592 } 3593 3594 if (base != NULL) { 3595 unsigned int base_h = ((unsigned long)base)>>32; 3596 unsigned int base_l = (unsigned int)((unsigned long)base); 3597 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3598 z_aih(dst, base_h); // Base has not set bits in lower half. 3599 } else if ((base_h == 0) && (base_l != 0)) { 3600 z_algfi(dst, base_l); // Base has no set bits in upper half. 3601 } else { 3602 load_const_optimized(Z_R0, base); // Base has set bits everywhere. 3603 z_algr(dst, Z_R0); 3604 } 3605 } 3606 3607 #ifdef ASSERT 3608 Label ok; 3609 z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment. 3610 z_brc(Assembler::bcondAllZero, ok); 3611 // The plain disassembler does not recognize illtrap. It instead displays 3612 // a 32-bit value. Issuing two illtraps assures the disassembler finds 3613 // the proper beginning of the next instruction. 3614 z_illtrap(0xd2); 3615 z_illtrap(0xd2); 3616 bind(ok); 3617 #endif 3618 BLOCK_COMMENT("} cKlass decoder"); 3619 } 3620 3621 void MacroAssembler::load_klass(Register klass, Address mem) { 3622 if (UseCompressedClassPointers) { 3623 z_llgf(klass, mem); 3624 // Attention: no null check here! 3625 decode_klass_not_null(klass); 3626 } else { 3627 z_lg(klass, mem); 3628 } 3629 } 3630 3631 void MacroAssembler::load_klass(Register klass, Register src_oop) { 3632 if (UseCompressedClassPointers) { 3633 z_llgf(klass, oopDesc::klass_offset_in_bytes(), src_oop); 3634 // Attention: no null check here! 3635 decode_klass_not_null(klass); 3636 } else { 3637 z_lg(klass, oopDesc::klass_offset_in_bytes(), src_oop); 3638 } 3639 } 3640 3641 void MacroAssembler::store_klass(Register klass, Register dst_oop, Register ck) { 3642 if (UseCompressedClassPointers) { 3643 assert_different_registers(dst_oop, klass, Z_R0); 3644 if (ck == noreg) ck = klass; 3645 encode_klass_not_null(ck, klass); 3646 z_st(ck, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 3647 } else { 3648 z_stg(klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 3649 } 3650 } 3651 3652 void MacroAssembler::store_klass_gap(Register s, Register d) { 3653 if (UseCompressedClassPointers) { 3654 assert(s != d, "not enough registers"); 3655 // Support s = noreg. 3656 if (s != noreg) { 3657 z_st(s, Address(d, oopDesc::klass_gap_offset_in_bytes())); 3658 } else { 3659 z_mvhi(Address(d, oopDesc::klass_gap_offset_in_bytes()), 0); 3660 } 3661 } 3662 } 3663 3664 // Compare klass ptr in memory against klass ptr in register. 3665 // 3666 // Rop1 - klass in register, always uncompressed. 3667 // disp - Offset of klass in memory, compressed/uncompressed, depending on runtime flag. 3668 // Rbase - Base address of cKlass in memory. 3669 // maybeNULL - True if Rop1 possibly is a NULL. 3670 void MacroAssembler::compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybeNULL) { 3671 3672 BLOCK_COMMENT("compare klass ptr {"); 3673 3674 if (UseCompressedClassPointers) { 3675 const int shift = CompressedKlassPointers::shift(); 3676 address base = CompressedKlassPointers::base(); 3677 3678 assert((shift == 0) || (shift == LogKlassAlignmentInBytes), "cKlass encoder detected bad shift"); 3679 assert_different_registers(Rop1, Z_R0); 3680 assert_different_registers(Rop1, Rbase, Z_R1); 3681 3682 // First encode register oop and then compare with cOop in memory. 3683 // This sequence saves an unnecessary cOop load and decode. 
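// (Decoding the cKlass value from memory instead would require a load plus the full decode
// sequence; encoding the register operand needs at most a shift and a subtraction before
// the 32-bit compare.)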
3684 if (base == NULL) { 3685 if (shift == 0) { 3686 z_cl(Rop1, disp, Rbase); // Unscaled 3687 } else { 3688 z_srlg(Z_R0, Rop1, shift); // ZeroBased 3689 z_cl(Z_R0, disp, Rbase); 3690 } 3691 } else { // HeapBased 3692 #ifdef ASSERT 3693 bool used_R0 = true; 3694 bool used_R1 = true; 3695 #endif 3696 Register current = Rop1; 3697 Label done; 3698 3699 if (maybeNULL) { // NULL ptr must be preserved! 3700 z_ltgr(Z_R0, current); 3701 z_bre(done); 3702 current = Z_R0; 3703 } 3704 3705 unsigned int base_h = ((unsigned long)base)>>32; 3706 unsigned int base_l = (unsigned int)((unsigned long)base); 3707 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { 3708 lgr_if_needed(Z_R0, current); 3709 z_aih(Z_R0, -((int)base_h)); // Base has no set bits in lower half. 3710 } else if ((base_h == 0) && (base_l != 0)) { 3711 lgr_if_needed(Z_R0, current); 3712 z_agfi(Z_R0, -(int)base_l); 3713 } else { 3714 int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base)); 3715 add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); // Subtract base by adding complement. 3716 } 3717 3718 if (shift != 0) { 3719 z_srlg(Z_R0, Z_R0, shift); 3720 } 3721 bind(done); 3722 z_cl(Z_R0, disp, Rbase); 3723 #ifdef ASSERT 3724 if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2); 3725 if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2); 3726 #endif 3727 } 3728 } else { 3729 z_clg(Rop1, disp, Z_R0, Rbase); 3730 } 3731 BLOCK_COMMENT("} compare klass ptr"); 3732 } 3733 3734 //--------------------------- 3735 // Compressed oops 3736 //--------------------------- 3737 3738 void MacroAssembler::encode_heap_oop(Register oop) { 3739 oop_encoder(oop, oop, true /*maybe null*/); 3740 } 3741 3742 void MacroAssembler::encode_heap_oop_not_null(Register oop) { 3743 oop_encoder(oop, oop, false /*not null*/); 3744 } 3745 3746 // Called with something derived from the oop base. e.g. oop_base>>3. 3747 int MacroAssembler::get_oop_base_pow2_offset(uint64_t oop_base) { 3748 unsigned int oop_base_ll = ((unsigned int)(oop_base >> 0)) & 0xffff; 3749 unsigned int oop_base_lh = ((unsigned int)(oop_base >> 16)) & 0xffff; 3750 unsigned int oop_base_hl = ((unsigned int)(oop_base >> 32)) & 0xffff; 3751 unsigned int oop_base_hh = ((unsigned int)(oop_base >> 48)) & 0xffff; 3752 unsigned int n_notzero_parts = (oop_base_ll == 0 ? 0:1) 3753 + (oop_base_lh == 0 ? 0:1) 3754 + (oop_base_hl == 0 ? 0:1) 3755 + (oop_base_hh == 0 ? 0:1); 3756 3757 assert(oop_base != 0, "This is for HeapBased cOops only"); 3758 3759 if (n_notzero_parts != 1) { // Check if oop_base is just a few pages shy of a power of 2. 3760 uint64_t pow2_offset = 0x10000 - oop_base_ll; 3761 if (pow2_offset < 0x8000) { // This might not be necessary. 3762 uint64_t oop_base2 = oop_base + pow2_offset; 3763 3764 oop_base_ll = ((unsigned int)(oop_base2 >> 0)) & 0xffff; 3765 oop_base_lh = ((unsigned int)(oop_base2 >> 16)) & 0xffff; 3766 oop_base_hl = ((unsigned int)(oop_base2 >> 32)) & 0xffff; 3767 oop_base_hh = ((unsigned int)(oop_base2 >> 48)) & 0xffff; 3768 n_notzero_parts = (oop_base_ll == 0 ? 0:1) + 3769 (oop_base_lh == 0 ? 0:1) + 3770 (oop_base_hl == 0 ? 0:1) + 3771 (oop_base_hh == 0 ? 0:1); 3772 if (n_notzero_parts == 1) { 3773 assert(-(int64_t)pow2_offset != (int64_t)-1, "We use -1 to signal uninitialized base register"); 3774 return -pow2_offset; 3775 } 3776 } 3777 } 3778 return 0; 3779 } 3780 3781 // If base address is offset from a straight power of two by just a few pages, 3782 // return this offset to the caller for a possible later composite add. 
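// Illustrative example (assumed numbers, not generated code): for an input of 0x3ffff000,
// get_oop_base_pow2_offset() returns -0x1000, because 0x3ffff000 + 0x1000 = 0x40000000 has
// only one non-zero 16-bit part. get_oop_base() then loads the cheap constant 0x40000000
// into Rbase and reports -0x1000, so callers can fold it into a later address calculation.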
3783 // TODO/FIX: will only work correctly for 4k pages. 3784 int MacroAssembler::get_oop_base(Register Rbase, uint64_t oop_base) { 3785 int pow2_offset = get_oop_base_pow2_offset(oop_base); 3786 3787 load_const_optimized(Rbase, oop_base - pow2_offset); // Best job possible. 3788 3789 return pow2_offset; 3790 } 3791 3792 int MacroAssembler::get_oop_base_complement(Register Rbase, uint64_t oop_base) { 3793 int offset = get_oop_base(Rbase, oop_base); 3794 z_lcgr(Rbase, Rbase); 3795 return -offset; 3796 } 3797 3798 // Compare compressed oop in memory against oop in register. 3799 // Rop1 - Oop in register. 3800 // disp - Offset of cOop in memory. 3801 // Rbase - Base address of cOop in memory. 3802 // maybeNULL - True if Rop1 possibly is a NULL. 3803 // maybeNULLtarget - Branch target for Rop1 == NULL, if flow control shall NOT continue with compare instruction. 3804 void MacroAssembler::compare_heap_oop(Register Rop1, Address mem, bool maybeNULL) { 3805 Register Rbase = mem.baseOrR0(); 3806 Register Rindex = mem.indexOrR0(); 3807 int64_t disp = mem.disp(); 3808 3809 const int shift = CompressedOops::shift(); 3810 address base = CompressedOops::base(); 3811 3812 assert(UseCompressedOops, "must be on to call this method"); 3813 assert(Universe::heap() != NULL, "java heap must be initialized to call this method"); 3814 assert((shift == 0) || (shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift"); 3815 assert_different_registers(Rop1, Z_R0); 3816 assert_different_registers(Rop1, Rbase, Z_R1); 3817 assert_different_registers(Rop1, Rindex, Z_R1); 3818 3819 BLOCK_COMMENT("compare heap oop {"); 3820 3821 // First encode register oop and then compare with cOop in memory. 3822 // This sequence saves an unnecessary cOop load and decode. 3823 if (base == NULL) { 3824 if (shift == 0) { 3825 z_cl(Rop1, disp, Rindex, Rbase); // Unscaled 3826 } else { 3827 z_srlg(Z_R0, Rop1, shift); // ZeroBased 3828 z_cl(Z_R0, disp, Rindex, Rbase); 3829 } 3830 } else { // HeapBased 3831 #ifdef ASSERT 3832 bool used_R0 = true; 3833 bool used_R1 = true; 3834 #endif 3835 Label done; 3836 int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base)); 3837 3838 if (maybeNULL) { // NULL ptr must be preserved! 
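// z_ltgr copies Rop1 into Z_R0 and sets the condition code in the same instruction,
// so the NULL check needs no separate compare.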
3839 z_ltgr(Z_R0, Rop1); 3840 z_bre(done); 3841 } 3842 3843 add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); 3844 z_srlg(Z_R0, Z_R0, shift); 3845 3846 bind(done); 3847 z_cl(Z_R0, disp, Rindex, Rbase); 3848 #ifdef ASSERT 3849 if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2); 3850 if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2); 3851 #endif 3852 } 3853 BLOCK_COMMENT("} compare heap oop"); 3854 } 3855 3856 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, 3857 const Address& addr, Register val, 3858 Register tmp1, Register tmp2, Register tmp3) { 3859 assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL | 3860 ON_UNKNOWN_OOP_REF)) == 0, "unsupported decorator"); 3861 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 3862 decorators = AccessInternal::decorator_fixup(decorators); 3863 bool as_raw = (decorators & AS_RAW) != 0; 3864 if (as_raw) { 3865 bs->BarrierSetAssembler::store_at(this, decorators, type, 3866 addr, val, 3867 tmp1, tmp2, tmp3); 3868 } else { 3869 bs->store_at(this, decorators, type, 3870 addr, val, 3871 tmp1, tmp2, tmp3); 3872 } 3873 } 3874 3875 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, 3876 const Address& addr, Register dst, 3877 Register tmp1, Register tmp2, Label *is_null) { 3878 assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL | 3879 ON_PHANTOM_OOP_REF | ON_WEAK_OOP_REF)) == 0, "unsupported decorator"); 3880 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 3881 decorators = AccessInternal::decorator_fixup(decorators); 3882 bool as_raw = (decorators & AS_RAW) != 0; 3883 if (as_raw) { 3884 bs->BarrierSetAssembler::load_at(this, decorators, type, 3885 addr, dst, 3886 tmp1, tmp2, is_null); 3887 } else { 3888 bs->load_at(this, decorators, type, 3889 addr, dst, 3890 tmp1, tmp2, is_null); 3891 } 3892 } 3893 3894 void MacroAssembler::load_heap_oop(Register dest, const Address &a, 3895 Register tmp1, Register tmp2, 3896 DecoratorSet decorators, Label *is_null) { 3897 access_load_at(T_OBJECT, IN_HEAP | decorators, a, dest, tmp1, tmp2, is_null); 3898 } 3899 3900 void MacroAssembler::store_heap_oop(Register Roop, const Address &a, 3901 Register tmp1, Register tmp2, Register tmp3, 3902 DecoratorSet decorators) { 3903 access_store_at(T_OBJECT, IN_HEAP | decorators, a, Roop, tmp1, tmp2, tmp3); 3904 } 3905 3906 //------------------------------------------------- 3907 // Encode compressed oop. Generally usable encoder. 3908 //------------------------------------------------- 3909 // Rsrc - contains regular oop on entry. It remains unchanged. 3910 // Rdst - contains compressed oop on exit. 3911 // Rdst and Rsrc may indicate same register, in which case Rsrc does not remain unchanged. 3912 // 3913 // Rdst must not indicate scratch register Z_R1 (Z_R1_scratch) for functionality. 3914 // Rdst should not indicate scratch register Z_R0 (Z_R0_scratch) for performance. 3915 // 3916 // only32bitValid is set, if later code only uses the lower 32 bits. In this 3917 // case we must not fix the upper 32 bits. 
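// (Usage note: encode_heap_oop() and encode_heap_oop_not_null() above are thin wrappers
// around this routine, calling it with Rdst == Rsrc; they rely on the default Rbase and
// pow2_offset arguments, assumed to be declared in the header, so that the base complement
// is computed on the fly.)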
3918 void MacroAssembler::oop_encoder(Register Rdst, Register Rsrc, bool maybeNULL, 3919 Register Rbase, int pow2_offset, bool only32bitValid) { 3920 3921 const address oop_base = CompressedOops::base(); 3922 const int oop_shift = CompressedOops::shift(); 3923 const bool disjoint = CompressedOops::base_disjoint(); 3924 3925 assert(UseCompressedOops, "must be on to call this method"); 3926 assert(Universe::heap() != NULL, "java heap must be initialized to call this encoder"); 3927 assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift"); 3928 3929 if (disjoint || (oop_base == NULL)) { 3930 BLOCK_COMMENT("cOop encoder zeroBase {"); 3931 if (oop_shift == 0) { 3932 if (oop_base != NULL && !only32bitValid) { 3933 z_llgfr(Rdst, Rsrc); // Clear upper bits in case the register will be decoded again. 3934 } else { 3935 lgr_if_needed(Rdst, Rsrc); 3936 } 3937 } else { 3938 z_srlg(Rdst, Rsrc, oop_shift); 3939 if (oop_base != NULL && !only32bitValid) { 3940 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 3941 } 3942 } 3943 BLOCK_COMMENT("} cOop encoder zeroBase"); 3944 return; 3945 } 3946 3947 bool used_R0 = false; 3948 bool used_R1 = false; 3949 3950 BLOCK_COMMENT("cOop encoder general {"); 3951 assert_different_registers(Rdst, Z_R1); 3952 assert_different_registers(Rsrc, Rbase); 3953 if (maybeNULL) { 3954 Label done; 3955 // We reorder shifting and subtracting, so that we can compare 3956 // and shift in parallel: 3957 // 3958 // cycle 0: potential LoadN, base = <const> 3959 // cycle 1: base = !base dst = src >> 3, cmp cr = (src != 0) 3960 // cycle 2: if (cr) br, dst = dst + base + offset 3961 3962 // Get oop_base components. 3963 if (pow2_offset == -1) { 3964 if (Rdst == Rbase) { 3965 if (Rdst == Z_R1 || Rsrc == Z_R1) { 3966 Rbase = Z_R0; 3967 used_R0 = true; 3968 } else { 3969 Rdst = Z_R1; 3970 used_R1 = true; 3971 } 3972 } 3973 if (Rbase == Z_R1) { 3974 used_R1 = true; 3975 } 3976 pow2_offset = get_oop_base_complement(Rbase, ((uint64_t)(intptr_t)oop_base) >> oop_shift); 3977 } 3978 assert_different_registers(Rdst, Rbase); 3979 3980 // Check for NULL oop (must be left alone) and shift. 3981 if (oop_shift != 0) { // Shift out alignment bits 3982 if (((intptr_t)oop_base&0xc000000000000000L) == 0L) { // We are sure: no single address will have the leftmost bit set. 3983 z_srag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 3984 } else { 3985 z_srlg(Rdst, Rsrc, oop_shift); 3986 z_ltgr(Rsrc, Rsrc); // This is the recommended way of testing for zero. 3987 // This probably is faster, as it does not write a register. No! 3988 // z_cghi(Rsrc, 0); 3989 } 3990 } else { 3991 z_ltgr(Rdst, Rsrc); // Move NULL to result register. 3992 } 3993 z_bre(done); 3994 3995 // Subtract oop_base components. 3996 if ((Rdst == Z_R0) || (Rbase == Z_R0)) { 3997 z_algr(Rdst, Rbase); 3998 if (pow2_offset != 0) { add2reg(Rdst, pow2_offset); } 3999 } else { 4000 add2reg_with_index(Rdst, pow2_offset, Rbase, Rdst); 4001 } 4002 if (!only32bitValid) { 4003 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 4004 } 4005 bind(done); 4006 4007 } else { // not null 4008 // Get oop_base components. 4009 if (pow2_offset == -1) { 4010 pow2_offset = get_oop_base_complement(Rbase, (uint64_t)(intptr_t)oop_base); 4011 } 4012 4013 // Subtract oop_base components and shift. 4014 if (Rdst == Z_R0 || Rsrc == Z_R0 || Rbase == Z_R0) { 4015 // Don't use lay instruction. 
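// (In z/Architecture address generation, general register 0 always contributes the value
// zero, so LA/LAY cannot use Z_R0 as a base or index; plain adds are used instead.)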
4016 if (Rdst == Rsrc) { 4017 z_algr(Rdst, Rbase); 4018 } else { 4019 lgr_if_needed(Rdst, Rbase); 4020 z_algr(Rdst, Rsrc); 4021 } 4022 if (pow2_offset != 0) add2reg(Rdst, pow2_offset); 4023 } else { 4024 add2reg_with_index(Rdst, pow2_offset, Rbase, Rsrc); 4025 } 4026 if (oop_shift != 0) { // Shift out alignment bits. 4027 z_srlg(Rdst, Rdst, oop_shift); 4028 } 4029 if (!only32bitValid) { 4030 z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. 4031 } 4032 } 4033 #ifdef ASSERT 4034 if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb01bUL, 2); } 4035 if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb02bUL, 2); } 4036 #endif 4037 BLOCK_COMMENT("} cOop encoder general"); 4038 } 4039 4040 //------------------------------------------------- 4041 // decode compressed oop. Generally usable decoder. 4042 //------------------------------------------------- 4043 // Rsrc - contains compressed oop on entry. 4044 // Rdst - contains regular oop on exit. 4045 // Rdst and Rsrc may indicate same register. 4046 // Rdst must not be the same register as Rbase, if Rbase was preloaded (before call). 4047 // Rdst can be the same register as Rbase. Then, either Z_R0 or Z_R1 must be available as scratch. 4048 // Rbase - register to use for the base 4049 // pow2_offset - offset of base to nice value. If -1, base must be loaded. 4050 // For performance, it is good to 4051 // - avoid Z_R0 for any of the argument registers. 4052 // - keep Rdst and Rsrc distinct from Rbase. Rdst == Rsrc is ok for performance. 4053 // - avoid Z_R1 for Rdst if Rdst == Rbase. 4054 void MacroAssembler::oop_decoder(Register Rdst, Register Rsrc, bool maybeNULL, Register Rbase, int pow2_offset) { 4055 4056 const address oop_base = CompressedOops::base(); 4057 const int oop_shift = CompressedOops::shift(); 4058 const bool disjoint = CompressedOops::base_disjoint(); 4059 4060 assert(UseCompressedOops, "must be on to call this method"); 4061 assert(Universe::heap() != NULL, "java heap must be initialized to call this decoder"); 4062 assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), 4063 "cOop encoder detected bad shift"); 4064 4065 // cOops are always loaded zero-extended from memory. No explicit zero-extension necessary. 4066 4067 if (oop_base != NULL) { 4068 unsigned int oop_base_hl = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xffff; 4069 unsigned int oop_base_hh = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 48)) & 0xffff; 4070 unsigned int oop_base_hf = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xFFFFffff; 4071 if (disjoint && (oop_base_hl == 0 || oop_base_hh == 0)) { 4072 BLOCK_COMMENT("cOop decoder disjointBase {"); 4073 // We do not need to load the base. Instead, we can install the upper bits 4074 // with an OR instead of an ADD. 4075 Label done; 4076 4077 // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set. 4078 if (maybeNULL) { // NULL ptr must be preserved! 4079 z_slag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4080 z_bre(done); 4081 } else { 4082 z_sllg(Rdst, Rsrc, oop_shift); // Logical shift leaves condition code alone. 
4083 } 4084 if ((oop_base_hl != 0) && (oop_base_hh != 0)) { 4085 z_oihf(Rdst, oop_base_hf); 4086 } else if (oop_base_hl != 0) { 4087 z_oihl(Rdst, oop_base_hl); 4088 } else { 4089 assert(oop_base_hh != 0, "not heapbased mode"); 4090 z_oihh(Rdst, oop_base_hh); 4091 } 4092 bind(done); 4093 BLOCK_COMMENT("} cOop decoder disjointBase"); 4094 } else { 4095 BLOCK_COMMENT("cOop decoder general {"); 4096 // There are three decode steps: 4097 // scale oop offset (shift left) 4098 // get base (in reg) and pow2_offset (constant) 4099 // add base, pow2_offset, and oop offset 4100 // The following register overlap situations may exist: 4101 // Rdst == Rsrc, Rbase any other 4102 // not a problem. Scaling in-place leaves Rbase undisturbed. 4103 // Loading Rbase does not impact the scaled offset. 4104 // Rdst == Rbase, Rsrc any other 4105 // scaling would destroy a possibly preloaded Rbase. Loading Rbase 4106 // would destroy the scaled offset. 4107 // Remedy: use Rdst_tmp if Rbase has been preloaded. 4108 // use Rbase_tmp if base has to be loaded. 4109 // Rsrc == Rbase, Rdst any other 4110 // Only possible without preloaded Rbase. 4111 // Loading Rbase does not destroy compressed oop because it was scaled into Rdst before. 4112 // Rsrc == Rbase, Rdst == Rbase 4113 // Only possible without preloaded Rbase. 4114 // Loading Rbase would destroy compressed oop. Scaling in-place is ok. 4115 // Remedy: use Rbase_tmp. 4116 // 4117 Label done; 4118 Register Rdst_tmp = Rdst; 4119 Register Rbase_tmp = Rbase; 4120 bool used_R0 = false; 4121 bool used_R1 = false; 4122 bool base_preloaded = pow2_offset >= 0; 4123 guarantee(!(base_preloaded && (Rsrc == Rbase)), "Register clash, check caller"); 4124 assert(oop_shift != 0, "room for optimization"); 4125 4126 // Check if we need to use scratch registers. 4127 if (Rdst == Rbase) { 4128 assert(!(((Rdst == Z_R0) && (Rsrc == Z_R1)) || ((Rdst == Z_R1) && (Rsrc == Z_R0))), "need a scratch reg"); 4129 if (Rdst != Rsrc) { 4130 if (base_preloaded) { Rdst_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; } 4131 else { Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; } 4132 } else { 4133 Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; 4134 } 4135 } 4136 if (base_preloaded) lgr_if_needed(Rbase_tmp, Rbase); 4137 4138 // Scale oop and check for NULL. 4139 // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set. 4140 if (maybeNULL) { // NULL ptr must be preserved! 4141 z_slag(Rdst_tmp, Rsrc, oop_shift); // Arithmetic shift sets the condition code. 4142 z_bre(done); 4143 } else { 4144 z_sllg(Rdst_tmp, Rsrc, oop_shift); // Logical shift leaves condition code alone. 4145 } 4146 4147 // Get oop_base components. 4148 if (!base_preloaded) { 4149 pow2_offset = get_oop_base(Rbase_tmp, (uint64_t)(intptr_t)oop_base); 4150 } 4151 4152 // Add up all components. 
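// (When neither register is Z_R0, add2reg_with_index can fold the base register, the scaled
// oop and the small pow2_offset into a single LA/LAY; otherwise plain adds are used, since
// R0 reads as zero in address generation.)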
4153 if ((Rbase_tmp == Z_R0) || (Rdst_tmp == Z_R0)) { 4154 z_algr(Rdst_tmp, Rbase_tmp); 4155 if (pow2_offset != 0) { add2reg(Rdst_tmp, pow2_offset); } 4156 } else { 4157 add2reg_with_index(Rdst_tmp, pow2_offset, Rbase_tmp, Rdst_tmp); 4158 } 4159 4160 bind(done); 4161 lgr_if_needed(Rdst, Rdst_tmp); 4162 #ifdef ASSERT 4163 if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb03bUL, 2); } 4164 if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb04bUL, 2); } 4165 #endif 4166 BLOCK_COMMENT("} cOop decoder general"); 4167 } 4168 } else { 4169 BLOCK_COMMENT("cOop decoder zeroBase {"); 4170 if (oop_shift == 0) { 4171 lgr_if_needed(Rdst, Rsrc); 4172 } else { 4173 z_sllg(Rdst, Rsrc, oop_shift); 4174 } 4175 BLOCK_COMMENT("} cOop decoder zeroBase"); 4176 } 4177 } 4178 4179 // ((OopHandle)result).resolve(); 4180 void MacroAssembler::resolve_oop_handle(Register result) { 4181 // OopHandle::resolve is an indirection. 4182 z_lg(result, 0, result); 4183 } 4184 4185 void MacroAssembler::load_mirror_from_const_method(Register mirror, Register const_method) { 4186 mem2reg_opt(mirror, Address(const_method, ConstMethod::constants_offset())); 4187 mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes())); 4188 mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset())); 4189 resolve_oop_handle(mirror); 4190 } 4191 4192 void MacroAssembler::load_method_holder(Register holder, Register method) { 4193 mem2reg_opt(holder, Address(method, Method::const_offset())); 4194 mem2reg_opt(holder, Address(holder, ConstMethod::constants_offset())); 4195 mem2reg_opt(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); 4196 } 4197 4198 //--------------------------------------------------------------- 4199 //--- Operations on arrays. 4200 //--------------------------------------------------------------- 4201 4202 // Compiler ensures base is doubleword aligned and cnt is #doublewords. 4203 // Emitter does not KILL cnt and base arguments, since they need to be copied to 4204 // work registers anyway. 4205 // Actually, only r0, r1, and r5 are killed. 4206 unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register odd_tmp_reg) { 4207 4208 int block_start = offset(); 4209 Register dst_len = Z_R1; // Holds dst len for MVCLE. 4210 Register dst_addr = Z_R0; // Holds dst addr for MVCLE. 4211 4212 Label doXC, doMVCLE, done; 4213 4214 BLOCK_COMMENT("Clear_Array {"); 4215 4216 // Check for zero len and convert to long. 4217 z_ltgfr(odd_tmp_reg, cnt_arg); 4218 z_bre(done); // Nothing to do if len == 0. 4219 4220 // Prefetch data to be cleared. 4221 if (VM_Version::has_Prefetch()) { 4222 z_pfd(0x02, 0, Z_R0, base_pointer_arg); 4223 z_pfd(0x02, 256, Z_R0, base_pointer_arg); 4224 } 4225 4226 z_sllg(dst_len, odd_tmp_reg, 3); // #bytes to clear. 4227 z_cghi(odd_tmp_reg, 32); // Check for len <= 256 bytes (<=32 DW). 4228 z_brnh(doXC); // If so, use executed XC to clear. 4229 4230 // MVCLE: initialize long arrays (general case). 4231 bind(doMVCLE); 4232 z_lgr(dst_addr, base_pointer_arg); 4233 // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0. 4234 // The even register of the register pair is not killed. 4235 clear_reg(odd_tmp_reg, true, false); 4236 MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding()-1), 0); 4237 z_bru(done); 4238 4239 // XC: initialize short arrays. 4240 Label XC_template; // Instr template, never exec directly! 
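// The XC template below is emitted with a zero length field; EXECUTE (z_ex/z_exrl) ORs the
// low byte of dst_len (#bytes-1) into that field at run time, giving a variable-length
// clear of 1..256 bytes without a branch tree.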
4241 bind(XC_template); 4242 z_xc(0,0,base_pointer_arg,0,base_pointer_arg); 4243 4244 bind(doXC); 4245 add2reg(dst_len, -1); // Get #bytes-1 for EXECUTE. 4246 if (VM_Version::has_ExecuteExtensions()) { 4247 z_exrl(dst_len, XC_template); // Execute XC with var. len. 4248 } else { 4249 z_larl(odd_tmp_reg, XC_template); 4250 z_ex(dst_len,0,Z_R0,odd_tmp_reg); // Execute XC with var. len. 4251 } 4252 // z_bru(done); // fallthru 4253 4254 bind(done); 4255 4256 BLOCK_COMMENT("} Clear_Array"); 4257 4258 int block_end = offset(); 4259 return block_end - block_start; 4260 } 4261 4262 // Compiler ensures base is doubleword aligned and cnt is count of doublewords. 4263 // Emitter does not KILL any arguments nor work registers. 4264 // Emitter generates up to 16 XC instructions, depending on the array length. 4265 unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) { 4266 int block_start = offset(); 4267 int off; 4268 int lineSize_Bytes = AllocatePrefetchStepSize; 4269 int lineSize_DW = AllocatePrefetchStepSize>>LogBytesPerWord; 4270 bool doPrefetch = VM_Version::has_Prefetch(); 4271 int XC_maxlen = 256; 4272 int numXCInstr = cnt > 0 ? (cnt*BytesPerWord-1)/XC_maxlen+1 : 0; 4273 4274 BLOCK_COMMENT("Clear_Array_Const {"); 4275 assert(cnt*BytesPerWord <= 4096, "ClearArrayConst can handle 4k only"); 4276 4277 // Do less prefetching for very short arrays. 4278 if (numXCInstr > 0) { 4279 // Prefetch only some cache lines, then begin clearing. 4280 if (doPrefetch) { 4281 if (cnt*BytesPerWord <= lineSize_Bytes/4) { // If less than 1/4 of a cache line to clear, 4282 z_pfd(0x02, 0, Z_R0, base); // prefetch just the first cache line. 4283 } else { 4284 assert(XC_maxlen == lineSize_Bytes, "ClearArrayConst needs 256B cache lines"); 4285 for (off = 0; (off < AllocatePrefetchLines) && (off <= numXCInstr); off ++) { 4286 z_pfd(0x02, off*lineSize_Bytes, Z_R0, base); 4287 } 4288 } 4289 } 4290 4291 for (off=0; off<(numXCInstr-1); off++) { 4292 z_xc(off*XC_maxlen, XC_maxlen-1, base, off*XC_maxlen, base); 4293 4294 // Prefetch some cache lines in advance. 4295 if (doPrefetch && (off <= numXCInstr-AllocatePrefetchLines)) { 4296 z_pfd(0x02, (off+AllocatePrefetchLines)*lineSize_Bytes, Z_R0, base); 4297 } 4298 } 4299 if (off*XC_maxlen < cnt*BytesPerWord) { 4300 z_xc(off*XC_maxlen, (cnt*BytesPerWord-off*XC_maxlen)-1, base, off*XC_maxlen, base); 4301 } 4302 } 4303 BLOCK_COMMENT("} Clear_Array_Const"); 4304 4305 int block_end = offset(); 4306 return block_end - block_start; 4307 } 4308 4309 // Compiler ensures base is doubleword aligned and cnt is #doublewords. 4310 // Emitter does not KILL cnt and base arguments, since they need to be copied to 4311 // work registers anyway. 4312 // Actually, only r0, r1, (which are work registers) and odd_tmp_reg are killed. 4313 // 4314 // For very large arrays, exploit MVCLE H/W support. 4315 // MVCLE instruction automatically exploits H/W-optimized page mover. 4316 // - Bytes up to next page boundary are cleared with a series of XC to self. 4317 // - All full pages are cleared with the page mover H/W assist. 4318 // - Remaining bytes are again cleared by a series of XC to self. 4319 // 4320 unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register odd_tmp_reg) { 4321 4322 int block_start = offset(); 4323 Register dst_len = Z_R1; // Holds dst len for MVCLE. 4324 Register dst_addr = Z_R0; // Holds dst addr for MVCLE. 4325 4326 BLOCK_COMMENT("Clear_Array_Const_Big {"); 4327 4328 // Get len to clear. 
4329 load_const_optimized(dst_len, (long)cnt*8L); // in Bytes = #DW*8 4330 4331 // Prepare other args to MVCLE. 4332 z_lgr(dst_addr, base_pointer_arg); 4333 // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0. 4334 // The even register of the register pair is not killed. 4335 (void) clear_reg(odd_tmp_reg, true, false); // Src len of MVCLE is zero. 4336 MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding() - 1), 0); 4337 BLOCK_COMMENT("} Clear_Array_Const_Big"); 4338 4339 int block_end = offset(); 4340 return block_end - block_start; 4341 } 4342 4343 // Allocator. 4344 unsigned int MacroAssembler::CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg, 4345 Register cnt_reg, 4346 Register tmp1_reg, Register tmp2_reg) { 4347 // Tmp1 is oddReg. 4348 // Tmp2 is evenReg. 4349 4350 int block_start = offset(); 4351 Label doMVC, doMVCLE, done, MVC_template; 4352 4353 BLOCK_COMMENT("CopyRawMemory_AlignedDisjoint {"); 4354 4355 // Check for zero len and convert to long. 4356 z_ltgfr(cnt_reg, cnt_reg); // Remember casted value for doSTG case. 4357 z_bre(done); // Nothing to do if len == 0. 4358 4359 z_sllg(Z_R1, cnt_reg, 3); // Dst len in bytes. calc early to have the result ready. 4360 4361 z_cghi(cnt_reg, 32); // Check for len <= 256 bytes (<=32 DW). 4362 z_brnh(doMVC); // If so, use executed MVC to clear. 4363 4364 bind(doMVCLE); // A lot of data (more than 256 bytes). 4365 // Prep dest reg pair. 4366 z_lgr(Z_R0, dst_reg); // dst addr 4367 // Dst len already in Z_R1. 4368 // Prep src reg pair. 4369 z_lgr(tmp2_reg, src_reg); // src addr 4370 z_lgr(tmp1_reg, Z_R1); // Src len same as dst len. 4371 4372 // Do the copy. 4373 move_long_ext(Z_R0, tmp2_reg, 0xb0); // Bypass cache. 4374 z_bru(done); // All done. 4375 4376 bind(MVC_template); // Just some data (not more than 256 bytes). 4377 z_mvc(0, 0, dst_reg, 0, src_reg); 4378 4379 bind(doMVC); 4380 4381 if (VM_Version::has_ExecuteExtensions()) { 4382 add2reg(Z_R1, -1); 4383 } else { 4384 add2reg(tmp1_reg, -1, Z_R1); 4385 z_larl(Z_R1, MVC_template); 4386 } 4387 4388 if (VM_Version::has_Prefetch()) { 4389 z_pfd(1, 0,Z_R0,src_reg); 4390 z_pfd(2, 0,Z_R0,dst_reg); 4391 // z_pfd(1,256,Z_R0,src_reg); // Assume very short copy. 4392 // z_pfd(2,256,Z_R0,dst_reg); 4393 } 4394 4395 if (VM_Version::has_ExecuteExtensions()) { 4396 z_exrl(Z_R1, MVC_template); 4397 } else { 4398 z_ex(tmp1_reg, 0, Z_R0, Z_R1); 4399 } 4400 4401 bind(done); 4402 4403 BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint"); 4404 4405 int block_end = offset(); 4406 return block_end - block_start; 4407 } 4408 4409 //------------------------------------------------- 4410 // Constants (scalar and oop) in constant pool 4411 //------------------------------------------------- 4412 4413 // Add a non-relocated constant to the CP. 4414 int MacroAssembler::store_const_in_toc(AddressLiteral& val) { 4415 long value = val.value(); 4416 address tocPos = long_constant(value); 4417 4418 if (tocPos != NULL) { 4419 int tocOffset = (int)(tocPos - code()->consts()->start()); 4420 return tocOffset; 4421 } 4422 // Address_constant returned NULL, so no constant entry has been created. 4423 // In that case, we return a "fatal" offset, just in case that subsequently 4424 // generated access code is executed. 4425 return -1; 4426 } 4427 4428 // Returns the TOC offset where the address is stored. 4429 // Add a relocated constant to the CP. 
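// (A return value of -1 signals that no constant pool entry could be created; callers such
// as load_oop_from_toc() below check for it and bail out.)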
4430 int MacroAssembler::store_oop_in_toc(AddressLiteral& oop) { 4431 // Use RelocationHolder::none for the constant pool entry. 4432 // Otherwise we will end up with a failing NativeCall::verify(x), 4433 // where x is the address of the constant pool entry. 4434 address tocPos = address_constant((address)oop.value(), RelocationHolder::none); 4435 4436 if (tocPos != NULL) { 4437 int tocOffset = (int)(tocPos - code()->consts()->start()); 4438 RelocationHolder rsp = oop.rspec(); 4439 Relocation *rel = rsp.reloc(); 4440 4441 // Store toc_offset in relocation, used by call_far_patchable. 4442 if ((relocInfo::relocType)rel->type() == relocInfo::runtime_call_w_cp_type) { 4443 ((runtime_call_w_cp_Relocation *)(rel))->set_constant_pool_offset(tocOffset); 4444 } 4445 // Relocate at the load's pc. 4446 relocate(rsp); 4447 4448 return tocOffset; 4449 } 4450 // Address_constant returned NULL, so no constant entry has been created 4451 // in that case, we return a "fatal" offset, just in case that subsequently 4452 // generated access code is executed. 4453 return -1; 4454 } 4455 4456 bool MacroAssembler::load_const_from_toc(Register dst, AddressLiteral& a, Register Rtoc) { 4457 int tocOffset = store_const_in_toc(a); 4458 if (tocOffset == -1) return false; 4459 address tocPos = tocOffset + code()->consts()->start(); 4460 assert((address)code()->consts()->start() != NULL, "Please add CP address"); 4461 relocate(a.rspec()); 4462 load_long_pcrelative(dst, tocPos); 4463 return true; 4464 } 4465 4466 bool MacroAssembler::load_oop_from_toc(Register dst, AddressLiteral& a, Register Rtoc) { 4467 int tocOffset = store_oop_in_toc(a); 4468 if (tocOffset == -1) return false; 4469 address tocPos = tocOffset + code()->consts()->start(); 4470 assert((address)code()->consts()->start() != NULL, "Please add CP address"); 4471 4472 load_addr_pcrelative(dst, tocPos); 4473 return true; 4474 } 4475 4476 // If the instruction sequence at the given pc is a load_const_from_toc 4477 // sequence, return the value currently stored at the referenced position 4478 // in the TOC. 4479 intptr_t MacroAssembler::get_const_from_toc(address pc) { 4480 4481 assert(is_load_const_from_toc(pc), "must be load_const_from_pool"); 4482 4483 long offset = get_load_const_from_toc_offset(pc); 4484 address dataLoc = NULL; 4485 if (is_load_const_from_toc_pcrelative(pc)) { 4486 dataLoc = pc + offset; 4487 } else { 4488 CodeBlob* cb = CodeCache::find_blob_unsafe(pc); // Else we get assertion if nmethod is zombie. 4489 assert(cb && cb->is_nmethod(), "sanity"); 4490 nmethod* nm = (nmethod*)cb; 4491 dataLoc = nm->ctable_begin() + offset; 4492 } 4493 return *(intptr_t *)dataLoc; 4494 } 4495 4496 // If the instruction sequence at the given pc is a load_const_from_toc 4497 // sequence, copy the passed-in new_data value into the referenced 4498 // position in the TOC. 4499 void MacroAssembler::set_const_in_toc(address pc, unsigned long new_data, CodeBlob *cb) { 4500 assert(is_load_const_from_toc(pc), "must be load_const_from_pool"); 4501 4502 long offset = MacroAssembler::get_load_const_from_toc_offset(pc); 4503 address dataLoc = NULL; 4504 if (is_load_const_from_toc_pcrelative(pc)) { 4505 dataLoc = pc+offset; 4506 } else { 4507 nmethod* nm = CodeCache::find_nmethod(pc); 4508 assert((cb == NULL) || (nm == (nmethod*)cb), "instruction address should be in CodeBlob"); 4509 dataLoc = nm->ctable_begin() + offset; 4510 } 4511 if (*(unsigned long *)dataLoc != new_data) { // Prevent cache invalidation: update only if necessary. 
4512 *(unsigned long *)dataLoc = new_data; 4513 } 4514 } 4515 4516 // Dynamic TOC. Getter must only be called if "a" is a load_const_from_toc 4517 // site. Verify by calling is_load_const_from_toc() before!! 4518 // Offset is +/- 2**32 -> use long. 4519 long MacroAssembler::get_load_const_from_toc_offset(address a) { 4520 assert(is_load_const_from_toc_pcrelative(a), "expected pc relative load"); 4521 // expected code sequence: 4522 // z_lgrl(t, simm32); len = 6 4523 unsigned long inst; 4524 unsigned int len = get_instruction(a, &inst); 4525 return get_pcrel_offset(inst); 4526 } 4527 4528 //********************************************************************************** 4529 // inspection of generated instruction sequences for a particular pattern 4530 //********************************************************************************** 4531 4532 bool MacroAssembler::is_load_const_from_toc_pcrelative(address a) { 4533 #ifdef ASSERT 4534 unsigned long inst; 4535 unsigned int len = get_instruction(a+2, &inst); 4536 if ((len == 6) && is_load_pcrelative_long(a) && is_call_pcrelative_long(inst)) { 4537 const int range = 128; 4538 Assembler::dump_code_range(tty, a, range, "instr(a) == z_lgrl && instr(a+2) == z_brasl"); 4539 VM_Version::z_SIGSEGV(); 4540 } 4541 #endif 4542 // expected code sequence: 4543 // z_lgrl(t, relAddr32); len = 6 4544 //TODO: verify accessed data is in CP, if possible. 4545 return is_load_pcrelative_long(a); // TODO: might be too general. Currently, only lgrl is used. 4546 } 4547 4548 bool MacroAssembler::is_load_const_from_toc_call(address a) { 4549 return is_load_const_from_toc(a) && is_call_byregister(a + load_const_from_toc_size()); 4550 } 4551 4552 bool MacroAssembler::is_load_const_call(address a) { 4553 return is_load_const(a) && is_call_byregister(a + load_const_size()); 4554 } 4555 4556 //------------------------------------------------- 4557 // Emitters for some really CICS instructions 4558 //------------------------------------------------- 4559 4560 void MacroAssembler::move_long_ext(Register dst, Register src, unsigned int pad) { 4561 assert(dst->encoding()%2==0, "must be an even/odd register pair"); 4562 assert(src->encoding()%2==0, "must be an even/odd register pair"); 4563 assert(pad<256, "must be a padding BYTE"); 4564 4565 Label retry; 4566 bind(retry); 4567 Assembler::z_mvcle(dst, src, pad); 4568 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4569 } 4570 4571 void MacroAssembler::compare_long_ext(Register left, Register right, unsigned int pad) { 4572 assert(left->encoding() % 2 == 0, "must be an even/odd register pair"); 4573 assert(right->encoding() % 2 == 0, "must be an even/odd register pair"); 4574 assert(pad<256, "must be a padding BYTE"); 4575 4576 Label retry; 4577 bind(retry); 4578 Assembler::z_clcle(left, right, pad, Z_R0); 4579 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4580 } 4581 4582 void MacroAssembler::compare_long_uni(Register left, Register right, unsigned int pad) { 4583 assert(left->encoding() % 2 == 0, "must be an even/odd register pair"); 4584 assert(right->encoding() % 2 == 0, "must be an even/odd register pair"); 4585 assert(pad<=0xfff, "must be a padding HALFWORD"); 4586 assert(VM_Version::has_ETF2(), "instruction must be available"); 4587 4588 Label retry; 4589 bind(retry); 4590 Assembler::z_clclu(left, right, pad, Z_R0); 4591 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4592 } 4593 4594 void MacroAssembler::search_string(Register end, Register 
start) { 4595 assert(end->encoding() != 0, "end address must not be in R0"); 4596 assert(start->encoding() != 0, "start address must not be in R0"); 4597 4598 Label retry; 4599 bind(retry); 4600 Assembler::z_srst(end, start); 4601 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4602 } 4603 4604 void MacroAssembler::search_string_uni(Register end, Register start) { 4605 assert(end->encoding() != 0, "end address must not be in R0"); 4606 assert(start->encoding() != 0, "start address must not be in R0"); 4607 assert(VM_Version::has_ETF3(), "instruction must be available"); 4608 4609 Label retry; 4610 bind(retry); 4611 Assembler::z_srstu(end, start); 4612 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4613 } 4614 4615 void MacroAssembler::kmac(Register srcBuff) { 4616 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4617 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 4618 4619 Label retry; 4620 bind(retry); 4621 Assembler::z_kmac(Z_R0, srcBuff); 4622 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4623 } 4624 4625 void MacroAssembler::kimd(Register srcBuff) { 4626 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4627 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 4628 4629 Label retry; 4630 bind(retry); 4631 Assembler::z_kimd(Z_R0, srcBuff); 4632 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4633 } 4634 4635 void MacroAssembler::klmd(Register srcBuff) { 4636 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4637 assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); 4638 4639 Label retry; 4640 bind(retry); 4641 Assembler::z_klmd(Z_R0, srcBuff); 4642 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4643 } 4644 4645 void MacroAssembler::km(Register dstBuff, Register srcBuff) { 4646 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 4647 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 4648 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4649 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 4650 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4651 4652 Label retry; 4653 bind(retry); 4654 Assembler::z_km(dstBuff, srcBuff); 4655 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4656 } 4657 4658 void MacroAssembler::kmc(Register dstBuff, Register srcBuff) { 4659 // DstBuff and srcBuff are allowed to be the same register (encryption in-place). 4660 // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 
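  // Like the other CPACF emitters above, KMC uses Z_R0 (function code) and Z_R1
  // (parameter block address) as implicit operands; the odd register of the source
  // pair holds the remaining operand length. CC==3 signals partial completion,
  // which is why the retry loop below branches back until the operation is done.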
4661 assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); 4662 assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); 4663 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4664 4665 Label retry; 4666 bind(retry); 4667 Assembler::z_kmc(dstBuff, srcBuff); 4668 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4669 } 4670 4671 void MacroAssembler::cksm(Register crcBuff, Register srcBuff) { 4672 assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); 4673 4674 Label retry; 4675 bind(retry); 4676 Assembler::z_cksm(crcBuff, srcBuff); 4677 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4678 } 4679 4680 void MacroAssembler::translate_oo(Register r1, Register r2, uint m3) { 4681 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4682 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4683 4684 Label retry; 4685 bind(retry); 4686 Assembler::z_troo(r1, r2, m3); 4687 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4688 } 4689 4690 void MacroAssembler::translate_ot(Register r1, Register r2, uint m3) { 4691 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4692 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4693 4694 Label retry; 4695 bind(retry); 4696 Assembler::z_trot(r1, r2, m3); 4697 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4698 } 4699 4700 void MacroAssembler::translate_to(Register r1, Register r2, uint m3) { 4701 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4702 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4703 4704 Label retry; 4705 bind(retry); 4706 Assembler::z_trto(r1, r2, m3); 4707 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4708 } 4709 4710 void MacroAssembler::translate_tt(Register r1, Register r2, uint m3) { 4711 assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); 4712 assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); 4713 4714 Label retry; 4715 bind(retry); 4716 Assembler::z_trtt(r1, r2, m3); 4717 Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); 4718 } 4719 4720 //--------------------------------------- 4721 // Helpers for Intrinsic Emitters 4722 //--------------------------------------- 4723 4724 /** 4725 * uint32_t crc; 4726 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); 4727 */ 4728 void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, Register tmp) { 4729 assert_different_registers(crc, table, tmp); 4730 assert_different_registers(val, table); 4731 if (crc == val) { // Must rotate first to use the unmodified value. 4732 rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. 4733 z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits. 4734 } else { 4735 z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits. 4736 rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. 
4737 } 4738 z_x(crc, Address(table, tmp, 0)); 4739 } 4740 4741 /** 4742 * uint32_t crc; 4743 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); 4744 */ 4745 void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) { 4746 fold_byte_crc32(crc, crc, table, tmp); 4747 } 4748 4749 /** 4750 * Emits code to update CRC-32 with a byte value according to constants in table. 4751 * 4752 * @param [in,out]crc Register containing the crc. 4753 * @param [in]val Register containing the byte to fold into the CRC. 4754 * @param [in]table Register containing the table of crc constants. 4755 * 4756 * uint32_t crc; 4757 * val = crc_table[(val ^ crc) & 0xFF]; 4758 * crc = val ^ (crc >> 8); 4759 */ 4760 void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { 4761 z_xr(val, crc); 4762 fold_byte_crc32(crc, val, table, val); 4763 } 4764 4765 4766 /** 4767 * @param crc register containing existing CRC (32-bit) 4768 * @param buf register pointing to input byte buffer (byte*) 4769 * @param len register containing number of bytes 4770 * @param table register pointing to CRC table 4771 */ 4772 void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, Register data) { 4773 assert_different_registers(crc, buf, len, table, data); 4774 4775 Label L_mainLoop, L_done; 4776 const int mainLoop_stepping = 1; 4777 4778 // Process all bytes in a single-byte loop. 4779 z_ltr(len, len); 4780 z_brnh(L_done); 4781 4782 bind(L_mainLoop); 4783 z_llgc(data, Address(buf, (intptr_t)0));// Current byte of input buffer (zero extended). Avoids garbage in upper half of register. 4784 add2reg(buf, mainLoop_stepping); // Advance buffer position. 4785 update_byte_crc32(crc, data, table); 4786 z_brct(len, L_mainLoop); // Iterate. 4787 4788 bind(L_done); 4789 } 4790 4791 /** 4792 * Emits code to update CRC-32 with a 4-byte value according to constants in table. 4793 * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c. 4794 * 4795 */ 4796 void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc, 4797 Register t0, Register t1, Register t2, Register t3) { 4798 // This is what we implement (the DOBIG4 part): 4799 // 4800 // #define DOBIG4 c ^= *++buf4; \ 4801 // c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ 4802 // crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] 4803 // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 4804 // Pre-calculate (constant) column offsets, use columns 4..7 for big-endian. 4805 const int ix0 = 4*(4*CRC32_COLUMN_SIZE); 4806 const int ix1 = 5*(4*CRC32_COLUMN_SIZE); 4807 const int ix2 = 6*(4*CRC32_COLUMN_SIZE); 4808 const int ix3 = 7*(4*CRC32_COLUMN_SIZE); 4809 4810 // XOR crc with next four bytes of buffer. 4811 lgr_if_needed(t0, crc); 4812 z_x(t0, Address(buf, bufDisp)); 4813 if (bufInc != 0) { 4814 add2reg(buf, bufInc); 4815 } 4816 4817 // Chop crc into 4 single-byte pieces, shifted left 2 bits, to form the table indices. 4818 rotate_then_insert(t3, t0, 56-2, 63-2, 2, true); // ((c >> 0) & 0xff) << 2 4819 rotate_then_insert(t2, t0, 56-2, 63-2, 2-8, true); // ((c >> 8) & 0xff) << 2 4820 rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true); // ((c >> 16) & 0xff) << 2 4821 rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true); // ((c >> 24) & 0xff) << 2 4822 4823 // XOR indexed table values to calculate updated crc. 
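  // t2 accumulates the columns indexed by the two low-order bytes (ix0, ix1),
  // t0 accumulates the columns indexed by the two high-order bytes (ix2, ix3);
  // the final XR merges both halves into the updated crc.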
4824 z_ly(t2, Address(table, t2, (intptr_t)ix1)); 4825 z_ly(t0, Address(table, t0, (intptr_t)ix3)); 4826 z_xy(t2, Address(table, t3, (intptr_t)ix0)); 4827 z_xy(t0, Address(table, t1, (intptr_t)ix2)); 4828 z_xr(t0, t2); // Now t0 contains the updated CRC value. 4829 lgr_if_needed(crc, t0); 4830 } 4831 4832 /** 4833 * @param crc register containing existing CRC (32-bit) 4834 * @param buf register pointing to input byte buffer (byte*) 4835 * @param len register containing number of bytes 4836 * @param table register pointing to CRC table 4837 * 4838 * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller! 4839 */ 4840 void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table, 4841 Register t0, Register t1, Register t2, Register t3, 4842 bool invertCRC) { 4843 assert_different_registers(crc, buf, len, table); 4844 4845 Label L_mainLoop, L_tail; 4846 Register data = t0; 4847 Register ctr = Z_R0; 4848 const int mainLoop_stepping = 4; 4849 const int log_stepping = exact_log2(mainLoop_stepping); 4850 4851 // Don't test for len <= 0 here. This pathological case should not occur anyway. 4852 // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles. 4853 // The situation itself is detected and handled correctly by the conditional branches 4854 // following aghi(len, -stepping) and aghi(len, +stepping). 4855 4856 if (invertCRC) { 4857 not_(crc, noreg, false); // 1s complement of crc 4858 } 4859 4860 // Check for short (<4 bytes) buffer. 4861 z_srag(ctr, len, log_stepping); 4862 z_brnh(L_tail); 4863 4864 z_lrvr(crc, crc); // Revert byte order because we are dealing with big-endian data. 4865 rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop 4866 4867 BIND(L_mainLoop); 4868 update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3); 4869 z_brct(ctr, L_mainLoop); // Iterate. 4870 4871 z_lrvr(crc, crc); // Revert byte order back to original. 4872 4873 // Process last few (<8) bytes of buffer. 4874 BIND(L_tail); 4875 update_byteLoop_crc32(crc, buf, len, table, data); 4876 4877 if (invertCRC) { 4878 not_(crc, noreg, false); // 1s complement of crc 4879 } 4880 } 4881 4882 /** 4883 * @param crc register containing existing CRC (32-bit) 4884 * @param buf register pointing to input byte buffer (byte*) 4885 * @param len register containing number of bytes 4886 * @param table register pointing to CRC table 4887 */ 4888 void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table, 4889 Register t0, Register t1, Register t2, Register t3, 4890 bool invertCRC) { 4891 assert_different_registers(crc, buf, len, table); 4892 Register data = t0; 4893 4894 if (invertCRC) { 4895 not_(crc, noreg, false); // 1s complement of crc 4896 } 4897 4898 update_byteLoop_crc32(crc, buf, len, table, data); 4899 4900 if (invertCRC) { 4901 not_(crc, noreg, false); // 1s complement of crc 4902 } 4903 } 4904 4905 void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp, 4906 bool invertCRC) { 4907 assert_different_registers(crc, buf, len, table, tmp); 4908 4909 if (invertCRC) { 4910 not_(crc, noreg, false); // 1s complement of crc 4911 } 4912 4913 z_llgc(tmp, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register. 
4914 update_byte_crc32(crc, tmp, table); 4915 4916 if (invertCRC) { 4917 not_(crc, noreg, false); // 1s complement of crc 4918 } 4919 } 4920 4921 void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table, 4922 bool invertCRC) { 4923 assert_different_registers(crc, val, table); 4924 4925 if (invertCRC) { 4926 not_(crc, noreg, false); // 1s complement of crc 4927 } 4928 4929 update_byte_crc32(crc, val, table); 4930 4931 if (invertCRC) { 4932 not_(crc, noreg, false); // 1s complement of crc 4933 } 4934 } 4935 4936 // 4937 // Code for BigInteger::multiplyToLen() intrinsic. 4938 // 4939 4940 // dest_lo += src1 + src2 4941 // dest_hi += carry1 + carry2 4942 // Z_R7 is destroyed ! 4943 void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo, 4944 Register src1, Register src2) { 4945 clear_reg(Z_R7); 4946 z_algr(dest_lo, src1); 4947 z_alcgr(dest_hi, Z_R7); 4948 z_algr(dest_lo, src2); 4949 z_alcgr(dest_hi, Z_R7); 4950 } 4951 4952 // Multiply 64 bit by 64 bit first loop. 4953 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, 4954 Register x_xstart, 4955 Register y, Register y_idx, 4956 Register z, 4957 Register carry, 4958 Register product, 4959 Register idx, Register kdx) { 4960 // jlong carry, x[], y[], z[]; 4961 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 4962 // huge_128 product = y[idx] * x[xstart] + carry; 4963 // z[kdx] = (jlong)product; 4964 // carry = (jlong)(product >>> 64); 4965 // } 4966 // z[xstart] = carry; 4967 4968 Label L_first_loop, L_first_loop_exit; 4969 Label L_one_x, L_one_y, L_multiply; 4970 4971 z_aghi(xstart, -1); 4972 z_brl(L_one_x); // Special case: length of x is 1. 4973 4974 // Load next two integers of x. 4975 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 4976 mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0)); 4977 4978 4979 bind(L_first_loop); 4980 4981 z_aghi(idx, -1); 4982 z_brl(L_first_loop_exit); 4983 z_aghi(idx, -1); 4984 z_brl(L_one_y); 4985 4986 // Load next two integers of y. 4987 z_sllg(Z_R1_scratch, idx, LogBytesPerInt); 4988 mem2reg_opt(y_idx, Address(y, Z_R1_scratch, 0)); 4989 4990 4991 bind(L_multiply); 4992 4993 Register multiplicand = product->successor(); 4994 Register product_low = multiplicand; 4995 4996 lgr_if_needed(multiplicand, x_xstart); 4997 z_mlgr(product, y_idx); // multiplicand * y_idx -> product::multiplicand 4998 clear_reg(Z_R7); 4999 z_algr(product_low, carry); // Add carry to result. 5000 z_alcgr(product, Z_R7); // Add carry of the last addition. 5001 add2reg(kdx, -2); 5002 5003 // Store result. 5004 z_sllg(Z_R7, kdx, LogBytesPerInt); 5005 reg2mem_opt(product_low, Address(z, Z_R7, 0)); 5006 lgr_if_needed(carry, product); 5007 z_bru(L_first_loop); 5008 5009 5010 bind(L_one_y); // Load one 32 bit portion of y as (0,value). 5011 5012 clear_reg(y_idx); 5013 mem2reg_opt(y_idx, Address(y, (intptr_t) 0), false); 5014 z_bru(L_multiply); 5015 5016 5017 bind(L_one_x); // Load one 32 bit portion of x as (0,value). 5018 5019 clear_reg(x_xstart); 5020 mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false); 5021 z_bru(L_first_loop); 5022 5023 bind(L_first_loop_exit); 5024 } 5025 5026 // Multiply 64 bit by 64 bit and add 128 bit. 
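// In C-like terms (sketch only; huge_128 denotes an unsigned 128-bit type, as in the
// pseudocode comments elsewhere in this file):
//   huge_128 product = (huge_128)y[idx] * x_xstart + z[idx] + carry;
//   z[idx] = (jlong)product;            // low 64 bits, written back via product_low
//   carry  = (jlong)(product >>> 64);   // high 64 bits, left in 'product' for the caller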
5027 void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y, 5028 Register z, 5029 Register yz_idx, Register idx, 5030 Register carry, Register product, 5031 int offset) { 5032 // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry; 5033 // z[kdx] = (jlong)product; 5034 5035 Register multiplicand = product->successor(); 5036 Register product_low = multiplicand; 5037 5038 z_sllg(Z_R7, idx, LogBytesPerInt); 5039 mem2reg_opt(yz_idx, Address(y, Z_R7, offset)); 5040 5041 lgr_if_needed(multiplicand, x_xstart); 5042 z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand 5043 mem2reg_opt(yz_idx, Address(z, Z_R7, offset)); 5044 5045 add2_with_carry(product, product_low, carry, yz_idx); 5046 5047 z_sllg(Z_R7, idx, LogBytesPerInt); 5048 reg2mem_opt(product_low, Address(z, Z_R7, offset)); 5049 5050 } 5051 5052 // Multiply 128 bit by 128 bit. Unrolled inner loop. 5053 void MacroAssembler::multiply_128_x_128_loop(Register x_xstart, 5054 Register y, Register z, 5055 Register yz_idx, Register idx, 5056 Register jdx, 5057 Register carry, Register product, 5058 Register carry2) { 5059 // jlong carry, x[], y[], z[]; 5060 // int kdx = ystart+1; 5061 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop 5062 // huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry; 5063 // z[kdx+idx+1] = (jlong)product; 5064 // jlong carry2 = (jlong)(product >>> 64); 5065 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry2; 5066 // z[kdx+idx] = (jlong)product; 5067 // carry = (jlong)(product >>> 64); 5068 // } 5069 // idx += 2; 5070 // if (idx > 0) { 5071 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry; 5072 // z[kdx+idx] = (jlong)product; 5073 // carry = (jlong)(product >>> 64); 5074 // } 5075 5076 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; 5077 5078 // scale the index 5079 lgr_if_needed(jdx, idx); 5080 and_imm(jdx, 0xfffffffffffffffcL); 5081 rshift(jdx, 2); 5082 5083 5084 bind(L_third_loop); 5085 5086 z_aghi(jdx, -1); 5087 z_brl(L_third_loop_exit); 5088 add2reg(idx, -4); 5089 5090 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8); 5091 lgr_if_needed(carry2, product); 5092 5093 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0); 5094 lgr_if_needed(carry, product); 5095 z_bru(L_third_loop); 5096 5097 5098 bind(L_third_loop_exit); // Handle any left-over operand parts. 
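  // At this point at most three 32-bit digits of y remain (idx & 0x3). If two or
  // more remain, one further 64-bit digit is processed via multiply_add_128_x_128();
  // a single trailing 32-bit digit is then handled by the scalar code below.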
5099 5100 and_imm(idx, 0x3); 5101 z_brz(L_post_third_loop_done); 5102 5103 Label L_check_1; 5104 5105 z_aghi(idx, -2); 5106 z_brl(L_check_1); 5107 5108 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0); 5109 lgr_if_needed(carry, product); 5110 5111 5112 bind(L_check_1); 5113 5114 add2reg(idx, 0x2); 5115 and_imm(idx, 0x1); 5116 z_aghi(idx, -1); 5117 z_brl(L_post_third_loop_done); 5118 5119 Register multiplicand = product->successor(); 5120 Register product_low = multiplicand; 5121 5122 z_sllg(Z_R7, idx, LogBytesPerInt); 5123 clear_reg(yz_idx); 5124 mem2reg_opt(yz_idx, Address(y, Z_R7, 0), false); 5125 lgr_if_needed(multiplicand, x_xstart); 5126 z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand 5127 clear_reg(yz_idx); 5128 mem2reg_opt(yz_idx, Address(z, Z_R7, 0), false); 5129 5130 add2_with_carry(product, product_low, yz_idx, carry); 5131 5132 z_sllg(Z_R7, idx, LogBytesPerInt); 5133 reg2mem_opt(product_low, Address(z, Z_R7, 0), false); 5134 rshift(product_low, 32); 5135 5136 lshift(product, 32); 5137 z_ogr(product_low, product); 5138 lgr_if_needed(carry, product_low); 5139 5140 bind(L_post_third_loop_done); 5141 } 5142 5143 void MacroAssembler::multiply_to_len(Register x, Register xlen, 5144 Register y, Register ylen, 5145 Register z, 5146 Register tmp1, Register tmp2, 5147 Register tmp3, Register tmp4, 5148 Register tmp5) { 5149 ShortBranchVerifier sbv(this); 5150 5151 assert_different_registers(x, xlen, y, ylen, z, 5152 tmp1, tmp2, tmp3, tmp4, tmp5, Z_R1_scratch, Z_R7); 5153 assert_different_registers(x, xlen, y, ylen, z, 5154 tmp1, tmp2, tmp3, tmp4, tmp5, Z_R8); 5155 5156 z_stmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP); 5157 5158 // In openJdk, we store the argument as 32-bit value to slot. 5159 Address zlen(Z_SP, _z_abi(remaining_cargs)); // Int in long on big endian. 5160 5161 const Register idx = tmp1; 5162 const Register kdx = tmp2; 5163 const Register xstart = tmp3; 5164 5165 const Register y_idx = tmp4; 5166 const Register carry = tmp5; 5167 const Register product = Z_R0_scratch; 5168 const Register x_xstart = Z_R8; 5169 5170 // First Loop. 5171 // 5172 // final static long LONG_MASK = 0xffffffffL; 5173 // int xstart = xlen - 1; 5174 // int ystart = ylen - 1; 5175 // long carry = 0; 5176 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx-, kdx--) { 5177 // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry; 5178 // z[kdx] = (int)product; 5179 // carry = product >>> 32; 5180 // } 5181 // z[xstart] = (int)carry; 5182 // 5183 5184 lgr_if_needed(idx, ylen); // idx = ylen 5185 z_llgf(kdx, zlen); // C2 does not respect int to long conversion for stub calls, thus load zero-extended. 5186 clear_reg(carry); // carry = 0 5187 5188 Label L_done; 5189 5190 lgr_if_needed(xstart, xlen); 5191 z_aghi(xstart, -1); 5192 z_brl(L_done); 5193 5194 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 5195 5196 NearLabel L_second_loop; 5197 compare64_and_branch(kdx, RegisterOrConstant((intptr_t) 0), bcondEqual, L_second_loop); 5198 5199 NearLabel L_carry; 5200 z_aghi(kdx, -1); 5201 z_brz(L_carry); 5202 5203 // Store lower 32 bits of carry. 5204 z_sllg(Z_R1_scratch, kdx, LogBytesPerInt); 5205 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5206 rshift(carry, 32); 5207 z_aghi(kdx, -1); 5208 5209 5210 bind(L_carry); 5211 5212 // Store upper 32 bits of carry. 
5213 z_sllg(Z_R1_scratch, kdx, LogBytesPerInt); 5214 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5215 5216 // Second and third (nested) loops. 5217 // 5218 // for (int i = xstart-1; i >= 0; i--) { // Second loop 5219 // carry = 0; 5220 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop 5221 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + 5222 // (z[k] & LONG_MASK) + carry; 5223 // z[k] = (int)product; 5224 // carry = product >>> 32; 5225 // } 5226 // z[i] = (int)carry; 5227 // } 5228 // 5229 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx 5230 5231 const Register jdx = tmp1; 5232 5233 bind(L_second_loop); 5234 5235 clear_reg(carry); // carry = 0; 5236 lgr_if_needed(jdx, ylen); // j = ystart+1 5237 5238 z_aghi(xstart, -1); // i = xstart-1; 5239 z_brl(L_done); 5240 5241 // Use free slots in the current stackframe instead of push/pop. 5242 Address zsave(Z_SP, _z_abi(carg_1)); 5243 reg2mem_opt(z, zsave); 5244 5245 5246 Label L_last_x; 5247 5248 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5249 load_address(z, Address(z, Z_R1_scratch, 4)); // z = z + k - j 5250 z_aghi(xstart, -1); // i = xstart-1; 5251 z_brl(L_last_x); 5252 5253 z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); 5254 mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0)); 5255 5256 5257 Label L_third_loop_prologue; 5258 5259 bind(L_third_loop_prologue); 5260 5261 Address xsave(Z_SP, _z_abi(carg_2)); 5262 Address xlensave(Z_SP, _z_abi(carg_3)); 5263 Address ylensave(Z_SP, _z_abi(carg_4)); 5264 5265 reg2mem_opt(x, xsave); 5266 reg2mem_opt(xstart, xlensave); 5267 reg2mem_opt(ylen, ylensave); 5268 5269 5270 multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x); 5271 5272 mem2reg_opt(z, zsave); 5273 mem2reg_opt(x, xsave); 5274 mem2reg_opt(xlen, xlensave); // This is the decrement of the loop counter! 5275 mem2reg_opt(ylen, ylensave); 5276 5277 add2reg(tmp3, 1, xlen); 5278 z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt); 5279 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5280 z_aghi(tmp3, -1); 5281 z_brl(L_done); 5282 5283 rshift(carry, 32); 5284 z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt); 5285 reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false); 5286 z_bru(L_second_loop); 5287 5288 // Next infrequent code is moved outside loops. 5289 bind(L_last_x); 5290 5291 clear_reg(x_xstart); 5292 mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false); 5293 z_bru(L_third_loop_prologue); 5294 5295 bind(L_done); 5296 5297 z_lmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP); 5298 } 5299 5300 #ifndef PRODUCT 5301 // Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false). 5302 void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) { 5303 Label ok; 5304 if (check_equal) { 5305 z_bre(ok); 5306 } else { 5307 z_brne(ok); 5308 } 5309 stop(msg, id); 5310 bind(ok); 5311 } 5312 5313 // Assert if CC indicates "low". 5314 void MacroAssembler::asm_assert_low(const char *msg, int id) { 5315 Label ok; 5316 z_brnl(ok); 5317 stop(msg, id); 5318 bind(ok); 5319 } 5320 5321 // Assert if CC indicates "high". 5322 void MacroAssembler::asm_assert_high(const char *msg, int id) { 5323 Label ok; 5324 z_brnh(ok); 5325 stop(msg, id); 5326 bind(ok); 5327 } 5328 5329 // Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false) 5330 // generate non-relocatable code. 
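// Typical use (sketch; compare and names are illustrative): the caller sets the CC
// first, and the assert only stops when the check fails, e.g.
//   z_cgr(Z_SP, expected_sp);                       // Sets the CC.
//   asm_assert_static(true, "SP corrupted", 0x5a);  // Branches over stop_static() if CC says "equal".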
5331 void MacroAssembler::asm_assert_static(bool check_equal, const char *msg, int id) {
5332   Label ok;
5333   if (check_equal) { z_bre(ok); }
5334   else             { z_brne(ok); }
5335   stop_static(msg, id);
5336   bind(ok);
5337 }
5338
5339 void MacroAssembler::asm_assert_mems_zero(bool check_equal, bool allow_relocation, int size, int64_t mem_offset,
5340                                           Register mem_base, const char* msg, int id) {
5341   switch (size) {
5342     case 4:
5343       load_and_test_int(Z_R0, Address(mem_base, mem_offset));
5344       break;
5345     case 8:
5346       load_and_test_long(Z_R0, Address(mem_base, mem_offset));
5347       break;
5348     default:
5349       ShouldNotReachHere();
5350   }
5351   if (allow_relocation) { asm_assert(check_equal, msg, id); }
5352   else                  { asm_assert_static(check_equal, msg, id); }
5353 }
5354
5355 // Check the condition
5356 //   expected_size == FP - SP
5357 // after transformation:
5358 //   expected_size - FP + SP == 0
5359 // Destroys Register expected_size if no tmp register is passed.
5360 void MacroAssembler::asm_assert_frame_size(Register expected_size, Register tmp, const char* msg, int id) {
5361   if (tmp == noreg) {
5362     tmp = expected_size;
5363   } else {
5364     if (tmp != expected_size) {
5365       z_lgr(tmp, expected_size);
5366     }
5367   }
5368   z_algr(tmp, Z_SP);
5369   z_slg(tmp, 0, Z_R0, Z_SP);
5370   asm_assert_eq(msg, id);
5371 }
5372 #endif // !PRODUCT
5373
5374 void MacroAssembler::verify_thread() {
5375   if (VerifyThread) {
5376     unimplemented("", 117);
5377   }
5378 }
5379
5380 // Save and restore functions: Exclude Z_R0.
5381 void MacroAssembler::save_volatile_regs(Register dst, int offset, bool include_fp, bool include_flags) {
5382   z_stmg(Z_R1, Z_R5, offset, dst); offset += 5 * BytesPerWord;
5383   if (include_fp) {
5384     z_std(Z_F0, Address(dst, offset)); offset += BytesPerWord;
5385     z_std(Z_F1, Address(dst, offset)); offset += BytesPerWord;
5386     z_std(Z_F2, Address(dst, offset)); offset += BytesPerWord;
5387     z_std(Z_F3, Address(dst, offset)); offset += BytesPerWord;
5388     z_std(Z_F4, Address(dst, offset)); offset += BytesPerWord;
5389     z_std(Z_F5, Address(dst, offset)); offset += BytesPerWord;
5390     z_std(Z_F6, Address(dst, offset)); offset += BytesPerWord;
5391     z_std(Z_F7, Address(dst, offset)); offset += BytesPerWord;
5392   }
5393   if (include_flags) {
5394     Label done;
5395     z_mvi(Address(dst, offset), 2); // encoding: equal
5396     z_bre(done);
5397     z_mvi(Address(dst, offset), 4); // encoding: higher
5398     z_brh(done);
5399     z_mvi(Address(dst, offset), 1); // encoding: lower
5400     bind(done);
5401   }
5402 }
5403 void MacroAssembler::restore_volatile_regs(Register src, int offset, bool include_fp, bool include_flags) {
5404   z_lmg(Z_R1, Z_R5, offset, src); offset += 5 * BytesPerWord;
5405   if (include_fp) {
5406     z_ld(Z_F0, Address(src, offset)); offset += BytesPerWord;
5407     z_ld(Z_F1, Address(src, offset)); offset += BytesPerWord;
5408     z_ld(Z_F2, Address(src, offset)); offset += BytesPerWord;
5409     z_ld(Z_F3, Address(src, offset)); offset += BytesPerWord;
5410     z_ld(Z_F4, Address(src, offset)); offset += BytesPerWord;
5411     z_ld(Z_F5, Address(src, offset)); offset += BytesPerWord;
5412     z_ld(Z_F6, Address(src, offset)); offset += BytesPerWord;
5413     z_ld(Z_F7, Address(src, offset)); offset += BytesPerWord;
5414   }
5415   if (include_flags) {
5416     z_cli(Address(src, offset), 2); // see encoding above
5417   }
5418 }
5419
5420 // Plausibility check for oops.
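// Calling convention of the verify-oop stub as used below: the message is passed in
// Z_ARG1 and the oop to check in Z_ARG2; volatile registers (and, in verify_oop(),
// the condition code) are saved and restored around the call, so the check does not
// disturb surrounding code. A sketch of a call site (register choice is illustrative):
//   verify_oop(Z_ARG3, "bad oop in receiver");   // No-op unless +VerifyOops.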
5421 void MacroAssembler::verify_oop(Register oop, const char* msg) { 5422 if (!VerifyOops) return; 5423 5424 BLOCK_COMMENT("verify_oop {"); 5425 unsigned int nbytes_save = (5 + 8 + 1) * BytesPerWord; 5426 address entry_addr = StubRoutines::verify_oop_subroutine_entry_address(); 5427 5428 save_return_pc(); 5429 5430 // Push frame, but preserve flags 5431 z_lgr(Z_R0, Z_SP); 5432 z_lay(Z_SP, -((int64_t)nbytes_save + frame::z_abi_160_size), Z_SP); 5433 z_stg(Z_R0, _z_abi(callers_sp), Z_SP); 5434 5435 save_volatile_regs(Z_SP, frame::z_abi_160_size, true, true); 5436 5437 lgr_if_needed(Z_ARG2, oop); 5438 load_const_optimized(Z_ARG1, (address)msg); 5439 load_const_optimized(Z_R1, entry_addr); 5440 z_lg(Z_R1, 0, Z_R1); 5441 call_c(Z_R1); 5442 5443 restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, true); 5444 pop_frame(); 5445 restore_return_pc(); 5446 5447 BLOCK_COMMENT("} verify_oop "); 5448 } 5449 5450 void MacroAssembler::verify_oop_addr(Address addr, const char* msg) { 5451 if (!VerifyOops) return; 5452 5453 BLOCK_COMMENT("verify_oop {"); 5454 unsigned int nbytes_save = (5 + 8) * BytesPerWord; 5455 address entry_addr = StubRoutines::verify_oop_subroutine_entry_address(); 5456 5457 save_return_pc(); 5458 unsigned int frame_size = push_frame_abi160(nbytes_save); // kills Z_R0 5459 save_volatile_regs(Z_SP, frame::z_abi_160_size, true, false); 5460 5461 z_lg(Z_ARG2, addr.plus_disp(frame_size)); 5462 load_const_optimized(Z_ARG1, (address)msg); 5463 load_const_optimized(Z_R1, entry_addr); 5464 z_lg(Z_R1, 0, Z_R1); 5465 call_c(Z_R1); 5466 5467 restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, false); 5468 pop_frame(); 5469 restore_return_pc(); 5470 5471 BLOCK_COMMENT("} verify_oop "); 5472 } 5473 5474 const char* MacroAssembler::stop_types[] = { 5475 "stop", 5476 "untested", 5477 "unimplemented", 5478 "shouldnotreachhere" 5479 }; 5480 5481 static void stop_on_request(const char* tp, const char* msg) { 5482 tty->print("Z assembly code requires stop: (%s) %s\n", tp, msg); 5483 guarantee(false, "Z assembly code requires stop: %s", msg); 5484 } 5485 5486 void MacroAssembler::stop(int type, const char* msg, int id) { 5487 BLOCK_COMMENT(err_msg("stop: %s {", msg)); 5488 5489 // Setup arguments. 5490 load_const(Z_ARG1, (void*) stop_types[type%stop_end]); 5491 load_const(Z_ARG2, (void*) msg); 5492 get_PC(Z_R14); // Following code pushes a frame without entering a new function. Use current pc as return address. 5493 save_return_pc(); // Saves return pc Z_R14. 5494 push_frame_abi160(0); 5495 call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2); 5496 // The plain disassembler does not recognize illtrap. It instead displays 5497 // a 32-bit value. Issuing two illtraps assures the disassembler finds 5498 // the proper beginning of the next instruction. 5499 z_illtrap(); // Illegal instruction. 5500 z_illtrap(); // Illegal instruction. 5501 5502 BLOCK_COMMENT(" } stop"); 5503 } 5504 5505 // Special version of stop() for code size reduction. 5506 // Reuses the previously generated call sequence, if any. 5507 // Generates the call sequence on its own, if necessary. 5508 // Note: This code will work only in non-relocatable code! 5509 // The relative address of the data elements (arg1, arg2) must not change. 5510 // The reentry point must not move relative to it's users. This prerequisite 5511 // should be given for "hand-written" code, if all chain calls are in the same code blob. 5512 // Generated code must not undergo any transformation, e.g. ShortenBranches, to be safe. 
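// Usage sketch (illustrative; only valid in non-relocatable code, see above): the
// first call emits the full call sequence and returns its re-entry point, subsequent
// calls merely set up their arguments and branch back to it:
//   address reentry = NULL;
//   reentry = stop_chain(reentry, 0 /* "stop" */, "check #1 failed", 0x01, false);
//   ...
//   reentry = stop_chain(reentry, 0 /* "stop" */, "check #2 failed", 0x02, false); // Short branch to the first sequence.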
5513 address MacroAssembler::stop_chain(address reentry, int type, const char* msg, int id, bool allow_relocation) { 5514 BLOCK_COMMENT(err_msg("stop_chain(%s,%s): %s {", reentry==NULL?"init":"cont", allow_relocation?"reloc ":"static", msg)); 5515 5516 // Setup arguments. 5517 if (allow_relocation) { 5518 // Relocatable version (for comparison purposes). Remove after some time. 5519 load_const(Z_ARG1, (void*) stop_types[type%stop_end]); 5520 load_const(Z_ARG2, (void*) msg); 5521 } else { 5522 load_absolute_address(Z_ARG1, (address)stop_types[type%stop_end]); 5523 load_absolute_address(Z_ARG2, (address)msg); 5524 } 5525 if ((reentry != NULL) && RelAddr::is_in_range_of_RelAddr16(reentry, pc())) { 5526 BLOCK_COMMENT("branch to reentry point:"); 5527 z_brc(bcondAlways, reentry); 5528 } else { 5529 BLOCK_COMMENT("reentry point:"); 5530 reentry = pc(); // Re-entry point for subsequent stop calls. 5531 save_return_pc(); // Saves return pc Z_R14. 5532 push_frame_abi160(0); 5533 if (allow_relocation) { 5534 reentry = NULL; // Prevent reentry if code relocation is allowed. 5535 call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2); 5536 } else { 5537 call_VM_leaf_static(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2); 5538 } 5539 z_illtrap(); // Illegal instruction as emergency stop, should the above call return. 5540 } 5541 BLOCK_COMMENT(" } stop_chain"); 5542 5543 return reentry; 5544 } 5545 5546 // Special version of stop() for code size reduction. 5547 // Assumes constant relative addresses for data and runtime call. 5548 void MacroAssembler::stop_static(int type, const char* msg, int id) { 5549 stop_chain(NULL, type, msg, id, false); 5550 } 5551 5552 void MacroAssembler::stop_subroutine() { 5553 unimplemented("stop_subroutine", 710); 5554 } 5555 5556 // Prints msg to stdout from within generated code.. 5557 void MacroAssembler::warn(const char* msg) { 5558 RegisterSaver::save_live_registers(this, RegisterSaver::all_registers, Z_R14); 5559 load_absolute_address(Z_R1, (address) warning); 5560 load_absolute_address(Z_ARG1, (address) msg); 5561 (void) call(Z_R1); 5562 RegisterSaver::restore_live_registers(this, RegisterSaver::all_registers); 5563 } 5564 5565 #ifndef PRODUCT 5566 5567 // Write pattern 0x0101010101010101 in region [low-before, high+after]. 
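// 'before' and 'after' are counted in 8-byte words: the zapped region spans
// [low - before*BytesPerWord, high + after*BytesPerWord] and is walked in 8-byte
// steps. Debug-only helper; it is a no-op unless ZapEmptyStackFields is set.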
5568 void MacroAssembler::zap_from_to(Register low, Register high, Register val, Register addr, int before, int after) { 5569 if (!ZapEmptyStackFields) return; 5570 BLOCK_COMMENT("zap memory region {"); 5571 load_const_optimized(val, 0x0101010101010101); 5572 int size = before + after; 5573 if (low == high && size < 5 && size > 0) { 5574 int offset = -before*BytesPerWord; 5575 for (int i = 0; i < size; ++i) { 5576 z_stg(val, Address(low, offset)); 5577 offset +=(1*BytesPerWord); 5578 } 5579 } else { 5580 add2reg(addr, -before*BytesPerWord, low); 5581 if (after) { 5582 #ifdef ASSERT 5583 jlong check = after * BytesPerWord; 5584 assert(Immediate::is_simm32(check) && Immediate::is_simm32(-check), "value not encodable !"); 5585 #endif 5586 add2reg(high, after * BytesPerWord); 5587 } 5588 NearLabel loop; 5589 bind(loop); 5590 z_stg(val, Address(addr)); 5591 add2reg(addr, 8); 5592 compare64_and_branch(addr, high, bcondNotHigh, loop); 5593 if (after) { 5594 add2reg(high, -after * BytesPerWord); 5595 } 5596 } 5597 BLOCK_COMMENT("} zap memory region"); 5598 } 5599 #endif // !PRODUCT 5600 5601 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value, Register _rscratch) { 5602 _masm = masm; 5603 _masm->load_absolute_address(_rscratch, (address)flag_addr); 5604 _masm->load_and_test_int(_rscratch, Address(_rscratch)); 5605 if (value) { 5606 _masm->z_brne(_label); // Skip if true, i.e. != 0. 5607 } else { 5608 _masm->z_bre(_label); // Skip if false, i.e. == 0. 5609 } 5610 } 5611 5612 SkipIfEqual::~SkipIfEqual() { 5613 _masm->bind(_label); 5614 }
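// SkipIfEqual usage sketch (flag and register choice are illustrative): the guard
// emits a conditional branch over everything generated while it is in scope; the
// branch target is bound when the guard goes out of scope:
//   {
//     SkipIfEqual skip(_masm, &DTraceMethodProbes, false, Z_R1_scratch);
//     // Code emitted here is executed only if DTraceMethodProbes is true.
//   }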