1 /*
   2  * Copyright (c) 2014, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2013, 2026 SAP SE. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "cds/cdsConfig.hpp"
  28 #include "compiler/disassembler.hpp"
  29 #include "gc/shared/barrierSetAssembler.hpp"
  30 #include "gc/shared/tlab_globals.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "interpreter/interpreterRuntime.hpp"
  33 #include "interpreter/interp_masm.hpp"
  34 #include "interpreter/templateInterpreter.hpp"
  35 #include "interpreter/templateTable.hpp"
  36 #include "memory/universe.hpp"
  37 #include "oops/klass.inline.hpp"
  38 #include "oops/methodCounters.hpp"
  39 #include "oops/methodData.hpp"
  40 #include "oops/objArrayKlass.hpp"
  41 #include "oops/oop.inline.hpp"
  42 #include "oops/resolvedFieldEntry.hpp"
  43 #include "oops/resolvedIndyEntry.hpp"
  44 #include "oops/resolvedMethodEntry.hpp"
  45 #include "prims/jvmtiExport.hpp"
  46 #include "prims/methodHandles.hpp"
  47 #include "runtime/frame.inline.hpp"
  48 #include "runtime/safepointMechanism.hpp"
  49 #include "runtime/sharedRuntime.hpp"
  50 #include "runtime/stubRoutines.hpp"
  51 #include "runtime/synchronizer.hpp"
  52 #include "runtime/vm_version.hpp"
  53 #include "utilities/macros.hpp"
  54 #include "utilities/powerOfTwo.hpp"
  55 
  56 #undef __
  57 #define __ Disassembler::hook<InterpreterMacroAssembler>(__FILE__, __LINE__, _masm)->
  58 
  59 // ============================================================================
  60 // Misc helpers
  61 
  62 // Do an oop store like *(base + index) = val OR *(base + offset) = val
  63 // (only one of both variants is possible at the same time).
  64 // Index can be noreg.
  65 // Kills:
  66 //   Rbase, Rtmp
  67 static void do_oop_store(InterpreterMacroAssembler* _masm,
  68                          Register           base,
  69                          RegisterOrConstant offset,
  70                          Register           val,         // Noreg means always null.
  71                          Register           tmp1,
  72                          Register           tmp2,
  73                          Register           tmp3,
  74                          DecoratorSet       decorators) {
  75   assert_different_registers(tmp1, tmp2, tmp3, val, base);
  76   __ store_heap_oop(val, offset, base, tmp1, tmp2, tmp3, MacroAssembler::PRESERVATION_NONE, decorators);
  77 }
  78 
  79 static void do_oop_load(InterpreterMacroAssembler* _masm,
  80                         Register base,
  81                         RegisterOrConstant offset,
  82                         Register dst,
  83                         Register tmp1,
  84                         Register tmp2,
  85                         DecoratorSet decorators) {
  86   assert_different_registers(base, tmp1, tmp2);
  87   assert_different_registers(dst, tmp1, tmp2);
  88   __ load_heap_oop(dst, offset, base, tmp1, tmp2, MacroAssembler::PRESERVATION_NONE, decorators);
  89 }
  90 
  91 Address TemplateTable::at_bcp(int offset) {
  92   // Not used on ppc.
  93   ShouldNotReachHere();
  94   return Address();
  95 }
  96 
  97 // Patches the current bytecode (ptr to it located in bcp)
  98 // in the bytecode stream with a new one.
  99 void TemplateTable::patch_bytecode(Bytecodes::Code new_bc, Register Rnew_bc, Register Rtemp, bool load_bc_into_bc_reg /*=true*/, int byte_no) {
 100   // With sharing on, may need to test method flag.
 101   if (!RewriteBytecodes) return;
 102   Label L_patch_done;
 103 
 104   switch (new_bc) {
 105     case Bytecodes::_fast_vputfield:
 106     case Bytecodes::_fast_aputfield:
 107     case Bytecodes::_fast_bputfield:
 108     case Bytecodes::_fast_zputfield:
 109     case Bytecodes::_fast_cputfield:
 110     case Bytecodes::_fast_dputfield:
 111     case Bytecodes::_fast_fputfield:
 112     case Bytecodes::_fast_iputfield:
 113     case Bytecodes::_fast_lputfield:
 114     case Bytecodes::_fast_sputfield:
 115     {
 116       // We skip bytecode quickening for putfield instructions when
 117       // the put_code written to the constant pool cache is zero.
 118       // This is required so that every execution of this instruction
 119       // calls out to InterpreterRuntime::resolve_get_put to do
 120       // additional, required work.
 121       assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
 122       assert(load_bc_into_bc_reg, "we use bc_reg as temp");
 123       __ load_field_entry(Rtemp, Rnew_bc);
 124       int code_offset = (byte_no == f1_byte) ? in_bytes(ResolvedFieldEntry::get_code_offset())
 125                                              : in_bytes(ResolvedFieldEntry::put_code_offset());
 126       __ lbz(Rnew_bc, code_offset, Rtemp);
 127       __ cmpwi(CR0, Rnew_bc, 0);
 128       __ li(Rnew_bc, (unsigned int)(unsigned char)new_bc);
 129       __ beq(CR0, L_patch_done);
 130       // __ isync(); // acquire not needed
 131       break;
 132     }
 133 
 134     default:
 135       assert(byte_no == -1, "sanity");
 136       if (load_bc_into_bc_reg) {
 137         __ li(Rnew_bc, (unsigned int)(unsigned char)new_bc);
 138       }
 139   }
 140 
 141   if (JvmtiExport::can_post_breakpoint()) {
 142     Label L_fast_patch;
 143     __ lbz(Rtemp, 0, R14_bcp);
 144     __ cmpwi(CR0, Rtemp, (unsigned int)(unsigned char)Bytecodes::_breakpoint);
 145     __ bne(CR0, L_fast_patch);
 146     // Perform the quickening, slowly, in the bowels of the breakpoint table.
 147     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), R19_method, R14_bcp, Rnew_bc);
 148     __ b(L_patch_done);
 149     __ bind(L_fast_patch);
 150   }
 151 
 152   // Patch bytecode with release store to coordinate with ResolvedFieldEntry
 153   // and ResolvedMethodEntry loads in fast bytecode codelets.
 154   __ release();
 155   __ stb(Rnew_bc, 0, R14_bcp);
 156 
 157   __ bind(L_patch_done);
 158 }
 159 
 160 // ============================================================================
 161 // Individual instructions
 162 
 163 void TemplateTable::nop() {
 164   transition(vtos, vtos);
 165   // Nothing to do.
 166 }
 167 
 168 void TemplateTable::shouldnotreachhere() {
 169   transition(vtos, vtos);
 170   __ stop("shouldnotreachhere bytecode");
 171 }
 172 
 173 void TemplateTable::aconst_null() {
 174   transition(vtos, atos);
 175   __ li(R17_tos, 0);
 176 }
 177 
 178 void TemplateTable::iconst(int value) {
 179   transition(vtos, itos);
 180   assert(value >= -1 && value <= 5, "");
 181   __ li(R17_tos, value);
 182 }
 183 
 184 void TemplateTable::lconst(int value) {
 185   transition(vtos, ltos);
 186   assert(value >= -1 && value <= 5, "");
 187   __ li(R17_tos, value);
 188 }
 189 
 190 void TemplateTable::fconst(int value) {
 191   transition(vtos, ftos);
 192   static float zero = 0.0;
 193   static float one  = 1.0;
 194   static float two  = 2.0;
 195   switch (value) {
 196     default: ShouldNotReachHere();
 197     case 0: {
 198       int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&zero, R0, true);
 199       __ lfs(F15_ftos, simm16_offset, R11_scratch1);
 200       break;
 201     }
 202     case 1: {
 203       int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&one, R0, true);
 204       __ lfs(F15_ftos, simm16_offset, R11_scratch1);
 205       break;
 206     }
 207     case 2: {
 208       int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&two, R0, true);
 209       __ lfs(F15_ftos, simm16_offset, R11_scratch1);
 210       break;
 211     }
 212   }
 213 }
 214 
 215 void TemplateTable::dconst(int value) {
 216   transition(vtos, dtos);
 217   static double zero = 0.0;
 218   static double one  = 1.0;
 219   switch (value) {
 220     case 0: {
 221       int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&zero, R0, true);
 222       __ lfd(F15_ftos, simm16_offset, R11_scratch1);
 223       break;
 224     }
 225     case 1: {
 226       int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&one, R0, true);
 227       __ lfd(F15_ftos, simm16_offset, R11_scratch1);
 228       break;
 229     }
 230     default: ShouldNotReachHere();
 231   }
 232 }
 233 
 234 void TemplateTable::bipush() {
 235   transition(vtos, itos);
 236   __ lbz(R17_tos, 1, R14_bcp);
 237   __ extsb(R17_tos, R17_tos);
 238 }
 239 
 240 void TemplateTable::sipush() {
 241   transition(vtos, itos);
 242   __ get_2_byte_integer_at_bcp(1, R17_tos, InterpreterMacroAssembler::Signed);
 243 }
 244 
 245 void TemplateTable::ldc(LdcType type) {
 246   Register Rscratch1 = R11_scratch1,
 247            Rscratch2 = R12_scratch2,
 248            Rcpool    = R3_ARG1;
 249 
 250   transition(vtos, vtos);
 251   Label notInt, notFloat, notClass, exit;
 252 
 253   __ get_cpool_and_tags(Rcpool, Rscratch2); // Set Rscratch2 = &tags.
 254   if (is_ldc_wide(type)) { // Read index.
 255     __ get_2_byte_integer_at_bcp(1, Rscratch1, InterpreterMacroAssembler::Unsigned);
 256   } else {
 257     __ lbz(Rscratch1, 1, R14_bcp);
 258   }
 259 
 260   const int base_offset = ConstantPool::header_size() * wordSize;
 261   const int tags_offset = Array<u1>::base_offset_in_bytes();
 262 
 263   // Get type from tags.
 264   __ addi(Rscratch2, Rscratch2, tags_offset);
 265   __ lbzx(Rscratch2, Rscratch2, Rscratch1);
 266 
 267   __ cmpwi(CR0, Rscratch2, JVM_CONSTANT_UnresolvedClass); // Unresolved class?
 268   __ cmpwi(CR1, Rscratch2, JVM_CONSTANT_UnresolvedClassInError); // Unresolved class in error state?
 269   __ cror(CR0, Assembler::equal, CR1, Assembler::equal);
 270 
 271   // Resolved class - need to call vm to get java mirror of the class.
 272   __ cmpwi(CR1, Rscratch2, JVM_CONSTANT_Class);
 273   __ crnor(CR0, Assembler::equal, CR1, Assembler::equal); // Neither resolved class nor unresolved case from above?
 274   __ beq(CR0, notClass);
 275 
 276   __ li(R4, is_ldc_wide(type) ? 1 : 0);
 277   call_VM(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), R4);
 278   __ push(atos);
 279   __ b(exit);
 280 
 281   __ align(32, 12);
 282   __ bind(notClass);
 283   __ addi(Rcpool, Rcpool, base_offset);
 284   __ sldi(Rscratch1, Rscratch1, LogBytesPerWord);
 285   __ cmpdi(CR0, Rscratch2, JVM_CONSTANT_Integer);
 286   __ bne(CR0, notInt);
 287   __ lwax(R17_tos, Rcpool, Rscratch1);
 288   __ push(itos);
 289   __ b(exit);
 290 
 291   __ align(32, 12);
 292   __ bind(notInt);
 293   __ cmpdi(CR0, Rscratch2, JVM_CONSTANT_Float);
 294   __ bne(CR0, notFloat);
 295   __ lfsx(F15_ftos, Rcpool, Rscratch1);
 296   __ push(ftos);
 297   __ b(exit);
 298 
 299   __ align(32, 12);
 300   // assume the tag is for condy; if not, the VM runtime will tell us
 301   __ bind(notFloat);
 302   condy_helper(exit);
 303 
 304   __ align(32, 12);
 305   __ bind(exit);
 306 }
 307 
 308 // Fast path for caching oop constants.
 309 void TemplateTable::fast_aldc(LdcType type) {
 310   transition(vtos, atos);
 311 
 312   int index_size = is_ldc_wide(type) ? sizeof(u2) : sizeof(u1);
 313   Label is_null;
 314 
 315   // We are resolved if the resolved reference cache entry contains a
 316   // non-null object (CallSite, etc.)
 317   __ get_cache_index_at_bcp(R31, 1, index_size);  // Load index.
 318   // Only rewritten during link time. So, no need for memory barriers for accessing resolved info.
 319   __ load_resolved_reference_at_index(R17_tos, R31, R11_scratch1, R12_scratch2, &is_null);
 320 
 321   // Convert null sentinel to null
 322   int simm16_rest = __ load_const_optimized(R11_scratch1, Universe::the_null_sentinel_addr(), R0, true);
 323   __ ld(R31, simm16_rest, R11_scratch1);
 324   __ resolve_oop_handle(R31, R11_scratch1, R12_scratch2, MacroAssembler::PRESERVATION_NONE);
 325   __ cmpld(CR0, R17_tos, R31);
 326   __ isel_0(R17_tos, CR0, Assembler::equal);
 327   __ verify_oop(R17_tos);
 328   __ dispatch_epilog(atos, Bytecodes::length_for(bytecode()));
 329 
 330   __ bind(is_null);
 331   __ load_const_optimized(R3_ARG1, (int)bytecode());
 332 
 333   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
 334 
 335   // First time invocation - must resolve first.
 336   __ call_VM(R17_tos, entry, R3_ARG1);
 337   __ verify_oop(R17_tos);
 338 }
 339 
 340 void TemplateTable::ldc2_w() {
 341   transition(vtos, vtos);
 342   Label not_double, not_long, exit;
 343 
 344   Register Rindex = R11_scratch1,
 345            Rcpool = R12_scratch2,
 346            Rtag   = R3_ARG1;
 347   __ get_cpool_and_tags(Rcpool, Rtag);
 348   __ get_2_byte_integer_at_bcp(1, Rindex, InterpreterMacroAssembler::Unsigned);
 349 
 350   const int base_offset = ConstantPool::header_size() * wordSize;
 351   const int tags_offset = Array<u1>::base_offset_in_bytes();
 352   // Get type from tags.
 353   __ addi(Rcpool, Rcpool, base_offset);
 354   __ addi(Rtag, Rtag, tags_offset);
 355 
 356   __ lbzx(Rtag, Rtag, Rindex);
 357   __ sldi(Rindex, Rindex, LogBytesPerWord);
 358 
 359   __ cmpdi(CR0, Rtag, JVM_CONSTANT_Double);
 360   __ bne(CR0, not_double);
 361   __ lfdx(F15_ftos, Rcpool, Rindex);
 362   __ push(dtos);
 363   __ b(exit);
 364 
 365   __ bind(not_double);
 366   __ cmpdi(CR0, Rtag, JVM_CONSTANT_Long);
 367   __ bne(CR0, not_long);
 368   __ ldx(R17_tos, Rcpool, Rindex);
 369   __ push(ltos);
 370   __ b(exit);
 371 
 372   __ bind(not_long);
 373   condy_helper(exit);
 374 
 375   __ align(32, 12);
 376   __ bind(exit);
 377 }
 378 
 379 void TemplateTable::condy_helper(Label& Done) {
 380   const Register obj   = R31;
 381   const Register off   = R11_scratch1;
 382   const Register flags = R12_scratch2;
 383   const Register rarg  = R4_ARG2;
 384   __ li(rarg, (int)bytecode());
 385   call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg);
 386   __ get_vm_result_metadata(flags);
 387 
 388   // VMr = obj = base address to find primitive value to push
 389   // VMr2 = flags = (tos, off) using format of CPCE::_flags
 390   __ andi(off, flags, ConstantPoolCache::field_index_mask);
 391 
 392   // What sort of thing are we loading?
 393   __ rldicl(flags, flags, 64-ConstantPoolCache::tos_state_shift, 64-ConstantPoolCache::tos_state_bits);
 394 
 395   switch (bytecode()) {
 396   case Bytecodes::_ldc:
 397   case Bytecodes::_ldc_w:
 398     {
 399       // tos in (itos, ftos, stos, btos, ctos, ztos)
 400       Label notInt, notFloat, notShort, notByte, notChar, notBool;
 401       __ cmplwi(CR0, flags, itos);
 402       __ bne(CR0, notInt);
 403       // itos
 404       __ lwax(R17_tos, obj, off);
 405       __ push(itos);
 406       __ b(Done);
 407 
 408       __ bind(notInt);
 409       __ cmplwi(CR0, flags, ftos);
 410       __ bne(CR0, notFloat);
 411       // ftos
 412       __ lfsx(F15_ftos, obj, off);
 413       __ push(ftos);
 414       __ b(Done);
 415 
 416       __ bind(notFloat);
 417       __ cmplwi(CR0, flags, stos);
 418       __ bne(CR0, notShort);
 419       // stos
 420       __ lhax(R17_tos, obj, off);
 421       __ push(stos);
 422       __ b(Done);
 423 
 424       __ bind(notShort);
 425       __ cmplwi(CR0, flags, btos);
 426       __ bne(CR0, notByte);
 427       // btos
 428       __ lbzx(R17_tos, obj, off);
 429       __ extsb(R17_tos, R17_tos);
 430       __ push(btos);
 431       __ b(Done);
 432 
 433       __ bind(notByte);
 434       __ cmplwi(CR0, flags, ctos);
 435       __ bne(CR0, notChar);
 436       // ctos
 437       __ lhzx(R17_tos, obj, off);
 438       __ push(ctos);
 439       __ b(Done);
 440 
 441       __ bind(notChar);
 442       __ cmplwi(CR0, flags, ztos);
 443       __ bne(CR0, notBool);
 444       // ztos
 445       __ lbzx(R17_tos, obj, off);
 446       __ push(ztos);
 447       __ b(Done);
 448 
 449       __ bind(notBool);
 450       break;
 451     }
 452 
 453   case Bytecodes::_ldc2_w:
 454     {
 455       Label notLong, notDouble;
 456       __ cmplwi(CR0, flags, ltos);
 457       __ bne(CR0, notLong);
 458       // ltos
 459       __ ldx(R17_tos, obj, off);
 460       __ push(ltos);
 461       __ b(Done);
 462 
 463       __ bind(notLong);
 464       __ cmplwi(CR0, flags, dtos);
 465       __ bne(CR0, notDouble);
 466       // dtos
 467       __ lfdx(F15_ftos, obj, off);
 468       __ push(dtos);
 469       __ b(Done);
 470 
 471       __ bind(notDouble);
 472       break;
 473     }
 474 
 475   default:
 476     ShouldNotReachHere();
 477   }
 478 
 479   __ stop("bad ldc/condy");
 480 }
 481 
 482 // Get the locals index located in the bytecode stream at bcp + offset.
 483 void TemplateTable::locals_index(Register Rdst, int offset) {
 484   __ lbz(Rdst, offset, R14_bcp);
 485 }
 486 
 487 void TemplateTable::iload() {
 488   iload_internal();
 489 }
 490 
 491 void TemplateTable::nofast_iload() {
 492   iload_internal(may_not_rewrite);
 493 }
 494 
 495 void TemplateTable::iload_internal(RewriteControl rc) {
 496   transition(vtos, itos);
 497 
 498   // Get the local value into tos
 499   const Register Rindex = R22_tmp2;
 500   locals_index(Rindex);
 501 
 502   // Rewrite iload,iload  pair into fast_iload2
 503   //         iload,caload pair into fast_icaload
 504   if (RewriteFrequentPairs && rc == may_rewrite) {
 505     Label Lrewrite, Ldone;
 506     Register Rnext_byte  = R3_ARG1,
 507              Rrewrite_to = R6_ARG4,
 508              Rscratch    = R11_scratch1;
 509 
 510     // get next byte
 511     __ lbz(Rnext_byte, Bytecodes::length_for(Bytecodes::_iload), R14_bcp);
 512 
 513     // if _iload, wait to rewrite to iload2. We only want to rewrite the
 514     // last two iloads in a pair. Comparing against fast_iload means that
 515     // the next bytecode is neither an iload or a caload, and therefore
 516     // an iload pair.
 517     __ cmpwi(CR0, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_iload);
 518     __ beq(CR0, Ldone);
 519 
 520     __ cmpwi(CR1, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_fast_iload);
 521     __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_iload2);
 522     __ beq(CR1, Lrewrite);
 523 
 524     __ cmpwi(CR0, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_caload);
 525     __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_icaload);
 526     __ beq(CR0, Lrewrite);
 527 
 528     __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_iload);
 529 
 530     __ bind(Lrewrite);
 531     patch_bytecode(Bytecodes::_iload, Rrewrite_to, Rscratch, false);
 532     __ bind(Ldone);
 533   }
 534 
 535   __ load_local_int(R17_tos, Rindex, Rindex);
 536 }
 537 
 538 // Load 2 integers in a row without dispatching
 539 void TemplateTable::fast_iload2() {
 540   transition(vtos, itos);
 541 
 542   __ lbz(R3_ARG1, 1, R14_bcp);
 543   __ lbz(R17_tos, Bytecodes::length_for(Bytecodes::_iload) + 1, R14_bcp);
 544 
 545   __ load_local_int(R3_ARG1, R11_scratch1, R3_ARG1);
 546   __ load_local_int(R17_tos, R12_scratch2, R17_tos);
 547   __ push_i(R3_ARG1);
 548 }
 549 
 550 void TemplateTable::fast_iload() {
 551   transition(vtos, itos);
 552   // Get the local value into tos
 553 
 554   const Register Rindex = R11_scratch1;
 555   locals_index(Rindex);
 556   __ load_local_int(R17_tos, Rindex, Rindex);
 557 }
 558 
 559 // Load a local variable type long from locals area to TOS cache register.
 560 // Local index resides in bytecodestream.
 561 void TemplateTable::lload() {
 562   transition(vtos, ltos);
 563 
 564   const Register Rindex = R11_scratch1;
 565   locals_index(Rindex);
 566   __ load_local_long(R17_tos, Rindex, Rindex);
 567 }
 568 
 569 void TemplateTable::fload() {
 570   transition(vtos, ftos);
 571 
 572   const Register Rindex = R11_scratch1;
 573   locals_index(Rindex);
 574   __ load_local_float(F15_ftos, Rindex, Rindex);
 575 }
 576 
 577 void TemplateTable::dload() {
 578   transition(vtos, dtos);
 579 
 580   const Register Rindex = R11_scratch1;
 581   locals_index(Rindex);
 582   __ load_local_double(F15_ftos, Rindex, Rindex);
 583 }
 584 
 585 void TemplateTable::aload() {
 586   transition(vtos, atos);
 587 
 588   const Register Rindex = R11_scratch1;
 589   locals_index(Rindex);
 590   __ load_local_ptr(R17_tos, Rindex, Rindex);
 591 }
 592 
 593 void TemplateTable::locals_index_wide(Register Rdst) {
 594   // Offset is 2, not 1, because Lbcp points to wide prefix code.
 595   __ get_2_byte_integer_at_bcp(2, Rdst, InterpreterMacroAssembler::Unsigned);
 596 }
 597 
 598 void TemplateTable::wide_iload() {
 599   // Get the local value into tos.
 600 
 601   const Register Rindex = R11_scratch1;
 602   locals_index_wide(Rindex);
 603   __ load_local_int(R17_tos, Rindex, Rindex);
 604 }
 605 
 606 void TemplateTable::wide_lload() {
 607   transition(vtos, ltos);
 608 
 609   const Register Rindex = R11_scratch1;
 610   locals_index_wide(Rindex);
 611   __ load_local_long(R17_tos, Rindex, Rindex);
 612 }
 613 
 614 void TemplateTable::wide_fload() {
 615   transition(vtos, ftos);
 616 
 617   const Register Rindex = R11_scratch1;
 618   locals_index_wide(Rindex);
 619   __ load_local_float(F15_ftos, Rindex, Rindex);
 620 }
 621 
 622 void TemplateTable::wide_dload() {
 623   transition(vtos, dtos);
 624 
 625   const Register Rindex = R11_scratch1;
 626   locals_index_wide(Rindex);
 627   __ load_local_double(F15_ftos, Rindex, Rindex);
 628 }
 629 
 630 void TemplateTable::wide_aload() {
 631   transition(vtos, atos);
 632 
 633   const Register Rindex = R11_scratch1;
 634   locals_index_wide(Rindex);
 635   __ load_local_ptr(R17_tos, Rindex, Rindex);
 636 }
 637 
 638 void TemplateTable::iaload() {
 639   transition(itos, itos);
 640 
 641   const Register Rload_addr = R3_ARG1,
 642                  Rarray     = R4_ARG2,
 643                  Rtemp      = R5_ARG3;
 644   __ index_check(Rarray, R17_tos /* index */, LogBytesPerInt, Rtemp, Rload_addr);
 645   __ lwa(R17_tos, arrayOopDesc::base_offset_in_bytes(T_INT), Rload_addr);
 646 }
 647 
 648 void TemplateTable::laload() {
 649   transition(itos, ltos);
 650 
 651   const Register Rload_addr = R3_ARG1,
 652                  Rarray     = R4_ARG2,
 653                  Rtemp      = R5_ARG3;
 654   __ index_check(Rarray, R17_tos /* index */, LogBytesPerLong, Rtemp, Rload_addr);
 655   __ ld(R17_tos, arrayOopDesc::base_offset_in_bytes(T_LONG), Rload_addr);
 656 }
 657 
 658 void TemplateTable::faload() {
 659   transition(itos, ftos);
 660 
 661   const Register Rload_addr = R3_ARG1,
 662                  Rarray     = R4_ARG2,
 663                  Rtemp      = R5_ARG3;
 664   __ index_check(Rarray, R17_tos /* index */, LogBytesPerInt, Rtemp, Rload_addr);
 665   __ lfs(F15_ftos, arrayOopDesc::base_offset_in_bytes(T_FLOAT), Rload_addr);
 666 }
 667 
 668 void TemplateTable::daload() {
 669   transition(itos, dtos);
 670 
 671   const Register Rload_addr = R3_ARG1,
 672                  Rarray     = R4_ARG2,
 673                  Rtemp      = R5_ARG3;
 674   __ index_check(Rarray, R17_tos /* index */, LogBytesPerLong, Rtemp, Rload_addr);
 675   __ lfd(F15_ftos, arrayOopDesc::base_offset_in_bytes(T_DOUBLE), Rload_addr);
 676 }
 677 
 678 void TemplateTable::aaload() {
 679   transition(itos, atos);
 680 
 681   // tos: index
 682   // result tos: array
 683   const Register Rload_addr = R3_ARG1,
 684                  Rarray     = R4_ARG2,
 685                  Rtemp      = R5_ARG3,
 686                  Rtemp2     = R31;
 687   __ index_check(Rarray, R17_tos /* index */, UseCompressedOops ? 2 : LogBytesPerWord, Rtemp, Rload_addr);
 688   __ profile_array_type<ArrayLoadData>(Rarray, R11_scratch1, R12_scratch2);
 689   if (UseArrayFlattening) {
 690     Label is_flat_array, cont;
 691 
 692     __ test_flat_array_oop(Rarray, Rtemp, is_flat_array);
 693     do_oop_load(_masm, Rload_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), R17_tos, Rtemp, Rtemp2,
 694                 IS_ARRAY);
 695     __ verify_oop(R17_tos);
 696     __ b(cont);
 697 
 698     __ bind(is_flat_array);
 699     __ call_VM(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::flat_array_load), Rarray, R17_tos);
 700     __ bind(cont);
 701   } else {
 702     do_oop_load(_masm, Rload_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), R17_tos, Rtemp, Rtemp2,
 703                 IS_ARRAY);
 704     __ verify_oop(R17_tos);
 705   }
 706   __ profile_element_type(R17_tos, Rtemp, Rtemp2);
 707 
 708   //__ dcbt(R17_tos); // prefetch
 709 }
 710 
 711 void TemplateTable::baload() {
 712   transition(itos, itos);
 713 
 714   const Register Rload_addr = R3_ARG1,
 715                  Rarray     = R4_ARG2,
 716                  Rtemp      = R5_ARG3;
 717   __ index_check(Rarray, R17_tos /* index */, 0, Rtemp, Rload_addr);
 718   __ lbz(R17_tos, arrayOopDesc::base_offset_in_bytes(T_BYTE), Rload_addr);
 719   __ extsb(R17_tos, R17_tos);
 720 }
 721 
 722 void TemplateTable::caload() {
 723   transition(itos, itos);
 724 
 725   const Register Rload_addr = R3_ARG1,
 726                  Rarray     = R4_ARG2,
 727                  Rtemp      = R5_ARG3;
 728   __ index_check(Rarray, R17_tos /* index */, LogBytesPerShort, Rtemp, Rload_addr);
 729   __ lhz(R17_tos, arrayOopDesc::base_offset_in_bytes(T_CHAR), Rload_addr);
 730 }
 731 
 732 // Iload followed by caload frequent pair.
 733 void TemplateTable::fast_icaload() {
 734   transition(vtos, itos);
 735 
 736   const Register Rload_addr = R3_ARG1,
 737                  Rarray     = R4_ARG2,
 738                  Rtemp      = R11_scratch1;
 739 
 740   locals_index(R17_tos);
 741   __ load_local_int(R17_tos, Rtemp, R17_tos);
 742   __ index_check(Rarray, R17_tos /* index */, LogBytesPerShort, Rtemp, Rload_addr);
 743   __ lhz(R17_tos, arrayOopDesc::base_offset_in_bytes(T_CHAR), Rload_addr);
 744 }
 745 
 746 void TemplateTable::saload() {
 747   transition(itos, itos);
 748 
 749   const Register Rload_addr = R11_scratch1,
 750                  Rarray     = R12_scratch2,
 751                  Rtemp      = R3_ARG1;
 752   __ index_check(Rarray, R17_tos /* index */, LogBytesPerShort, Rtemp, Rload_addr);
 753   __ lha(R17_tos, arrayOopDesc::base_offset_in_bytes(T_SHORT), Rload_addr);
 754 }
 755 
 756 void TemplateTable::iload(int n) {
 757   transition(vtos, itos);
 758 
 759   __ lwz(R17_tos, Interpreter::local_offset_in_bytes(n), R18_locals);
 760 }
 761 
 762 void TemplateTable::lload(int n) {
 763   transition(vtos, ltos);
 764 
 765   __ ld(R17_tos, Interpreter::local_offset_in_bytes(n + 1), R18_locals);
 766 }
 767 
 768 void TemplateTable::fload(int n) {
 769   transition(vtos, ftos);
 770 
 771   __ lfs(F15_ftos, Interpreter::local_offset_in_bytes(n), R18_locals);
 772 }
 773 
 774 void TemplateTable::dload(int n) {
 775   transition(vtos, dtos);
 776 
 777   __ lfd(F15_ftos, Interpreter::local_offset_in_bytes(n + 1), R18_locals);
 778 }
 779 
 780 void TemplateTable::aload(int n) {
 781   transition(vtos, atos);
 782 
 783   __ ld(R17_tos, Interpreter::local_offset_in_bytes(n), R18_locals);
 784 }
 785 
 786 void TemplateTable::aload_0() {
 787   aload_0_internal();
 788 }
 789 
 790 void TemplateTable::nofast_aload_0() {
 791   aload_0_internal(may_not_rewrite);
 792 }
 793 
 794 void TemplateTable::aload_0_internal(RewriteControl rc) {
 795   transition(vtos, atos);
 796   // According to bytecode histograms, the pairs:
 797   //
 798   // _aload_0, _fast_igetfield
 799   // _aload_0, _fast_agetfield
 800   // _aload_0, _fast_fgetfield
 801   //
 802   // occur frequently. If RewriteFrequentPairs is set, the (slow)
 803   // _aload_0 bytecode checks if the next bytecode is either
 804   // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
 805   // rewrites the current bytecode into a pair bytecode; otherwise it
 806   // rewrites the current bytecode into _0 that doesn't do
 807   // the pair check anymore.
 808   //
 809   // Note: If the next bytecode is _getfield, the rewrite must be
 810   //       delayed, otherwise we may miss an opportunity for a pair.
 811   //
 812   // Also rewrite frequent pairs
 813   //   aload_0, aload_1
 814   //   aload_0, iload_1
 815   // These bytecodes with a small amount of code are most profitable
 816   // to rewrite.
 817 
 818   if (RewriteFrequentPairs && rc == may_rewrite) {
 819 
 820     Label Lrewrite, Ldont_rewrite;
 821     Register Rnext_byte  = R3_ARG1,
 822              Rrewrite_to = R6_ARG4,
 823              Rscratch    = R11_scratch1;
 824 
 825     // Get next byte.
 826     __ lbz(Rnext_byte, Bytecodes::length_for(Bytecodes::_aload_0), R14_bcp);
 827 
 828     // If _getfield, wait to rewrite. We only want to rewrite the last two bytecodes in a pair.
 829     __ cmpwi(CR0, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_getfield);
 830     __ beq(CR0, Ldont_rewrite);
 831 
 832     __ cmpwi(CR1, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_fast_igetfield);
 833     __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_iaccess_0);
 834     __ beq(CR1, Lrewrite);
 835 
 836     __ cmpwi(CR0, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_fast_agetfield);
 837     __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_aaccess_0);
 838     __ beq(CR0, Lrewrite);
 839 
 840     __ cmpwi(CR1, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_fast_fgetfield);
 841     __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_faccess_0);
 842     __ beq(CR1, Lrewrite);
 843 
 844     __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_aload_0);
 845 
 846     __ bind(Lrewrite);
 847     patch_bytecode(Bytecodes::_aload_0, Rrewrite_to, Rscratch, false);
 848     __ bind(Ldont_rewrite);
 849   }
 850 
 851   // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop).
 852   aload(0);
 853 }
 854 
 855 void TemplateTable::istore() {
 856   transition(itos, vtos);
 857 
 858   const Register Rindex = R11_scratch1;
 859   locals_index(Rindex);
 860   __ store_local_int(R17_tos, Rindex);
 861 }
 862 
 863 void TemplateTable::lstore() {
 864   transition(ltos, vtos);
 865   const Register Rindex = R11_scratch1;
 866   locals_index(Rindex);
 867   __ store_local_long(R17_tos, Rindex);
 868 }
 869 
 870 void TemplateTable::fstore() {
 871   transition(ftos, vtos);
 872 
 873   const Register Rindex = R11_scratch1;
 874   locals_index(Rindex);
 875   __ store_local_float(F15_ftos, Rindex);
 876 }
 877 
 878 void TemplateTable::dstore() {
 879   transition(dtos, vtos);
 880 
 881   const Register Rindex = R11_scratch1;
 882   locals_index(Rindex);
 883   __ store_local_double(F15_ftos, Rindex);
 884 }
 885 
 886 void TemplateTable::astore() {
 887   transition(vtos, vtos);
 888 
 889   const Register Rindex = R11_scratch1;
 890   __ pop_ptr();
 891   __ verify_oop_or_return_address(R17_tos, Rindex);
 892   locals_index(Rindex);
 893   __ store_local_ptr(R17_tos, Rindex);
 894 }
 895 
 896 void TemplateTable::wide_istore() {
 897   transition(vtos, vtos);
 898 
 899   const Register Rindex = R11_scratch1;
 900   __ pop_i();
 901   locals_index_wide(Rindex);
 902   __ store_local_int(R17_tos, Rindex);
 903 }
 904 
 905 void TemplateTable::wide_lstore() {
 906   transition(vtos, vtos);
 907 
 908   const Register Rindex = R11_scratch1;
 909   __ pop_l();
 910   locals_index_wide(Rindex);
 911   __ store_local_long(R17_tos, Rindex);
 912 }
 913 
 914 void TemplateTable::wide_fstore() {
 915   transition(vtos, vtos);
 916 
 917   const Register Rindex = R11_scratch1;
 918   __ pop_f();
 919   locals_index_wide(Rindex);
 920   __ store_local_float(F15_ftos, Rindex);
 921 }
 922 
 923 void TemplateTable::wide_dstore() {
 924   transition(vtos, vtos);
 925 
 926   const Register Rindex = R11_scratch1;
 927   __ pop_d();
 928   locals_index_wide(Rindex);
 929   __ store_local_double(F15_ftos, Rindex);
 930 }
 931 
 932 void TemplateTable::wide_astore() {
 933   transition(vtos, vtos);
 934 
 935   const Register Rindex = R11_scratch1;
 936   __ pop_ptr();
 937   __ verify_oop_or_return_address(R17_tos, Rindex);
 938   locals_index_wide(Rindex);
 939   __ store_local_ptr(R17_tos, Rindex);
 940 }
 941 
 942 void TemplateTable::iastore() {
 943   transition(itos, vtos);
 944 
 945   const Register Rindex      = R3_ARG1,
 946                  Rstore_addr = R4_ARG2,
 947                  Rarray      = R5_ARG3,
 948                  Rtemp       = R6_ARG4;
 949   __ pop_i(Rindex);
 950   __ index_check(Rarray, Rindex, LogBytesPerInt, Rtemp, Rstore_addr);
 951   __ stw(R17_tos, arrayOopDesc::base_offset_in_bytes(T_INT), Rstore_addr);
 952   }
 953 
 954 void TemplateTable::lastore() {
 955   transition(ltos, vtos);
 956 
 957   const Register Rindex      = R3_ARG1,
 958                  Rstore_addr = R4_ARG2,
 959                  Rarray      = R5_ARG3,
 960                  Rtemp       = R6_ARG4;
 961   __ pop_i(Rindex);
 962   __ index_check(Rarray, Rindex, LogBytesPerLong, Rtemp, Rstore_addr);
 963   __ std(R17_tos, arrayOopDesc::base_offset_in_bytes(T_LONG), Rstore_addr);
 964   }
 965 
 966 void TemplateTable::fastore() {
 967   transition(ftos, vtos);
 968 
 969   const Register Rindex      = R3_ARG1,
 970                  Rstore_addr = R4_ARG2,
 971                  Rarray      = R5_ARG3,
 972                  Rtemp       = R6_ARG4;
 973   __ pop_i(Rindex);
 974   __ index_check(Rarray, Rindex, LogBytesPerInt, Rtemp, Rstore_addr);
 975   __ stfs(F15_ftos, arrayOopDesc::base_offset_in_bytes(T_FLOAT), Rstore_addr);
 976   }
 977 
 978 void TemplateTable::dastore() {
 979   transition(dtos, vtos);
 980 
 981   const Register Rindex      = R3_ARG1,
 982                  Rstore_addr = R4_ARG2,
 983                  Rarray      = R5_ARG3,
 984                  Rtemp       = R6_ARG4;
 985   __ pop_i(Rindex);
 986   __ index_check(Rarray, Rindex, LogBytesPerLong, Rtemp, Rstore_addr);
 987   __ stfd(F15_ftos, arrayOopDesc::base_offset_in_bytes(T_DOUBLE), Rstore_addr);
 988   }
 989 
// aastore: store a reference into an object array.
// Value, index and array are read from the expression stack without popping;
// the three slots are only released at Ldone. In between, the template does
// the bounds check, profiling, the array store check (subtype check of the
// value's klass against the array's element klass) and finally the oop store.
// Storing null skips the subtype check. When UseArrayFlattening is set, flat
// arrays are handled by a VM call instead.
void TemplateTable::aastore() {
  transition(vtos, vtos);

  Label Lstore_ok, Lis_null, Lis_flat_array, Lwrite_null_to_null_free_array, Ldone;
  const Register Rindex    = R6_ARG4,
                 Rarray    = R5_ARG3,
                 Rscratch  = R11_scratch1,
                 Rscratch2 = R12_scratch2,
                 Rarray_klass = R4_ARG2,
                 Rarray_element_klass = Rarray_klass, // Same register: the element klass replaces the array klass.
                 Rvalue_klass = R3_ARG1,
                 Rstore_addr = R31;    // Use register which survives VM call.

  __ ld(R17_tos, Interpreter::expr_offset_in_bytes(0), R15_esp); // Get value to store.
  __ lwz(Rindex, Interpreter::expr_offset_in_bytes(1), R15_esp); // Get index.
  __ ld(Rarray, Interpreter::expr_offset_in_bytes(2), R15_esp);  // Get array.

  __ verify_oop(R17_tos);
  // Element size is 4 bytes (shift 2) with compressed oops, a full word otherwise.
  __ index_check_without_pop(Rarray, Rindex, UseCompressedOops ? 2 : LogBytesPerWord, Rscratch, Rstore_addr);

  __ profile_array_type<ArrayStoreData>(Rarray, Rscratch, Rscratch2);
  __ profile_multiple_element_types(R17_tos, Rscratch, Rscratch2, /* temp */ Rarray_klass);

  if (UseArrayFlattening) {
    // Flat arrays take the VM call path at Lis_flat_array.
    __ load_klass(Rarray_klass, Rarray);
    __ lwz(Rscratch, in_bytes(Klass::layout_helper_offset()), Rarray_klass);
    __ test_flat_array_layout(Rscratch, Lis_flat_array);
  }

  // Do array store check - check for null value first.
  __ cmpdi(CR0, R17_tos, 0);
  __ beq(CR0, Lis_null);

  // Rindex is dead!
  Register Rscratch3 = Rindex;

  if (!UseArrayFlattening) {
    __ load_klass(Rarray_klass, Rarray); // haven't done this above
  }
  __ load_klass(Rvalue_klass, R17_tos);

  // Do fast instanceof cache test.
  __ ld(Rarray_element_klass, in_bytes(ObjArrayKlass::element_klass_offset()), Rarray_klass);

  // Generate a fast subtype check. Branch to store_ok if no failure. Throw if failure.
  __ gen_subtype_check(Rvalue_klass /*subklass*/, Rarray_element_klass /*superklass*/,
                       Rscratch, Rscratch2, Rscratch3, Lstore_ok, false);

  // Fell through: subtype check failed => throw an exception.
  __ load_dispatch_table(R11_scratch1, (address*)Interpreter::_throw_ArrayStoreException_entry);
  __ mtctr(R11_scratch1);
  __ bctr();

  if (UseArrayFlattening) {
    __ bind(Lis_flat_array); // Store non-null value to flat
    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::flat_array_store), R17_tos, Rarray, Rindex);
    __ b(Ldone);
  }

  __ bind(Lis_null);
  if (Arguments::is_valhalla_enabled()) {
    // No way to store null in null-free array
    __ test_null_free_array_oop(Rarray, Rscratch, Lwrite_null_to_null_free_array);
  }
  // Null store: noreg is passed as the value.
  do_oop_store(_masm, Rstore_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), noreg /* 0 */,
               Rscratch, Rscratch2, Rscratch3, IS_ARRAY);
  __ b(Ldone);

  if (Arguments::is_valhalla_enabled()) {
    // Null written to a null-free array => dispatch to the NullPointerException entry.
    __ bind(Lwrite_null_to_null_free_array);
    __ load_dispatch_table(Rscratch, (address*)Interpreter::_throw_NullPointerException_entry);
    __ mtctr(Rscratch);
    __ bctr();
  }

  // Store is OK.
  __ align(32, 12);
  __ bind(Lstore_ok);
  do_oop_store(_masm, Rstore_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), R17_tos /* value */,
               Rscratch, Rscratch2, Rscratch3, IS_ARRAY | IS_NOT_NULL);

  __ bind(Ldone);
  // Adjust sp (pops array, index and value).
  __ addi(R15_esp, R15_esp, 3 * Interpreter::stackElementSize);
}
1076 
1077 void TemplateTable::bastore() {
1078   transition(itos, vtos);
1079 
1080   const Register Rindex   = R11_scratch1,
1081                  Rarray   = R12_scratch2,
1082                  Rscratch = R3_ARG1;
1083   __ pop_i(Rindex);
1084   __ pop_ptr(Rarray);
1085   // tos: val
1086 
1087   // Need to check whether array is boolean or byte
1088   // since both types share the bastore bytecode.
1089   __ load_klass_check_null_throw(Rscratch, Rarray, Rscratch);
1090   __ lwz(Rscratch, in_bytes(Klass::layout_helper_offset()), Rscratch);
1091   int diffbit = exact_log2(Klass::layout_helper_boolean_diffbit());
1092   __ testbitdi(CR0, R0, Rscratch, diffbit);
1093   Label L_skip;
1094   __ bfalse(CR0, L_skip);
1095   __ andi(R17_tos, R17_tos, 1);  // if it is a T_BOOLEAN array, mask the stored value to 0/1
1096   __ bind(L_skip);
1097 
1098   __ index_check_without_pop(Rarray, Rindex, 0, Rscratch, Rarray);
1099   __ stb(R17_tos, arrayOopDesc::base_offset_in_bytes(T_BYTE), Rarray);
1100 }
1101 
1102 void TemplateTable::castore() {
1103   transition(itos, vtos);
1104 
1105   const Register Rindex   = R11_scratch1,
1106                  Rarray   = R12_scratch2,
1107                  Rscratch = R3_ARG1;
1108   __ pop_i(Rindex);
1109   // tos: val
1110   // Rarray: array ptr (popped by index_check)
1111   __ index_check(Rarray, Rindex, LogBytesPerShort, Rscratch, Rarray);
1112   __ sth(R17_tos, arrayOopDesc::base_offset_in_bytes(T_CHAR), Rarray);
1113 }
1114 
1115 void TemplateTable::sastore() {
1116   castore();
1117 }
1118 
1119 void TemplateTable::istore(int n) {
1120   transition(itos, vtos);
1121   __ stw(R17_tos, Interpreter::local_offset_in_bytes(n), R18_locals);
1122 }
1123 
1124 void TemplateTable::lstore(int n) {
1125   transition(ltos, vtos);
1126   __ std(R17_tos, Interpreter::local_offset_in_bytes(n + 1), R18_locals);
1127 }
1128 
1129 void TemplateTable::fstore(int n) {
1130   transition(ftos, vtos);
1131   __ stfs(F15_ftos, Interpreter::local_offset_in_bytes(n), R18_locals);
1132 }
1133 
1134 void TemplateTable::dstore(int n) {
1135   transition(dtos, vtos);
1136   __ stfd(F15_ftos, Interpreter::local_offset_in_bytes(n + 1), R18_locals);
1137 }
1138 
1139 void TemplateTable::astore(int n) {
1140   transition(vtos, vtos);
1141 
1142   __ pop_ptr();
1143   __ verify_oop_or_return_address(R17_tos, R11_scratch1);
1144   __ std(R17_tos, Interpreter::local_offset_in_bytes(n), R18_locals);
1145 }
1146 
1147 void TemplateTable::pop() {
1148   transition(vtos, vtos);
1149 
1150   __ addi(R15_esp, R15_esp, Interpreter::stackElementSize);
1151 }
1152 
1153 void TemplateTable::pop2() {
1154   transition(vtos, vtos);
1155 
1156   __ addi(R15_esp, R15_esp, Interpreter::stackElementSize * 2);
1157 }
1158 
1159 void TemplateTable::dup() {
1160   transition(vtos, vtos);
1161 
1162   __ ld(R11_scratch1, Interpreter::stackElementSize, R15_esp);
1163   __ push_ptr(R11_scratch1);
1164 }
1165 
1166 void TemplateTable::dup_x1() {
1167   transition(vtos, vtos);
1168 
1169   Register Ra = R11_scratch1,
1170            Rb = R12_scratch2;
1171   // stack: ..., a, b
1172   __ ld(Rb, Interpreter::stackElementSize,     R15_esp);
1173   __ ld(Ra, Interpreter::stackElementSize * 2, R15_esp);
1174   __ std(Rb, Interpreter::stackElementSize * 2, R15_esp);
1175   __ std(Ra, Interpreter::stackElementSize,     R15_esp);
1176   __ push_ptr(Rb);
1177   // stack: ..., b, a, b
1178 }
1179 
1180 void TemplateTable::dup_x2() {
1181   transition(vtos, vtos);
1182 
1183   Register Ra = R11_scratch1,
1184            Rb = R12_scratch2,
1185            Rc = R3_ARG1;
1186 
1187   // stack: ..., a, b, c
1188   __ ld(Rc, Interpreter::stackElementSize,     R15_esp);  // load c
1189   __ ld(Ra, Interpreter::stackElementSize * 3, R15_esp);  // load a
1190   __ std(Rc, Interpreter::stackElementSize * 3, R15_esp); // store c in a
1191   __ ld(Rb, Interpreter::stackElementSize * 2, R15_esp);  // load b
1192   // stack: ..., c, b, c
1193   __ std(Ra, Interpreter::stackElementSize * 2, R15_esp); // store a in b
1194   // stack: ..., c, a, c
1195   __ std(Rb, Interpreter::stackElementSize,     R15_esp); // store b in c
1196   __ push_ptr(Rc);                                        // push c
1197   // stack: ..., c, a, b, c
1198 }
1199 
1200 void TemplateTable::dup2() {
1201   transition(vtos, vtos);
1202 
1203   Register Ra = R11_scratch1,
1204            Rb = R12_scratch2;
1205   // stack: ..., a, b
1206   __ ld(Rb, Interpreter::stackElementSize,     R15_esp);
1207   __ ld(Ra, Interpreter::stackElementSize * 2, R15_esp);
1208   __ push_2ptrs(Ra, Rb);
1209   // stack: ..., a, b, a, b
1210 }
1211 
1212 void TemplateTable::dup2_x1() {
1213   transition(vtos, vtos);
1214 
1215   Register Ra = R11_scratch1,
1216            Rb = R12_scratch2,
1217            Rc = R3_ARG1;
1218   // stack: ..., a, b, c
1219   __ ld(Rc, Interpreter::stackElementSize,     R15_esp);
1220   __ ld(Rb, Interpreter::stackElementSize * 2, R15_esp);
1221   __ std(Rc, Interpreter::stackElementSize * 2, R15_esp);
1222   __ ld(Ra, Interpreter::stackElementSize * 3, R15_esp);
1223   __ std(Ra, Interpreter::stackElementSize,     R15_esp);
1224   __ std(Rb, Interpreter::stackElementSize * 3, R15_esp);
1225   // stack: ..., b, c, a
1226   __ push_2ptrs(Rb, Rc);
1227   // stack: ..., b, c, a, b, c
1228 }
1229 
1230 void TemplateTable::dup2_x2() {
1231   transition(vtos, vtos);
1232 
1233   Register Ra = R11_scratch1,
1234            Rb = R12_scratch2,
1235            Rc = R3_ARG1,
1236            Rd = R4_ARG2;
1237   // stack: ..., a, b, c, d
1238   __ ld(Rb, Interpreter::stackElementSize * 3, R15_esp);
1239   __ ld(Rd, Interpreter::stackElementSize,     R15_esp);
1240   __ std(Rb, Interpreter::stackElementSize,     R15_esp);  // store b in d
1241   __ std(Rd, Interpreter::stackElementSize * 3, R15_esp);  // store d in b
1242   __ ld(Ra, Interpreter::stackElementSize * 4, R15_esp);
1243   __ ld(Rc, Interpreter::stackElementSize * 2, R15_esp);
1244   __ std(Ra, Interpreter::stackElementSize * 2, R15_esp);  // store a in c
1245   __ std(Rc, Interpreter::stackElementSize * 4, R15_esp);  // store c in a
1246   // stack: ..., c, d, a, b
1247   __ push_2ptrs(Rc, Rd);
1248   // stack: ..., c, d, a, b, c, d
1249 }
1250 
1251 void TemplateTable::swap() {
1252   transition(vtos, vtos);
1253   // stack: ..., a, b
1254 
1255   Register Ra = R11_scratch1,
1256            Rb = R12_scratch2;
1257   // stack: ..., a, b
1258   __ ld(Rb, Interpreter::stackElementSize,     R15_esp);
1259   __ ld(Ra, Interpreter::stackElementSize * 2, R15_esp);
1260   __ std(Rb, Interpreter::stackElementSize * 2, R15_esp);
1261   __ std(Ra, Interpreter::stackElementSize,     R15_esp);
1262   // stack: ..., b, a
1263 }
1264 
1265 void TemplateTable::iop2(Operation op) {
1266   transition(itos, itos);
1267 
1268   Register Rscratch = R11_scratch1;
1269 
1270   __ pop_i(Rscratch);
1271   // tos  = number of bits to shift
1272   // Rscratch = value to shift
1273   switch (op) {
1274     case  add:   __ add(R17_tos, Rscratch, R17_tos); break;
1275     case  sub:   __ sub(R17_tos, Rscratch, R17_tos); break;
1276     case  mul:   __ mullw(R17_tos, Rscratch, R17_tos); break;
1277     case  _and:  __ andr(R17_tos, Rscratch, R17_tos); break;
1278     case  _or:   __ orr(R17_tos, Rscratch, R17_tos); break;
1279     case  _xor:  __ xorr(R17_tos, Rscratch, R17_tos); break;
1280     case  shl:   __ rldicl(R17_tos, R17_tos, 0, 64-5); __ slw(R17_tos, Rscratch, R17_tos); break;
1281     case  shr:   __ rldicl(R17_tos, R17_tos, 0, 64-5); __ sraw(R17_tos, Rscratch, R17_tos); break;
1282     case  ushr:  __ rldicl(R17_tos, R17_tos, 0, 64-5); __ srw(R17_tos, Rscratch, R17_tos); break;
1283     default:     ShouldNotReachHere();
1284   }
1285 }
1286 
1287 void TemplateTable::lop2(Operation op) {
1288   transition(ltos, ltos);
1289 
1290   Register Rscratch = R11_scratch1;
1291   __ pop_l(Rscratch);
1292   switch (op) {
1293     case  add:   __ add(R17_tos, Rscratch, R17_tos); break;
1294     case  sub:   __ sub(R17_tos, Rscratch, R17_tos); break;
1295     case  _and:  __ andr(R17_tos, Rscratch, R17_tos); break;
1296     case  _or:   __ orr(R17_tos, Rscratch, R17_tos); break;
1297     case  _xor:  __ xorr(R17_tos, Rscratch, R17_tos); break;
1298     default:     ShouldNotReachHere();
1299   }
1300 }
1301 
1302 void TemplateTable::idiv() {
1303   transition(itos, itos);
1304 
1305   Label Lnormal, Lexception, Ldone;
1306   Register Rdividend = R11_scratch1; // Used by irem.
1307 
1308   __ addi(R0, R17_tos, 1);
1309   __ cmplwi(CR0, R0, 2);
1310   __ bgt(CR0, Lnormal); // divisor <-1 or >1
1311 
1312   __ cmpwi(CR1, R17_tos, 0);
1313   __ beq(CR1, Lexception); // divisor == 0
1314 
1315   __ pop_i(Rdividend);
1316   __ mullw(R17_tos, Rdividend, R17_tos); // div by +/-1
1317   __ b(Ldone);
1318 
1319   __ bind(Lexception);
1320   __ load_dispatch_table(R11_scratch1, (address*)Interpreter::_throw_ArithmeticException_entry);
1321   __ mtctr(R11_scratch1);
1322   __ bctr();
1323 
1324   __ align(32, 12);
1325   __ bind(Lnormal);
1326   __ pop_i(Rdividend);
1327   __ divw(R17_tos, Rdividend, R17_tos); // Can't divide minint/-1.
1328   __ bind(Ldone);
1329 }
1330 
1331 void TemplateTable::irem() {
1332   transition(itos, itos);
1333 
1334   __ mr(R12_scratch2, R17_tos);
1335   idiv();
1336   __ mullw(R17_tos, R17_tos, R12_scratch2);
1337   __ subf(R17_tos, R17_tos, R11_scratch1); // Dividend set by idiv.
1338 }
1339 
1340 void TemplateTable::lmul() {
1341   transition(ltos, ltos);
1342 
1343   __ pop_l(R11_scratch1);
1344   __ mulld(R17_tos, R11_scratch1, R17_tos);
1345 }
1346 
1347 void TemplateTable::ldiv() {
1348   transition(ltos, ltos);
1349 
1350   Label Lnormal, Lexception, Ldone;
1351   Register Rdividend = R11_scratch1; // Used by lrem.
1352 
1353   __ addi(R0, R17_tos, 1);
1354   __ cmpldi(CR0, R0, 2);
1355   __ bgt(CR0, Lnormal); // divisor <-1 or >1
1356 
1357   __ cmpdi(CR1, R17_tos, 0);
1358   __ beq(CR1, Lexception); // divisor == 0
1359 
1360   __ pop_l(Rdividend);
1361   __ mulld(R17_tos, Rdividend, R17_tos); // div by +/-1
1362   __ b(Ldone);
1363 
1364   __ bind(Lexception);
1365   __ load_dispatch_table(R11_scratch1, (address*)Interpreter::_throw_ArithmeticException_entry);
1366   __ mtctr(R11_scratch1);
1367   __ bctr();
1368 
1369   __ align(32, 12);
1370   __ bind(Lnormal);
1371   __ pop_l(Rdividend);
1372   __ divd(R17_tos, Rdividend, R17_tos); // Can't divide minint/-1.
1373   __ bind(Ldone);
1374 }
1375 
1376 void TemplateTable::lrem() {
1377   transition(ltos, ltos);
1378 
1379   __ mr(R12_scratch2, R17_tos);
1380   ldiv();
1381   __ mulld(R17_tos, R17_tos, R12_scratch2);
1382   __ subf(R17_tos, R17_tos, R11_scratch1); // Dividend set by ldiv.
1383 }
1384 
1385 void TemplateTable::lshl() {
1386   transition(itos, ltos);
1387 
1388   __ rldicl(R17_tos, R17_tos, 0, 64-6); // Extract least significant bits.
1389   __ pop_l(R11_scratch1);
1390   __ sld(R17_tos, R11_scratch1, R17_tos);
1391 }
1392 
1393 void TemplateTable::lshr() {
1394   transition(itos, ltos);
1395 
1396   __ rldicl(R17_tos, R17_tos, 0, 64-6); // Extract least significant bits.
1397   __ pop_l(R11_scratch1);
1398   __ srad(R17_tos, R11_scratch1, R17_tos);
1399 }
1400 
1401 void TemplateTable::lushr() {
1402   transition(itos, ltos);
1403 
1404   __ rldicl(R17_tos, R17_tos, 0, 64-6); // Extract least significant bits.
1405   __ pop_l(R11_scratch1);
1406   __ srd(R17_tos, R11_scratch1, R17_tos);
1407 }
1408 
1409 void TemplateTable::fop2(Operation op) {
1410   transition(ftos, ftos);
1411 
1412   switch (op) {
1413     case add: __ pop_f(F0_SCRATCH); __ fadds(F15_ftos, F0_SCRATCH, F15_ftos); break;
1414     case sub: __ pop_f(F0_SCRATCH); __ fsubs(F15_ftos, F0_SCRATCH, F15_ftos); break;
1415     case mul: __ pop_f(F0_SCRATCH); __ fmuls(F15_ftos, F0_SCRATCH, F15_ftos); break;
1416     case div: __ pop_f(F0_SCRATCH); __ fdivs(F15_ftos, F0_SCRATCH, F15_ftos); break;
1417     case rem:
1418       __ pop_f(F1_ARG1);
1419       __ fmr(F2_ARG2, F15_ftos);
1420       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem));
1421       __ fmr(F15_ftos, F1_RET);
1422       break;
1423 
1424     default: ShouldNotReachHere();
1425   }
1426 }
1427 
1428 void TemplateTable::dop2(Operation op) {
1429   transition(dtos, dtos);
1430 
1431   switch (op) {
1432     case add: __ pop_d(F0_SCRATCH); __ fadd(F15_ftos, F0_SCRATCH, F15_ftos); break;
1433     case sub: __ pop_d(F0_SCRATCH); __ fsub(F15_ftos, F0_SCRATCH, F15_ftos); break;
1434     case mul: __ pop_d(F0_SCRATCH); __ fmul(F15_ftos, F0_SCRATCH, F15_ftos); break;
1435     case div: __ pop_d(F0_SCRATCH); __ fdiv(F15_ftos, F0_SCRATCH, F15_ftos); break;
1436     case rem:
1437       __ pop_d(F1_ARG1);
1438       __ fmr(F2_ARG2, F15_ftos);
1439       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem));
1440       __ fmr(F15_ftos, F1_RET);
1441       break;
1442 
1443     default: ShouldNotReachHere();
1444   }
1445 }
1446 
1447 // Negate the value in the TOS cache.
1448 void TemplateTable::ineg() {
1449   transition(itos, itos);
1450 
1451   __ neg(R17_tos, R17_tos);
1452 }
1453 
1454 // Negate the value in the TOS cache.
1455 void TemplateTable::lneg() {
1456   transition(ltos, ltos);
1457 
1458   __ neg(R17_tos, R17_tos);
1459 }
1460 
1461 void TemplateTable::fneg() {
1462   transition(ftos, ftos);
1463 
1464   __ fneg(F15_ftos, F15_ftos);
1465 }
1466 
1467 void TemplateTable::dneg() {
1468   transition(dtos, dtos);
1469 
1470   __ fneg(F15_ftos, F15_ftos);
1471 }
1472 
1473 // Increments a local variable in place.
1474 void TemplateTable::iinc() {
1475   transition(vtos, vtos);
1476 
1477   const Register Rindex     = R11_scratch1,
1478                  Rincrement = R0,
1479                  Rvalue     = R12_scratch2;
1480 
1481   locals_index(Rindex);              // Load locals index from bytecode stream.
1482   __ lbz(Rincrement, 2, R14_bcp);    // Load increment from the bytecode stream.
1483   __ extsb(Rincrement, Rincrement);
1484 
1485   __ load_local_int(Rvalue, Rindex, Rindex); // Puts address of local into Rindex.
1486 
1487   __ add(Rvalue, Rincrement, Rvalue);
1488   __ stw(Rvalue, 0, Rindex);
1489 }
1490 
1491 void TemplateTable::wide_iinc() {
1492   transition(vtos, vtos);
1493 
1494   Register Rindex       = R11_scratch1,
1495            Rlocals_addr = Rindex,
1496            Rincr        = R12_scratch2;
1497   locals_index_wide(Rindex);
1498   __ get_2_byte_integer_at_bcp(4, Rincr, InterpreterMacroAssembler::Signed);
1499   __ load_local_int(R17_tos, Rlocals_addr, Rindex);
1500   __ add(R17_tos, Rincr, R17_tos);
1501   __ stw(R17_tos, 0, Rlocals_addr);
1502 }
1503 
// Emits the template for the primitive conversion bytecodes (i2l, i2f, i2d,
// l2i, l2f, l2d, f2i, f2l, f2d, d2i, d2l, d2f, i2b, i2c, i2s). The bytecode
// being generated is queried via bytecode().
void TemplateTable::convert() {
  // %%%%% Factor this first part across platforms
#ifdef ASSERT
  // Debug builds only: derive the expected tos states from the bytecode and
  // let transition() check them.
  TosState tos_in  = ilgl;
  TosState tos_out = ilgl;
  switch (bytecode()) {
    case Bytecodes::_i2l: // fall through
    case Bytecodes::_i2f: // fall through
    case Bytecodes::_i2d: // fall through
    case Bytecodes::_i2b: // fall through
    case Bytecodes::_i2c: // fall through
    case Bytecodes::_i2s: tos_in = itos; break;
    case Bytecodes::_l2i: // fall through
    case Bytecodes::_l2f: // fall through
    case Bytecodes::_l2d: tos_in = ltos; break;
    case Bytecodes::_f2i: // fall through
    case Bytecodes::_f2l: // fall through
    case Bytecodes::_f2d: tos_in = ftos; break;
    case Bytecodes::_d2i: // fall through
    case Bytecodes::_d2l: // fall through
    case Bytecodes::_d2f: tos_in = dtos; break;
    default             : ShouldNotReachHere();
  }
  switch (bytecode()) {
    case Bytecodes::_l2i: // fall through
    case Bytecodes::_f2i: // fall through
    case Bytecodes::_d2i: // fall through
    case Bytecodes::_i2b: // fall through
    case Bytecodes::_i2c: // fall through
    case Bytecodes::_i2s: tos_out = itos; break;
    case Bytecodes::_i2l: // fall through
    case Bytecodes::_f2l: // fall through
    case Bytecodes::_d2l: tos_out = ltos; break;
    case Bytecodes::_i2f: // fall through
    case Bytecodes::_l2f: // fall through
    case Bytecodes::_d2f: tos_out = ftos; break;
    case Bytecodes::_i2d: // fall through
    case Bytecodes::_l2d: // fall through
    case Bytecodes::_f2d: tos_out = dtos; break;
    default             : ShouldNotReachHere();
  }
  transition(tos_in, tos_out);
#endif

  // Conversion
  Label done; // Target of the NaN shortcut in the float/double -> int/long cases.
  switch (bytecode()) {
    case Bytecodes::_i2l:
      __ extsw(R17_tos, R17_tos);
      break;

    case Bytecodes::_l2i:
      // Nothing to do, we'll continue to work with the lower bits.
      break;

    case Bytecodes::_i2b:
      __ extsb(R17_tos, R17_tos);
      break;

    case Bytecodes::_i2c:
      // Keep the low 16 bits, clear everything above.
      __ rldicl(R17_tos, R17_tos, 0, 64-2*8);
      break;

    case Bytecodes::_i2s:
      __ extsh(R17_tos, R17_tos);
      break;

    case Bytecodes::_i2d:
      __ extsw(R17_tos, R17_tos);
      // fall through: after sign extension the value is converted like a long.
    case Bytecodes::_l2d:
      __ move_l_to_d();
      __ fcfid(F15_ftos, F15_ftos);
      break;

    case Bytecodes::_i2f:
      __ extsw(R17_tos, R17_tos);
      __ move_l_to_d();
      __ fcfids(F15_ftos, F15_ftos);
      break;

    case Bytecodes::_l2f:
      __ move_l_to_d();
      __ fcfids(F15_ftos, F15_ftos);
      break;

    case Bytecodes::_f2d:
      // empty
      break;

    case Bytecodes::_d2f:
      __ frsp(F15_ftos, F15_ftos);
      break;

    case Bytecodes::_d2i:
    case Bytecodes::_f2i:
      // Comparing the value with itself is unordered only for NaN.
      __ fcmpu(CR0, F15_ftos, F15_ftos);
      __ li(R17_tos, 0); // 0 in case of NAN
      __ bso(CR0, done);
      __ fctiwz(F15_ftos, F15_ftos);
      __ move_d_to_l();
      break;

    case Bytecodes::_d2l:
    case Bytecodes::_f2l:
      // Comparing the value with itself is unordered only for NaN.
      __ fcmpu(CR0, F15_ftos, F15_ftos);
      __ li(R17_tos, 0); // 0 in case of NAN
      __ bso(CR0, done);
      __ fctidz(F15_ftos, F15_ftos);
      __ move_d_to_l();
      break;

    default: ShouldNotReachHere();
  }
  __ bind(done);
}
1619 
1620 // Long compare
1621 void TemplateTable::lcmp() {
1622   transition(ltos, itos);
1623 
1624   const Register Rscratch = R11_scratch1;
1625   __ pop_l(Rscratch); // first operand, deeper in stack
1626 
1627   __ cmpd(CR0, Rscratch, R17_tos); // compare
1628   __ set_cmp3(R17_tos); // set result as follows: <: -1, =: 0, >: 1
1629 }
1630 
1631 // fcmpl/fcmpg and dcmpl/dcmpg bytecodes
1632 // unordered_result == -1 => fcmpl or dcmpl
1633 // unordered_result ==  1 => fcmpg or dcmpg
1634 void TemplateTable::float_cmp(bool is_float, int unordered_result) {
1635   const FloatRegister Rfirst  = F0_SCRATCH,
1636                       Rsecond = F15_ftos;
1637   const Register Rscratch = R11_scratch1;
1638 
1639   if (is_float) {
1640     __ pop_f(Rfirst);
1641   } else {
1642     __ pop_d(Rfirst);
1643   }
1644 
1645   __ fcmpu(CR0, Rfirst, Rsecond); // compare
1646   // if unordered_result is 1, treat unordered_result like 'greater than'
1647   assert(unordered_result == 1 || unordered_result == -1, "unordered_result can be either 1 or -1");
1648   __ set_cmpu3(R17_tos, unordered_result != 1);
1649 }
1650 
1651 // Branch_conditional which takes TemplateTable::Condition.
1652 void TemplateTable::branch_conditional(ConditionRegister crx, TemplateTable::Condition cc, Label& L, bool invert) {
1653   bool positive = false;
1654   Assembler::Condition cond = Assembler::equal;
1655   switch (cc) {
1656     case TemplateTable::equal:         positive = true ; cond = Assembler::equal  ; break;
1657     case TemplateTable::not_equal:     positive = false; cond = Assembler::equal  ; break;
1658     case TemplateTable::less:          positive = true ; cond = Assembler::less   ; break;
1659     case TemplateTable::less_equal:    positive = false; cond = Assembler::greater; break;
1660     case TemplateTable::greater:       positive = true ; cond = Assembler::greater; break;
1661     case TemplateTable::greater_equal: positive = false; cond = Assembler::less   ; break;
1662     default: ShouldNotReachHere();
1663   }
1664   int bo = (positive != invert) ? Assembler::bcondCRbiIs1 : Assembler::bcondCRbiIs0;
1665   int bi = Assembler::bi0(crx, cond);
1666   __ bc(bo, bi, L);
1667 }
1668 
// Emits the code for a taken branch (goto/goto_w, jsr/jsr_w and the taken
// path of the conditional branches).
//   is_jsr  - generate jsr semantics: push the return bci, then branch.
//   is_wide - the displacement operand is 4 bytes instead of 2.
// For non-jsr branches with UseCompiler && UseLoopCounter, branches whose
// displacement is not positive bump the backedge counter and may trigger
// on-stack replacement.
void TemplateTable::branch(bool is_jsr, bool is_wide) {

  const Register Rscratch1    = R11_scratch1,
                 Rscratch2    = R12_scratch2,
                 Rscratch3    = R3_ARG1,
                 R4_counters  = R4_ARG2,
                 bumped_count = R31,
                 Rdisp        = R22_tmp2;

  __ profile_taken_branch(Rscratch1, bumped_count);

  // Get (wide) offset.
  if (is_wide) {
    __ get_4_byte_integer_at_bcp(1, Rdisp, InterpreterMacroAssembler::Signed);
  } else {
    __ get_2_byte_integer_at_bcp(1, Rdisp, InterpreterMacroAssembler::Signed);
  }

  // --------------------------------------------------------------------------
  // Handle all the JSR stuff here, then exit.
  // It's much shorter and cleaner than intermingling with the
  // non-JSR normal-branch stuff occurring below.
  if (is_jsr) {
    // Compute return address as bci in R17_tos:
    // (bcp + bytecode length) - (ConstMethod* + codes_offset).
    __ ld(Rscratch1, in_bytes(Method::const_offset()), R19_method);
    __ addi(Rscratch2, R14_bcp, -in_bytes(ConstMethod::codes_offset()) + (is_wide ? 5 : 3));
    __ subf(R17_tos, Rscratch1, Rscratch2);

    // Bump bcp to target of JSR.
    __ add(R14_bcp, Rdisp, R14_bcp);
    // Push returnAddress for "ret" on stack.
    __ push_ptr(R17_tos);
    // And away we go!
    __ dispatch_next(vtos, 0 ,true);
    return;
  }

  // --------------------------------------------------------------------------
  // Normal (non-jsr) branch handling

  // Bump bytecode pointer by displacement (take the branch).
  __ add(R14_bcp, Rdisp, R14_bcp); // Add to bc addr.

  const bool increment_invocation_counter_for_backward_branches = UseCompiler && UseLoopCounter;
  if (increment_invocation_counter_for_backward_branches) {
    Label Lforward;

    // Check branch direction.
    __ cmpdi(CR0, Rdisp, 0);
    __ bgt(CR0, Lforward);

    // Lforward is also passed to get_method_counters as its bail-out label.
    __ get_method_counters(R19_method, R4_counters, Lforward);

    Label Lno_mdo, Loverflow;
    const int increment = InvocationCounter::count_increment;
    if (ProfileInterpreter) {
      Register Rmdo = Rscratch1;

      // If no method data exists, go to Lno_mdo.
      __ ld(Rmdo, in_bytes(Method::method_data_offset()), R19_method);
      __ cmpdi(CR0, Rmdo, 0);
      __ beq(CR0, Lno_mdo);

      // Increment backedge counter in the MDO.
      const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
      __ lwz(Rscratch2, mdo_bc_offs, Rmdo);
      __ lwz(Rscratch3, in_bytes(MethodData::backedge_mask_offset()), Rmdo);
      __ addi(Rscratch2, Rscratch2, increment);
      __ stw(Rscratch2, mdo_bc_offs, Rmdo);
      if (UseOnStackReplacement) {
        // Overflow is signalled when (counter & mask) == 0.
        __ and_(Rscratch3, Rscratch2, Rscratch3);
        __ bne(CR0, Lforward);
        __ b(Loverflow);
      } else {
        __ b(Lforward);
      }
    }

    // If there's no MDO, increment counter in method.
    const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
    __ bind(Lno_mdo);
    __ lwz(Rscratch2, mo_bc_offs, R4_counters);
    __ lwz(Rscratch3, in_bytes(MethodCounters::backedge_mask_offset()), R4_counters);
    __ addi(Rscratch2, Rscratch2, increment);
    __ stw(Rscratch2, mo_bc_offs, R4_counters);
    if (UseOnStackReplacement) {
      // Overflow is signalled when (counter & mask) == 0; fall into Loverflow then.
      __ and_(Rscratch3, Rscratch2, Rscratch3);
      __ bne(CR0, Lforward);
    } else {
      __ b(Lforward);
    }
    __ bind(Loverflow);

    // Notify point for loop, pass branch bytecode.
    __ subf(R4_ARG2, Rdisp, R14_bcp); // Compute branch bytecode (previous bcp).
    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), R4_ARG2, true);

    // Was an OSR adapter generated?
    __ cmpdi(CR0, R3_RET, 0);
    __ beq(CR0, Lforward);

    // Has the nmethod been invalidated already?
    __ lbz(R0, in_bytes(nmethod::state_offset()), R3_RET);
    __ cmpwi(CR0, R0, nmethod::in_use);
    __ bne(CR0, Lforward);

    // Migrate the interpreter frame off of the stack.
    // We can use all registers because we will not return to interpreter from this point.

    // Save nmethod.
    const Register osr_nmethod = R31;
    __ mr(osr_nmethod, R3_RET);
    __ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R11_scratch1);
    JFR_ONLY(__ enter_jfr_critical_section();)
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin), R16_thread);
    __ reset_last_Java_frame();
    // OSR buffer is in ARG1.

    // Remove the interpreter frame.
    __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ R12_scratch2, R11_scratch1, R0);
    JFR_ONLY(__ leave_jfr_critical_section();)

    // Jump to the osr code.
    __ ld(R11_scratch1, nmethod::osr_entry_point_offset(), osr_nmethod);
    __ mtlr(R12_scratch2); // Return pc delivered by merge_frames.
    __ mtctr(R11_scratch1);
    __ bctr();

    __ bind(Lforward);
  }
  __ dispatch_next(vtos, 0, true);
}
1801 
1802 // Helper function for if_cmp* methods below.
1803 // Factored out common compare and branch code.
1804 void TemplateTable::if_cmp_common(Register Rfirst, Register Rsecond, Register Rscratch1, Register Rscratch2, Condition cc, bool is_jint, bool is_acmp) {
1805   Label Lnot_taken;
1806   // Note: The condition code we get is the condition under which we
1807   // *fall through*! So we have to inverse the CC here.
1808 
1809   if (is_jint) {
1810     if (Rsecond == noreg) {
1811       __ cmpwi(CR0, Rfirst, 0);
1812     } else {
1813       __ cmpw(CR0, Rfirst, Rsecond);
1814     }
1815   } else {
1816     if (Rsecond == noreg) {
1817       __ cmpdi(CR0, Rfirst, 0);
1818     } else {
1819       __ cmpd(CR0, Rfirst, Rsecond);
1820     }
1821   }
1822   branch_conditional(CR0, cc, Lnot_taken, /*invert*/ true);
1823 
1824   // Conition is false => Jump!
1825   branch(false, false);
1826 
1827   // Condition is not true => Continue.
1828   __ align(32, 12);
1829   __ bind(Lnot_taken);
1830   __ profile_not_taken_branch(Rscratch1, Rscratch2, is_acmp);
1831 }
1832 
1833 // Compare integer values with zero and fall through if CC holds, branch away otherwise.
1834 void TemplateTable::if_0cmp(Condition cc) {
1835   transition(itos, vtos);
1836 
1837   if_cmp_common(R17_tos, noreg, R11_scratch1, R12_scratch2, cc, true);
1838 }
1839 
1840 // Compare integer values and fall through if CC holds, branch away otherwise.
1841 //
1842 // Interface:
1843 //  - Rfirst: First operand  (older stack value)
1844 //  - tos:    Second operand (younger stack value)
1845 void TemplateTable::if_icmp(Condition cc) {
1846   transition(itos, vtos);
1847 
1848   const Register Rfirst  = R0,
1849                  Rsecond = R17_tos;
1850 
1851   __ pop_i(Rfirst);
1852   if_cmp_common(Rfirst, Rsecond, R11_scratch1, R12_scratch2, cc, true);
1853 }
1854 
1855 void TemplateTable::if_nullcmp(Condition cc) {
1856   transition(atos, vtos);
1857 
1858   if_cmp_common(R17_tos, noreg, R11_scratch1, R12_scratch2, cc, false);
1859 }
1860 
1861 void TemplateTable::if_acmp(Condition cc) {
1862   transition(atos, vtos);
1863 
1864   const Register Rfirst  = R31,
1865                  Rsecond = R17_tos;
1866 
1867   __ pop_ptr(Rfirst);
1868 
1869   __ profile_acmp(Rsecond, Rfirst, R11_scratch1, R12_scratch2);
1870 
1871   const int is_inline_type_mask = markWord::inline_type_pattern;
1872   if (Arguments::is_valhalla_enabled()) {
1873     Label taken, not_taken;
1874     __ cmpd(CR0, Rfirst, Rsecond);
1875     __ beq(CR0, (cc == equal) ? taken : not_taken);
1876 
1877     // test if any input is null
1878     __ cmpdi(CR0, Rfirst, 0);
1879     __ cmpdi(CR1, Rsecond, 0);
1880     __ cror(CR0, Assembler::equal, CR1, Assembler::equal);
1881     __ beq(CR0, (cc == equal) ? not_taken : taken);
1882 
1883     // and both are values ?
1884     __ ld(R11_scratch1, oopDesc::mark_offset_in_bytes(), Rfirst);
1885     __ ld(R12_scratch2, oopDesc::mark_offset_in_bytes(), Rsecond);
1886     __ andr(R11_scratch1, R11_scratch1, R12_scratch2);
1887     __ andi(R11_scratch1, R11_scratch1, is_inline_type_mask);
1888     __ cmpdi(CR0, R11_scratch1, is_inline_type_mask);
1889     __ bne(CR0, (cc == equal) ? not_taken : taken);
1890 
1891     // same value klass ?
1892     __ load_metadata(R11_scratch1, Rfirst);
1893     __ load_metadata(R12_scratch2, Rsecond);
1894     __ cmpd(CR0, R11_scratch1, R12_scratch2);
1895     __ bne(CR0, (cc == equal) ? not_taken : taken);
1896 
1897     // Know both are the same type, let's test for substitutability...
1898     if (cc == equal) {
1899       invoke_is_substitutable(Rfirst, Rsecond, taken, not_taken);
1900     } else {
1901       invoke_is_substitutable(Rfirst, Rsecond, not_taken, taken);
1902     }
1903     DEBUG_ONLY( __ stop("Not reachable"); )
1904 
1905     // Conition is false => Jump!
1906     __ align(32, 12);
1907     __ bind(taken);
1908     branch(false, false);
1909 
1910     // Condition is not true => Continue.
1911     __ align(32, 12);
1912     __ bind(not_taken);
1913     __ profile_not_taken_branch(R11_scratch1, R12_scratch2, true);
1914 
1915   } else {
1916     if_cmp_common(Rfirst, Rsecond, R11_scratch1, R12_scratch2, cc, false, true);
1917   }
1918 }
1919 
1920 void TemplateTable::invoke_is_substitutable(Register aobj, Register bobj, Label& is_subst, Label& not_subst) {
1921   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::is_substitutable), aobj, bobj);
1922   __ cmpwi(CR0, R3_RET, 0);
1923   __ beq(CR0, not_subst);
1924   __ b(is_subst);
1925 }
1926 
1927 void TemplateTable::ret() {
1928   locals_index(R11_scratch1);
1929   __ load_local_ptr(R17_tos, R11_scratch1, R11_scratch1);
1930 
1931   __ profile_ret(vtos, R17_tos, R11_scratch1, R12_scratch2);
1932 
1933   __ ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method);
1934   __ add(R11_scratch1, R17_tos, R11_scratch1);
1935   __ addi(R14_bcp, R11_scratch1, in_bytes(ConstMethod::codes_offset()));
1936   __ dispatch_next(vtos, 0, true);
1937 }
1938 
1939 void TemplateTable::wide_ret() {
1940   transition(vtos, vtos);
1941 
1942   const Register Rindex = R3_ARG1,
1943                  Rscratch1 = R11_scratch1,
1944                  Rscratch2 = R12_scratch2;
1945 
1946   locals_index_wide(Rindex);
1947   __ load_local_ptr(R17_tos, R17_tos, Rindex);
1948   __ profile_ret(vtos, R17_tos, Rscratch1, R12_scratch2);
1949   // Tos now contains the bci, compute the bcp from that.
1950   __ ld(Rscratch1, in_bytes(Method::const_offset()), R19_method);
1951   __ addi(Rscratch2, R17_tos, in_bytes(ConstMethod::codes_offset()));
1952   __ add(R14_bcp, Rscratch1, Rscratch2);
1953   __ dispatch_next(vtos, 0, true);
1954 }
1955 
1956 void TemplateTable::tableswitch() {
1957   transition(itos, vtos);
1958 
1959   Label Ldispatch, Ldefault_case;
1960   Register Rlow_byte         = R3_ARG1,
1961            Rindex            = Rlow_byte,
1962            Rhigh_byte        = R4_ARG2,
1963            Rdef_offset_addr  = R5_ARG3, // is going to contain address of default offset
1964            Rscratch1         = R11_scratch1,
1965            Rscratch2         = R12_scratch2,
1966            Roffset           = R6_ARG4;
1967 
1968   // Align bcp.
1969   __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt);
1970   __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, LogBytesPerInt);
1971 
1972   // Load lo & hi.
1973   __ get_u4(Rlow_byte, Rdef_offset_addr, BytesPerInt, InterpreterMacroAssembler::Unsigned);
1974   __ get_u4(Rhigh_byte, Rdef_offset_addr, 2 *BytesPerInt, InterpreterMacroAssembler::Unsigned);
1975 
1976   // Check for default case (=index outside [low,high]).
1977   __ cmpw(CR0, R17_tos, Rlow_byte);
1978   __ cmpw(CR1, R17_tos, Rhigh_byte);
1979   __ blt(CR0, Ldefault_case);
1980   __ bgt(CR1, Ldefault_case);
1981 
1982   // Lookup dispatch offset.
1983   __ sub(Rindex, R17_tos, Rlow_byte);
1984   __ extsw(Rindex, Rindex);
1985   __ profile_switch_case(Rindex, Rhigh_byte /* scratch */, Rscratch1, Rscratch2);
1986   __ sldi(Rindex, Rindex, LogBytesPerInt);
1987   __ addi(Rindex, Rindex, 3 * BytesPerInt);
1988 #if defined(VM_LITTLE_ENDIAN)
1989   __ lwbrx(Roffset, Rdef_offset_addr, Rindex);
1990   __ extsw(Roffset, Roffset);
1991 #else
1992   __ lwax(Roffset, Rdef_offset_addr, Rindex);
1993 #endif
1994   __ b(Ldispatch);
1995 
1996   __ bind(Ldefault_case);
1997   __ profile_switch_default(Rhigh_byte, Rscratch1);
1998   __ get_u4(Roffset, Rdef_offset_addr, 0, InterpreterMacroAssembler::Signed);
1999 
2000   __ bind(Ldispatch);
2001 
2002   __ add(R14_bcp, Roffset, R14_bcp);
2003   __ dispatch_next(vtos, 0, true);
2004 }
2005 
2006 void TemplateTable::lookupswitch() {
2007   transition(itos, itos);
2008   __ stop("lookupswitch bytecode should have been rewritten");
2009 }
2010 
2011 // Table switch using linear search through cases.
2012 // Bytecode stream format:
2013 // Bytecode (1) | 4-byte padding | default offset (4) | count (4) | value/offset pair1 (8) | value/offset pair2 (8) | ...
2014 // Note: Everything is big-endian format here.
2015 void TemplateTable::fast_linearswitch() {
2016   transition(itos, vtos);
2017 
2018   Label Lloop_entry, Lsearch_loop, Lcontinue_execution, Ldefault_case;
2019   Register Rcount           = R3_ARG1,
2020            Rcurrent_pair    = R4_ARG2,
2021            Rdef_offset_addr = R5_ARG3, // Is going to contain address of default offset.
2022            Roffset          = R31,     // Might need to survive C call.
2023            Rvalue           = R12_scratch2,
2024            Rscratch         = R11_scratch1,
2025            Rcmp_value       = R17_tos;
2026 
2027   // Align bcp.
2028   __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt);
2029   __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, LogBytesPerInt);
2030 
2031   // Setup loop counter and limit.
2032   __ get_u4(Rcount, Rdef_offset_addr, BytesPerInt, InterpreterMacroAssembler::Unsigned);
2033   __ addi(Rcurrent_pair, Rdef_offset_addr, 2 * BytesPerInt); // Rcurrent_pair now points to first pair.
2034 
2035   __ mtctr(Rcount);
2036   __ cmpwi(CR0, Rcount, 0);
2037   __ bne(CR0, Lloop_entry);
2038 
2039   // Default case
2040   __ bind(Ldefault_case);
2041   __ get_u4(Roffset, Rdef_offset_addr, 0, InterpreterMacroAssembler::Signed);
2042   if (ProfileInterpreter) {
2043     __ profile_switch_default(Rdef_offset_addr, Rcount/* scratch */);
2044   }
2045   __ b(Lcontinue_execution);
2046 
2047   // Next iteration
2048   __ bind(Lsearch_loop);
2049   __ bdz(Ldefault_case);
2050   __ addi(Rcurrent_pair, Rcurrent_pair, 2 * BytesPerInt);
2051   __ bind(Lloop_entry);
2052   __ get_u4(Rvalue, Rcurrent_pair, 0, InterpreterMacroAssembler::Unsigned);
2053   __ cmpw(CR0, Rvalue, Rcmp_value);
2054   __ bne(CR0, Lsearch_loop);
2055 
2056   // Found, load offset.
2057   __ get_u4(Roffset, Rcurrent_pair, BytesPerInt, InterpreterMacroAssembler::Signed);
2058   // Calculate case index and profile
2059   __ mfctr(Rcurrent_pair);
2060   if (ProfileInterpreter) {
2061     __ sub(Rcurrent_pair, Rcount, Rcurrent_pair);
2062     __ profile_switch_case(Rcurrent_pair, Rcount /*scratch*/, Rdef_offset_addr/*scratch*/, Rscratch);
2063   }
2064 
2065   __ bind(Lcontinue_execution);
2066   __ add(R14_bcp, Roffset, R14_bcp);
2067   __ dispatch_next(vtos, 0, true);
2068 }
2069 
2070 // Table switch using binary search (value/offset pairs are ordered).
2071 // Bytecode stream format:
2072 // Bytecode (1) | 4-byte padding | default offset (4) | count (4) | value/offset pair1 (8) | value/offset pair2 (8) | ...
2073 // Note: Everything is big-endian format here. So on little endian machines, we have to revers offset and count and cmp value.
2074 void TemplateTable::fast_binaryswitch() {
2075 
2076   transition(itos, vtos);
2077   // Implementation using the following core algorithm: (copied from Intel)
2078   //
2079   // int binary_search(int key, LookupswitchPair* array, int n) {
2080   //   // Binary search according to "Methodik des Programmierens" by
2081   //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
2082   //   int i = 0;
2083   //   int j = n;
2084   //   while (i+1 < j) {
2085   //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
2086   //     // with      Q: for all i: 0 <= i < n: key < a[i]
2087   //     // where a stands for the array and assuming that the (inexisting)
2088   //     // element a[n] is infinitely big.
2089   //     int h = (i + j) >> 1;
2090   //     // i < h < j
2091   //     if (key < array[h].fast_match()) {
2092   //       j = h;
2093   //     } else {
2094   //       i = h;
2095   //     }
2096   //   }
2097   //   // R: a[i] <= key < a[i+1] or Q
2098   //   // (i.e., if key is within array, i is the correct index)
2099   //   return i;
2100   // }
2101 
2102   // register allocation
2103   const Register Rkey     = R17_tos;          // already set (tosca)
2104   const Register Rarray   = R3_ARG1;
2105   const Register Ri       = R4_ARG2;
2106   const Register Rj       = R5_ARG3;
2107   const Register Rh       = R6_ARG4;
2108   const Register Rscratch = R11_scratch1;
2109 
2110   const int log_entry_size = 3;
2111   const int entry_size = 1 << log_entry_size;
2112 
2113   Label found;
2114 
2115   // Find Array start,
2116   __ addi(Rarray, R14_bcp, 3 * BytesPerInt);
2117   __ clrrdi(Rarray, Rarray, LogBytesPerInt);
2118 
2119   // initialize i & j
2120   __ li(Ri,0);
2121   __ get_u4(Rj, Rarray, -BytesPerInt, InterpreterMacroAssembler::Unsigned);
2122 
2123   // and start.
2124   Label entry;
2125   __ b(entry);
2126 
2127   // binary search loop
2128   { Label loop;
2129     __ bind(loop);
2130     // int h = (i + j) >> 1;
2131     __ srdi(Rh, Rh, 1);
2132     // if (key < array[h].fast_match()) {
2133     //   j = h;
2134     // } else {
2135     //   i = h;
2136     // }
2137     __ sldi(Rscratch, Rh, log_entry_size);
2138 #if defined(VM_LITTLE_ENDIAN)
2139     __ lwbrx(Rscratch, Rscratch, Rarray);
2140 #else
2141     __ lwzx(Rscratch, Rscratch, Rarray);
2142 #endif
2143 
2144     // if (key < current value)
2145     //   Rh = Rj
2146     // else
2147     //   Rh = Ri
2148     Label Lgreater;
2149     __ cmpw(CR0, Rkey, Rscratch);
2150     __ bge(CR0, Lgreater);
2151     __ mr(Rj, Rh);
2152     __ b(entry);
2153     __ bind(Lgreater);
2154     __ mr(Ri, Rh);
2155 
2156     // while (i+1 < j)
2157     __ bind(entry);
2158     __ addi(Rscratch, Ri, 1);
2159     __ cmpw(CR0, Rscratch, Rj);
2160     __ add(Rh, Ri, Rj); // start h = i + j >> 1;
2161 
2162     __ blt(CR0, loop);
2163   }
2164 
2165   // End of binary search, result index is i (must check again!).
2166   Label default_case;
2167   Label continue_execution;
2168   if (ProfileInterpreter) {
2169     __ mr(Rh, Ri);              // Save index in i for profiling.
2170   }
2171   // Ri = value offset
2172   __ sldi(Ri, Ri, log_entry_size);
2173   __ add(Ri, Ri, Rarray);
2174   __ get_u4(Rscratch, Ri, 0, InterpreterMacroAssembler::Unsigned);
2175 
2176   Label not_found;
2177   // Ri = offset offset
2178   __ cmpw(CR0, Rkey, Rscratch);
2179   __ beq(CR0, not_found);
2180   // entry not found -> j = default offset
2181   __ get_u4(Rj, Rarray, -2 * BytesPerInt, InterpreterMacroAssembler::Unsigned);
2182   __ b(default_case);
2183 
2184   __ bind(not_found);
2185   // entry found -> j = offset
2186   __ profile_switch_case(Rh, Rj, Rscratch, Rkey);
2187   __ get_u4(Rj, Ri, BytesPerInt, InterpreterMacroAssembler::Unsigned);
2188 
2189   if (ProfileInterpreter) {
2190     __ b(continue_execution);
2191   }
2192 
2193   __ bind(default_case); // fall through (if not profiling)
2194   __ profile_switch_default(Ri, Rscratch);
2195 
2196   __ bind(continue_execution);
2197 
2198   __ extsw(Rj, Rj);
2199   __ add(R14_bcp, Rj, R14_bcp);
2200   __ dispatch_next(vtos, 0 , true);
2201 }
2202 
2203 void TemplateTable::_return(TosState state) {
2204   transition(state, state);
2205   assert(_desc->calls_vm(),
2206          "inconsistent calls_vm information"); // call in remove_activation
2207 
2208   if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
2209 
2210     Register Rscratch     = R11_scratch1,
2211              Rklass       = R12_scratch2,
2212              Rklass_flags = Rklass;
2213     Label Lskip_register_finalizer;
2214 
2215     // Check if the method has the FINALIZER flag set and call into the VM to finalize in this case.
2216     assert(state == vtos, "only valid state");
2217     __ ld(R17_tos, 0, R18_locals);
2218 
2219     // Load klass of this obj.
2220     __ load_klass(Rklass, R17_tos);
2221     __ lbz(Rklass_flags, in_bytes(Klass::misc_flags_offset()), Rklass);
2222     __ testbitdi(CR0, R0, Rklass_flags, exact_log2(KlassFlags::_misc_has_finalizer));
2223     __ bfalse(CR0, Lskip_register_finalizer);
2224 
2225     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), R17_tos /* obj */);
2226 
2227     __ align(32, 12);
2228     __ bind(Lskip_register_finalizer);
2229   }
2230 
2231   if (_desc->bytecode() != Bytecodes::_return_register_finalizer) {
2232     Label no_safepoint;
2233     __ ld(R11_scratch1, in_bytes(JavaThread::polling_word_offset()), R16_thread);
2234     __ andi_(R11_scratch1, R11_scratch1, SafepointMechanism::poll_bit());
2235     __ beq(CR0, no_safepoint);
2236     __ push(state);
2237     __ push_cont_fastpath();
2238     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint));
2239     __ pop_cont_fastpath();
2240     __ pop(state);
2241     __ bind(no_safepoint);
2242   }
2243 
2244   // Move the result value into the correct register and remove memory stack frame.
2245   __ remove_activation(state, /* throw_monitor_exception */ true);
2246   // Restoration of lr done by remove_activation.
2247   switch (state) {
2248     // Narrow result if state is itos but result type is smaller.
2249     // Need to narrow in the return bytecode rather than in generate_return_entry
2250     // since compiled code callers expect the result to already be narrowed.
2251     case itos: __ narrow(R17_tos); /* fall through */
2252     case ltos:
2253     case atos: __ mr(R3_RET, R17_tos); break;
2254     case ftos:
2255     case dtos: __ fmr(F1_RET, F15_ftos); break;
2256     case vtos: // This might be a constructor. Final fields (and volatile fields on PPC64) need
2257                // to get visible before the reference to the object gets stored anywhere.
2258                __ membar(Assembler::StoreStore); break;
2259     default  : ShouldNotReachHere();
2260   }
2261   __ blr();
2262 }
2263 
2264 // ============================================================================
2265 // Constant pool cache access
2266 //
2267 // Memory ordering:
2268 //
2269 // Like done in C++ interpreter, we load the fields
2270 //   - _indices
2271 //   - _f12_oop
2272 // acquired, because these are asked if the cache is already resolved. We don't
2273 // want to float loads above this check.
2274 // See also comments in ConstantPoolCacheEntry::bytecode_1(),
2275 // ConstantPoolCacheEntry::bytecode_2() and ConstantPoolCacheEntry::f1();
2276 
2277 // Call into the VM if call site is not yet resolved
2278 //
2279 // Input regs:
2280 //   - None, all passed regs are outputs.
2281 //
2282 // Returns:
2283 //   - Rcache:  The const pool cache entry that contains the resolved result.
2284 //
2285 // Kills:
2286 //   - Rscratch
2287 void TemplateTable::resolve_cache_and_index_for_method(int byte_no, Register Rcache, Register Rscratch) {
2288   assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
2289 
2290   Label L_clinit_barrier_slow, L_done;
2291   Register Rindex = Rscratch;
2292 
2293   Bytecodes::Code code = bytecode();
2294   const int bytecode_offset = (byte_no == f1_byte) ? in_bytes(ResolvedMethodEntry::bytecode1_offset())
2295                                                    : in_bytes(ResolvedMethodEntry::bytecode2_offset());
2296   __ load_method_entry(Rcache, Rindex);
2297   // Load-acquire the bytecode to match store-release in InterpreterRuntime
2298   __ lbz(Rscratch, bytecode_offset, Rcache);
2299   // Acquire by cmp-br-isync (see below).
2300   __ cmpdi(CR0, Rscratch, (int)code);
2301   __ bne(CR0, L_clinit_barrier_slow);
2302 
2303   __ isync(); // Order load wrt. succeeding loads.
2304 
2305   // Class initialization barrier for static methods
2306   if (bytecode() == Bytecodes::_invokestatic) {
2307     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
2308     const Register method = Rscratch;
2309     const Register klass  = Rscratch;
2310 
2311     __ ld(method, in_bytes(ResolvedMethodEntry::method_offset()), Rcache);
2312     __ load_method_holder(klass, method);
2313     __ clinit_barrier(klass, R16_thread, &L_done, /*L_slow_path*/ nullptr);
2314   } else {
2315     __ b(L_done);
2316   }
2317 
2318   // Class initialization barrier slow path lands here as well.
2319   __ bind(L_clinit_barrier_slow);
2320   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
2321   __ li(R4_ARG2, code);
2322   __ call_VM_preemptable(noreg, entry, R4_ARG2);
2323 
2324   // Update registers with resolved info.
2325   __ load_method_entry(Rcache, Rindex);
2326   __ bind(L_done);
2327 }
2328 
2329 void TemplateTable::resolve_cache_and_index_for_field(int byte_no, Register Rcache, Register index) {
2330   assert_different_registers(Rcache, index);
2331 
2332   Label L_clinit_barrier_slow, L_done;
2333 
2334   Bytecodes::Code code = bytecode();
2335   switch (code) {
2336   case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
2337   case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
2338   default: break;
2339   }
2340 
2341   assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
2342   __ load_field_entry(Rcache, index);
2343   int code_offset = (byte_no == f1_byte) ? in_bytes(ResolvedFieldEntry::get_code_offset())
2344                                          : in_bytes(ResolvedFieldEntry::put_code_offset());
2345   __ lbz(R0, code_offset, Rcache);
2346   __ cmpwi(CR0, R0, (int)code); // have we resolved this bytecode?
2347   __ bne(CR0, L_clinit_barrier_slow);
2348 
2349   __ isync(); // Order load wrt. succeeding loads.
2350 
2351   // Class initialization barrier for static fields
2352   if (bytecode() == Bytecodes::_getstatic || bytecode() == Bytecodes::_putstatic) {
2353     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
2354     const Register field_holder = R4_ARG2;
2355 
2356     // InterpreterRuntime::resolve_get_put sets field_holder and finally release-stores put_code.
2357     // We have seen the released put_code above and will read the corresponding field_holder and init_state
2358     // (ordered by compare-branch-isync).
2359     __ ld(field_holder, ResolvedFieldEntry::field_holder_offset(), Rcache);
2360     __ clinit_barrier(field_holder, R16_thread, &L_done, /*L_slow_path*/ nullptr);
2361   } else {
2362     __ b(L_done);
2363   }
2364 
2365   // resolve first time through
2366   // Class initialization barrier slow path lands here as well.
2367   __ bind(L_clinit_barrier_slow);
2368   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
2369   __ li(R4_ARG2, code);
2370   __ call_VM_preemptable(noreg, entry, R4_ARG2);
2371 
2372   // Update registers with resolved info
2373   __ load_field_entry(Rcache, index);
2374   __ bind(L_done);
2375 }
2376 
2377 void TemplateTable::load_resolved_field_entry(Register obj,
2378                                               Register cache,
2379                                               Register tos_state,
2380                                               Register offset,
2381                                               Register flags,
2382                                               bool is_static = false) {
2383   assert_different_registers(cache, tos_state, flags, offset);
2384 
2385   // Field offset
2386   __ load_sized_value(offset, in_bytes(ResolvedFieldEntry::field_offset_offset()), cache, sizeof(int), true /*is_signed*/);
2387 
2388   // Flags
2389   __ lbz(flags, in_bytes(ResolvedFieldEntry::flags_offset()), cache);
2390 
2391   if (tos_state != noreg) {
2392     __ lbz(tos_state, in_bytes(ResolvedFieldEntry::type_offset()), cache);
2393   }
2394 
2395   // Klass overwrite register
2396   if (is_static) {
2397     __ ld(obj, in_bytes(ResolvedFieldEntry::field_holder_offset()), cache);
2398     const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2399     __ ld(obj, mirror_offset, obj);
2400     __ resolve_oop_handle(obj, R11_scratch1, R12_scratch2, MacroAssembler::PRESERVATION_NONE);
2401   }
2402 }
2403 
2404 void TemplateTable::load_resolved_method_entry_special_or_static(Register cache,
2405                                                                  Register method,
2406                                                                  Register flags) {
2407   assert_different_registers(cache, method, flags);
2408 
2409   // determine constant pool cache field offsets
2410   resolve_cache_and_index_for_method(f1_byte, cache, method /* tmp */);
2411   if (flags != noreg) {
2412     __ lbz(flags, in_bytes(ResolvedMethodEntry::flags_offset()), cache);
2413   }
2414   __ ld(method, in_bytes(ResolvedMethodEntry::method_offset()), cache);
2415 }
2416 
2417 void TemplateTable::load_resolved_method_entry_handle(Register cache,
2418                                                       Register method,
2419                                                       Register ref_index,
2420                                                       Register flags) {
2421   // setup registers
2422   assert_different_registers(cache, method, ref_index, flags);
2423 
2424   // determine constant pool cache field offsets
2425   resolve_cache_and_index_for_method(f1_byte, cache, method /* tmp */);
2426   __ lbz(flags, in_bytes(ResolvedMethodEntry::flags_offset()), cache);
2427 
2428   // maybe push appendix to arguments (just before return address)
2429   Label L_no_push;
2430   __ testbitdi(CR0, R0, flags, ResolvedMethodEntry::has_appendix_shift);
2431   __ bfalse(CR0, L_no_push);
2432   // invokehandle uses an index into the resolved references array
2433   __ lhz(ref_index, in_bytes(ResolvedMethodEntry::resolved_references_index_offset()), cache);
2434   // Push the appendix as a trailing parameter.
2435   // This must be done before we get the receiver,
2436   // since the parameter_size includes it.
2437   Register appendix = method;
2438   assert(cache->is_nonvolatile(), "C-call in resolve_oop_handle");
2439   __ load_resolved_reference_at_index(appendix, ref_index, R11_scratch1, R12_scratch2);
2440   __ verify_oop(appendix);
2441   __ push_ptr(appendix); // push appendix (MethodType, CallSite, etc.)
2442   __ bind(L_no_push);
2443 
2444   __ ld(method, in_bytes(ResolvedMethodEntry::method_offset()), cache);
2445 }
2446 
2447 void TemplateTable::load_resolved_method_entry_interface(Register cache,
2448                                                          Register klass,
2449                                                          Register method_or_table_index,
2450                                                          Register flags) {
2451   // setup registers
2452   assert_different_registers(method_or_table_index, cache, flags);
2453   assert(klass == noreg, "to be determined by caller");
2454   assert(method_or_table_index == noreg, "to be determined by caller");
2455 
2456   // determine constant pool cache field offsets
2457   resolve_cache_and_index_for_method(f1_byte, cache, flags /* tmp */);
2458   __ lbz(flags, in_bytes(ResolvedMethodEntry::flags_offset()), cache);
2459 }
2460 
2461 void TemplateTable::load_resolved_method_entry_virtual(Register cache,
2462                                                        Register method_or_table_index,
2463                                                        Register flags) {
2464   // setup registers
2465   assert_different_registers(cache, flags);
2466   assert(method_or_table_index == noreg, "to be determined by caller");
2467 
2468   // determine constant pool cache field offsets
2469   resolve_cache_and_index_for_method(f2_byte, cache, flags /* tmp */);
2470   __ lbz(flags, in_bytes(ResolvedMethodEntry::flags_offset()), cache);
2471 }
2472 
2473 // Sets registers:
2474 //   `method`   Target method for invokedynamic
2475 //   R3_RET     Return address for invoke
2476 //
2477 // Kills: R11, R21, R30, R31
2478 void TemplateTable::load_invokedynamic_entry(Register method) {
2479   // setup registers
2480   const Register ret_addr = R3_RET;
2481   const Register appendix = R30;
2482   const Register cache    = R31;
2483   const Register index    = R21_tmp1;
2484   const Register tmp      = R11_scratch1;
2485   assert_different_registers(method, appendix, cache, index, tmp);
2486 
2487   Label resolved;
2488 
2489   __ load_resolved_indy_entry(cache, index);
2490   __ ld_ptr(method, in_bytes(ResolvedIndyEntry::method_offset()), cache);
2491 
2492   // The invokedynamic is unresolved iff method is null
2493   __ cmpdi(CR0, method, 0);
2494   __ bne(CR0, resolved);
2495 
2496   Bytecodes::Code code = bytecode();
2497 
2498   // Call to the interpreter runtime to resolve invokedynamic
2499   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
2500   __ li(R4_ARG2, code);
2501   __ call_VM(noreg, entry, R4_ARG2, true);
2502   // Update registers with resolved info
2503   __ load_resolved_indy_entry(cache, index);
2504   __ ld_ptr(method, in_bytes(ResolvedIndyEntry::method_offset()), cache);
2505 
2506   DEBUG_ONLY(__ cmpdi(CR0, method, 0));
2507   __ asm_assert_ne("Should be resolved by now");
2508   __ bind(resolved);
2509   __ isync(); // Order load wrt. succeeding loads.
2510 
2511   Label L_no_push;
2512   // Check if there is an appendix
2513   __ lbz(index, in_bytes(ResolvedIndyEntry::flags_offset()), cache);
2514   __ rldicl_(R0, index, 64-ResolvedIndyEntry::has_appendix_shift, 63);
2515   __ beq(CR0, L_no_push);
2516 
2517   // Get appendix
2518   __ lhz(index, in_bytes(ResolvedIndyEntry::resolved_references_index_offset()), cache);
2519   // Push the appendix as a trailing parameter
2520   assert(cache->is_nonvolatile(), "C-call in resolve_oop_handle");
2521   __ load_resolved_reference_at_index(appendix, index, /* temp */ ret_addr, tmp);
2522   __ verify_oop(appendix);
2523   __ push_ptr(appendix);   // push appendix (MethodType, CallSite, etc.)
2524   __ bind(L_no_push);
2525 
2526   // load return address
2527   {
2528     Register Rtable_addr = tmp;
2529     address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
2530 
2531     // compute return type
2532     __ lbz(index, in_bytes(ResolvedIndyEntry::result_type_offset()), cache);
2533     __ load_dispatch_table(Rtable_addr, (address*)table_addr);
2534     __ sldi(index, index, LogBytesPerWord);
2535     // Get return address.
2536     __ ldx(ret_addr, Rtable_addr, index);
2537   }
2538 }
2539 
2540 // ============================================================================
2541 // Field access
2542 
2543 // Volatile variables demand their effects be made known to all CPU's
2544 // in order. Store buffers on most chips allow reads & writes to
2545 // reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
2546 // without some kind of memory barrier (i.e., it's not sufficient that
2547 // the interpreter does not reorder volatile references, the hardware
2548 // also must not reorder them).
2549 //
2550 // According to the new Java Memory Model (JMM):
2551 // (1) All volatiles are serialized wrt to each other. ALSO reads &
2552 //     writes act as acquire & release, so:
2553 // (2) A read cannot let unrelated NON-volatile memory refs that
2554 //     happen after the read float up to before the read. It's OK for
2555 //     non-volatile memory refs that happen before the volatile read to
2556 //     float down below it.
// (3) Similarly, a volatile write cannot let unrelated NON-volatile
2558 //     memory refs that happen BEFORE the write float down to after the
2559 //     write. It's OK for non-volatile memory refs that happen after the
2560 //     volatile write to float up before it.
2561 //
2562 // We only put in barriers around volatile refs (they are expensive),
2563 // not _between_ memory refs (that would require us to track the
2564 // flavor of the previous memory refs). Requirements (2) and (3)
2565 // require some barriers before volatile stores and after volatile
2566 // loads. These nearly cover requirement (1) but miss the
2567 // volatile-store-volatile-load case.  This final case is placed after
2568 // volatile-stores although it could just as well go before
2569 // volatile-loads.
2570 
2571 // The registers cache and index expected to be set before call.
2572 // Correct values of the cache and index registers are preserved.
2573 // Kills:
2574 //   Rcache (if has_tos)
2575 //   Rscratch
2576 void TemplateTable::jvmti_post_field_access(Register Rcache, Register Rscratch, bool is_static, bool has_tos) {
2577 
2578   assert_different_registers(Rcache, Rscratch);
2579 
2580   if (JvmtiExport::can_post_field_access()) {
2581     Label Lno_field_access_post;
2582 
2583     // Check if post field access in enabled.
2584     int offs = __ load_const_optimized(Rscratch, JvmtiExport::get_field_access_count_addr(), R0, true);
2585     __ lwz(Rscratch, offs, Rscratch);
2586 
2587     __ cmpwi(CR0, Rscratch, 0);
2588     __ beq(CR0, Lno_field_access_post);
2589 
2590     // Post access enabled - do it!
2591     if (is_static) {
2592       __ li(R17_tos, 0);
2593     } else {
2594       if (has_tos) {
2595         // The fast bytecode versions have obj ptr in register.
2596         // Thus, save object pointer before call_VM() clobbers it
2597         // put object on tos where GC wants it.
2598         __ push_ptr(R17_tos);
2599       } else {
2600         // Load top of stack (do not pop the value off the stack).
2601         __ ld(R17_tos, Interpreter::expr_offset_in_bytes(0), R15_esp);
2602       }
2603       __ verify_oop(R17_tos);
2604     }
2605     // tos:   object pointer or null if static
2606     // cache: cache entry pointer
2607     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), R17_tos, Rcache);
2608     if (!is_static && has_tos) {
2609       // Restore object pointer.
2610       __ pop_ptr(R17_tos);
2611       __ verify_oop(R17_tos);
2612     } else {
2613       // Cache is still needed to get class or obj.
2614       __ load_field_entry(Rcache, Rscratch);
2615     }
2616 
2617     __ align(32, 12);
2618     __ bind(Lno_field_access_post);
2619   }
2620 }
2621 
2622 // kills R11_scratch1
2623 void TemplateTable::pop_and_check_object(Register Roop) {
2624   Register Rtmp = R11_scratch1;
2625 
2626   assert_different_registers(Rtmp, Roop);
2627   __ pop_ptr(Roop);
2628   // For field access must check obj.
2629   __ null_check_throw(Roop, -1, Rtmp);
2630   __ verify_oop(Roop);
2631 }
2632 
// Generates the template shared by getfield, nofast_getfield and getstatic.
//
// PPC64: implement volatile loads as fence-load-acquire.
//
// Shape of the generated code:
//  1. Resolve the field entry, optionally post a JVMTI field access event and
//     load holder/offset/tos state/flags from the resolved field entry.
//  2. Index a per-TosState branch table and jump to the type specific load.
//     Each table entry is the address of the load sequence; the instruction
//     directly in front of it is a fence. If
//     support_IRIW_for_not_multiple_copy_atomic_cpu is set, volatile accesses
//     enter one instruction earlier so that the fence is executed.
//  3. After the load the value is pushed and, where allowed, the bytecode is
//     rewritten to its fast variant. Volatile accesses (CR2 == equal) then
//     branch to the shared acquire tail at the end of the template.
//
// byte_no   - handed to resolve_cache_and_index_for_field.
// is_static - true for getstatic: no object is popped, Rclass_or_obj is used
//             as left by load_resolved_field_entry.
// rc        - may_rewrite permits patching non-static accesses to a fast
//             bytecode. A non-static may_not_rewrite template only emits the
//             dispatch part and reuses the load sequences (and branch table)
//             of the static template.
void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
  transition(vtos, vtos);

  // Shared tails: Lacquire = twi_0 + isync, Lisync = isync only.
  Label Lacquire, Lisync;

  const Register Rcache        = R3_ARG1,
                 Rclass_or_obj = R22_tmp2, // Needs to survive C call.
                 Roffset       = R23_tmp3, // Needs to survive C call.
                 Rtos_state    = R30,      // Needs to survive C call.
                 Rflags        = R31,      // Needs to survive C call.
                 Rbtable       = R5_ARG3,
                 Rbc           = R30,      // Same register as Rtos_state; only used by patch_bytecode after the dispatch.
                 Rscratch      = R11_scratch1; // used by load_field_cp_cache_entry
                 // R12_scratch2 used by load_field_cp_cache_entry

  // Entry points of the load sequences, indexed by TosState. They are static
  // because they are filled in once and then reused by the may_not_rewrite
  // variant (see the early return below).
  static address field_branch_table[number_of_states],
                 static_branch_table[number_of_states];

  address* branch_table = (is_static || rc == may_not_rewrite) ? static_branch_table : field_branch_table;

  // Get field offset.
  resolve_cache_and_index_for_field(byte_no, Rcache, Rscratch);

  // JVMTI support
  jvmti_post_field_access(Rcache, Rscratch, is_static, false);

  // Load after possible GC.
  load_resolved_field_entry(Rclass_or_obj, Rcache, Rtos_state, Roffset, Rflags, is_static); // Uses R11, R12

  // Load pointer to branch table.
  __ load_const_optimized(Rbtable, (address)branch_table, Rscratch);

  // Get volatile flag.
  __ rldicl(Rscratch, Rflags, 64-ResolvedFieldEntry::is_volatile_shift, 63); // Extract volatile bit.
  // Note: sync is needed before volatile load on PPC64.

#ifdef ASSERT
  // Debug builds: reject tos states that would index past the branch table.
  Label LFlagInvalid;
  __ cmpldi(CR0, Rtos_state, number_of_states);
  __ bge(CR0, LFlagInvalid);
#endif

  // Load from branch table and dispatch (volatile case: one instruction ahead).
  __ sldi(Rtos_state, Rtos_state, LogBytesPerWord);
  // CR2 keeps the volatile bit for the "Volatile?" branches after the loads.
  __ cmpwi(CR2, Rscratch, 1); // Volatile?
  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ sldi(Rscratch, Rscratch, exact_log2(BytesPerInstWord)); // Volatile ? size of 1 instruction : 0.
  }
  __ ldx(Rbtable, Rbtable, Rtos_state);

  // Get the obj from stack.
  if (!is_static) {
    pop_and_check_object(Rclass_or_obj); // Kills R11_scratch1.
  } else {
    __ verify_oop(Rclass_or_obj);
  }

  // NOTE(review): Rscratch is R11_scratch1, which pop_and_check_object is
  // documented to kill, yet it is read here. Presumably the temp is only
  // written on the throwing path of the null check - confirm.
  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ subf(Rbtable, Rscratch, Rbtable); // Point to volatile/non-volatile entry point.
  }
  __ mtctr(Rbtable);
  __ bctr();

#ifdef ASSERT
  __ bind(LFlagInvalid);
  __ stop("got invalid flag");
#endif

  if (!is_static && rc == may_not_rewrite) {
    // We reuse the code from is_static.  It's jumped to via the table above.
    return;
  }

  // From here on: one load sequence per TosState. Every sequence starts with
  // a fence (the volatile entry point) followed by the table entry point.

#ifdef ASSERT
  // vtos is not a valid field type; debug builds still fill the table slot
  // and stop if it is ever reached.
  // __ bind(Lvtos);
  address pc_before_fence = __ pc();
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  // The dispatch above subtracts exactly one instruction for volatile fields.
  assert(__ pc() - pc_before_fence == (ptrdiff_t)BytesPerInstWord, "must be single instruction");
  assert(branch_table[vtos] == nullptr, "can't compute twice");
  branch_table[vtos] = __ pc(); // non-volatile_entry point
  __ stop("vtos unexpected");
#endif

  // double
  __ align(32, 28, 28); // Align load.
  // __ bind(Ldtos);
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[dtos] == nullptr, "can't compute twice");
  branch_table[dtos] = __ pc(); // non-volatile_entry point
  __ lfdx(F15_ftos, Rclass_or_obj, Roffset);
  __ push(dtos);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_dgetfield, Rbc, Rscratch);
  }
  {
    // Floating point values cannot use the twi_0 based Lacquire tail; the
    // dependency on the loaded value is created with fcmpu instead.
    Label acquire_double;
    __ beq(CR2, acquire_double); // Volatile?
    __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

    __ bind(acquire_double);
    __ fcmpu(CR0, F15_ftos, F15_ftos); // Acquire by cmp-br-isync.
    __ beq_predict_taken(CR0, Lisync);
    __ b(Lisync); // In case of NAN.
  }

  // float
  __ align(32, 28, 28); // Align load.
  // __ bind(Lftos);
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[ftos] == nullptr, "can't compute twice");
  branch_table[ftos] = __ pc(); // non-volatile_entry point
  __ lfsx(F15_ftos, Rclass_or_obj, Roffset);
  __ push(ftos);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_fgetfield, Rbc, Rscratch);
  }
  {
    // Same fcmpu based acquire as for double.
    Label acquire_float;
    __ beq(CR2, acquire_float); // Volatile?
    __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

    __ bind(acquire_float);
    __ fcmpu(CR0, F15_ftos, F15_ftos); // Acquire by cmp-br-isync.
    __ beq_predict_taken(CR0, Lisync);
    __ b(Lisync); // In case of NAN.
  }

  // int (sign extending word load)
  __ align(32, 28, 28); // Align load.
  // __ bind(Litos);
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[itos] == nullptr, "can't compute twice");
  branch_table[itos] = __ pc(); // non-volatile_entry point
  __ lwax(R17_tos, Rclass_or_obj, Roffset);
  __ push(itos);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_igetfield, Rbc, Rscratch);
  }
  __ beq(CR2, Lacquire); // Volatile?
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  // long
  __ align(32, 28, 28); // Align load.
  // __ bind(Lltos);
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[ltos] == nullptr, "can't compute twice");
  branch_table[ltos] = __ pc(); // non-volatile_entry point
  __ ldx(R17_tos, Rclass_or_obj, Roffset);
  __ push(ltos);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_lgetfield, Rbc, Rscratch);
  }
  __ beq(CR2, Lacquire); // Volatile?
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  // byte (zero extending load followed by an explicit sign extension)
  __ align(32, 28, 28); // Align load.
  // __ bind(Lbtos);
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[btos] == nullptr, "can't compute twice");
  branch_table[btos] = __ pc(); // non-volatile_entry point
  __ lbzx(R17_tos, Rclass_or_obj, Roffset);
  __ extsb(R17_tos, R17_tos);
  __ push(btos);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_bgetfield, Rbc, Rscratch);
  }
  __ beq(CR2, Lacquire); // Volatile?
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  // boolean (zero extending byte load, no sign extension)
  __ align(32, 28, 28); // Align load.
  // __ bind(Lztos); (same code as btos)
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[ztos] == nullptr, "can't compute twice");
  branch_table[ztos] = __ pc(); // non-volatile_entry point
  __ lbzx(R17_tos, Rclass_or_obj, Roffset);
  __ push(ztos);
  if (!is_static && rc == may_rewrite) {
    // use btos rewriting, no truncating to t/f bit is needed for getfield.
    patch_bytecode(Bytecodes::_fast_bgetfield, Rbc, Rscratch);
  }
  __ beq(CR2, Lacquire); // Volatile?
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  // char (zero extending halfword load)
  __ align(32, 28, 28); // Align load.
  // __ bind(Lctos);
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[ctos] == nullptr, "can't compute twice");
  branch_table[ctos] = __ pc(); // non-volatile_entry point
  __ lhzx(R17_tos, Rclass_or_obj, Roffset);
  __ push(ctos);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_cgetfield, Rbc, Rscratch);
  }
  __ beq(CR2, Lacquire); // Volatile?
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  // short (sign extending halfword load)
  __ align(32, 28, 28); // Align load.
  // __ bind(Lstos);
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[stos] == nullptr, "can't compute twice");
  branch_table[stos] = __ pc(); // non-volatile_entry point
  __ lhax(R17_tos, Rclass_or_obj, Roffset);
  __ push(stos);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_sgetfield, Rbc, Rscratch);
  }
  __ beq(CR2, Lacquire); // Volatile?
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  // reference
  __ align(32, 28, 28); // Align load.
  // __ bind(Latos);
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[atos] == nullptr, "can't compute twice");
  branch_table[atos] = __ pc(); // non-volatile_entry point
  if (!Arguments::is_valhalla_enabled()) {
    // Rflags is no longer needed at this point and serves as temp for the oop load.
    do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
    __ verify_oop(R17_tos);
    __ push(atos);
    //__ dcbt(R17_tos); // prefetch
    if (!is_static && rc == may_rewrite) {
      patch_bytecode(Bytecodes::_fast_agetfield, Rbc, Rscratch);
    }
  } else { // Valhalla
    if (is_static) {
      // Static fields always take the plain oop load; no flat field handling here.
      do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
      __ verify_oop(R17_tos);
      __ push(atos);
    } else {
      Label is_flat;
      // Must test Rflags before do_oop_load reuses it as temp.
      __ test_field_is_flat(Rflags, is_flat);
      do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
      __ verify_oop(R17_tos);
      __ push(atos);
      if (rc == may_rewrite) {
        patch_bytecode(Bytecodes::_fast_agetfield, Rbc, Rscratch);
      }
      // The non-flat path ends here; only the flat path falls through to the
      // common tail below.
      __ beq(CR2, Lacquire); // Volatile?
      __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

      __ bind(is_flat);
      // field is flat (null-free or nullable with a null-marker)
      // NOTE(review): read_flat_field takes Rcache (R3_ARG1); this presumes the
      // field entry is still intact in that register at this point - confirm.
      __ mr(R17_tos, Rclass_or_obj);
      __ read_flat_field(Rcache, R17_tos);
      __ verify_oop(R17_tos);
      __ push(atos);
      if (rc == may_rewrite) {
        patch_bytecode(Bytecodes::_fast_vgetfield, Rbc, Rscratch);
      }
    }
  }
  __ beq(CR2, Lacquire); // Volatile?
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  // Shared acquire tail for volatile loads. Integer/reference loads enter at
  // Lacquire (twi_0 on the loaded value, then isync); the floating point
  // paths have already done their compare-and-branch and enter at Lisync.
  __ align(32, 12);
  __ bind(Lacquire);
  __ twi_0(R17_tos);
  __ bind(Lisync);
  __ isync(); // acquire

#ifdef ASSERT
  // Every TosState must have received an entry point above.
  for (int i = 0; i<number_of_states; ++i) {
    assert(branch_table[i], "get initialization");
    //tty->print_cr("get: %s_branch_table[%d] = 0x%llx (opcode 0x%llx)",
    //              is_static ? "static" : "field", i, branch_table[i], *((unsigned int*)branch_table[i]));
  }
#endif
}
2897 
// Template for getfield: non-static field load. The RewriteControl argument is
// omitted, so its declared default (outside this chunk) applies.
void TemplateTable::getfield(int byte_no) {
  getfield_or_static(byte_no, false);
}
2901 
// Template for nofast_getfield: non-static field load that is never rewritten
// to a fast bytecode (may_not_rewrite).
void TemplateTable::nofast_getfield(int byte_no) {
  getfield_or_static(byte_no, false, may_not_rewrite);
}
2905 
// Template for getstatic: static field load (is_static == true).
void TemplateTable::getstatic(int byte_no) {
  getfield_or_static(byte_no, true);
}
2909 
2910 // The registers cache and index expected to be set before call.
2911 // The function may destroy various registers, just not the cache and index registers.
2912 void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rscratch, bool is_static) {
2913 
2914   assert_different_registers(Rcache, Rscratch);
2915 
2916   if (JvmtiExport::can_post_field_modification()) {
2917     Label Lno_field_mod_post;
2918 
2919     // Check if post field access in enabled.
2920     int offs = __ load_const_optimized(Rscratch, JvmtiExport::get_field_modification_count_addr(), R0, true);
2921     __ lwz(Rscratch, offs, Rscratch);
2922 
2923     __ cmpwi(CR0, Rscratch, 0);
2924     __ beq(CR0, Lno_field_mod_post);
2925 
2926     // Do the post
2927     const Register Robj = Rscratch;
2928 
2929     if (is_static) {
2930       // Life is simple. Null out the object pointer.
2931       __ li(Robj, 0);
2932     } else {
2933       // In case of the fast versions, value lives in registers => put it back on tos.
2934       int offs = Interpreter::expr_offset_in_bytes(0);
2935       Register base = R15_esp;
2936       switch(bytecode()) {
2937         case Bytecodes::_fast_vputfield: // fall through
2938         case Bytecodes::_fast_aputfield: __ push_ptr(); offs+= Interpreter::stackElementSize; break;
2939         case Bytecodes::_fast_iputfield: // Fall through
2940         case Bytecodes::_fast_bputfield: // Fall through
2941         case Bytecodes::_fast_zputfield: // Fall through
2942         case Bytecodes::_fast_cputfield: // Fall through
2943         case Bytecodes::_fast_sputfield: __ push_i(); offs+=  Interpreter::stackElementSize; break;
2944         case Bytecodes::_fast_lputfield: __ push_l(); offs+=2*Interpreter::stackElementSize; break;
2945         case Bytecodes::_fast_fputfield: __ push_f(); offs+=  Interpreter::stackElementSize; break;
2946         case Bytecodes::_fast_dputfield: __ push_d(); offs+=2*Interpreter::stackElementSize; break;
2947         default: {
2948           offs = 0;
2949           base = Robj;
2950           const Register Rtos_state = Robj;
2951           Label is_one_slot;
2952           // Life is harder. The stack holds the value on top, followed by the
2953           // object. We don't know the size of the value, though; it could be
2954           // one or two words depending on its type. As a result, we must find
2955           // the type to determine where the object is.
2956           __ lbz(Rtos_state, in_bytes(ResolvedFieldEntry::type_offset()), Rcache);
2957 
2958           __ cmpwi(CR0, Rtos_state, ltos);
2959           __ cmpwi(CR1, Rtos_state, dtos);
2960           __ addi(base, R15_esp, Interpreter::expr_offset_in_bytes(1));
2961           __ crnor(CR0, Assembler::equal, CR1, Assembler::equal);
2962           __ beq(CR0, is_one_slot);
2963           __ addi(base, R15_esp, Interpreter::expr_offset_in_bytes(2));
2964           __ bind(is_one_slot);
2965           break;
2966         }
2967       }
2968       __ ld(Robj, offs, base);
2969       __ verify_oop(Robj);
2970     }
2971 
2972     // Pass arguments without register clashes (R16_thread passed by call_VM)
2973     __ mr_if_needed(R4_ARG2, Robj);
2974     assert(Rcache != R4_ARG2, "smashed argument");
2975     __ mr_if_needed(R5_ARG3, Rcache);
2976     __ addi(R6_ARG4, R15_esp, Interpreter::expr_offset_in_bytes(0)); // set R6_ARG4 last (may use same reg as other args)
2977     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification));
2978     __ load_field_entry(Rcache, Rscratch);
2979 
2980     // In case of the fast versions, value lives in registers => put it back on tos.
2981     switch(bytecode()) {
2982       case Bytecodes::_fast_vputfield: // fall through
2983       case Bytecodes::_fast_aputfield: __ pop_ptr(); break;
2984       case Bytecodes::_fast_iputfield: // Fall through
2985       case Bytecodes::_fast_bputfield: // Fall through
2986       case Bytecodes::_fast_zputfield: // Fall through
2987       case Bytecodes::_fast_cputfield: // Fall through
2988       case Bytecodes::_fast_sputfield: __ pop_i(); break;
2989       case Bytecodes::_fast_lputfield: __ pop_l(); break;
2990       case Bytecodes::_fast_fputfield: __ pop_f(); break;
2991       case Bytecodes::_fast_dputfield: __ pop_d(); break;
2992       default: break; // Nothin' to do.
2993     }
2994 
2995     __ align(32, 12);
2996     __ bind(Lno_field_mod_post);
2997   }
2998 }
2999 
// Generates the template shared by putfield, nofast_putfield and putstatic.
//
// PPC64: implement volatile stores as release-store (return bytecode contains an additional release).
//
// Shape of the generated code:
//  1. Resolve the field entry, optionally post a JVMTI field modification
//     event and load holder/offset/tos state/flags from the resolved entry.
//  2. Index a per-TosState branch table and jump to the type specific store.
//     Each table entry is the address of the store sequence; the instruction
//     directly in front of it is a release barrier. Volatile accesses enter
//     one instruction earlier so that the release is executed.
//  3. Each sequence pops the value (and the object unless is_static), stores
//     it and, where allowed, rewrites the bytecode to its fast variant.
//     If support_IRIW_for_not_multiple_copy_atomic_cpu is not set, volatile
//     stores finally branch to a trailing fence at the end of the template.
//
// byte_no   - handed to resolve_cache_and_index_for_field and patch_bytecode.
// is_static - true for putstatic: no object is popped, Rclass_or_obj is used
//             as left by load_resolved_field_entry.
// rc        - may_rewrite permits patching non-static accesses to a fast
//             bytecode. Unlike the getfield template, every variant emits its
//             own store sequences and has its own branch table.
void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
  // Target of the "Volatile?" branches; bound only when no IRIW support is needed.
  Label Lvolatile;

  const Register Rcache        = R6_ARG4,  // Do not use ARG1-3 (causes trouble in jvmti_post_field_mod or write_flat_field).
                 Rclass_or_obj = R31,      // Needs to survive C call.
                 Roffset       = R22_tmp2, // Needs to survive C call.
                 Rtos_state    = R23_tmp3, // Needs to survive C call.
                 Rflags        = R30,      // Needs to survive C call.
                 Rbtable       = R4_ARG2,
                 Rscratch      = R11_scratch1, // used by load_field_cp_cache_entry
                 Rscratch2     = R12_scratch2, // used by load_field_cp_cache_entry
                 Rscratch3     = R10_ARG8,
                 Rbc           = Rscratch3;    // Shares R10_ARG8 with the third temp of do_oop_store.
  const ConditionRegister CR_is_vol = CR2; // Non-volatile condition register (survives runtime call in do_oop_store).

  // Entry points of the store sequences, indexed by TosState; one table per
  // template variant (static, rewriting putfield, non-rewriting putfield).
  static address field_rw_branch_table[number_of_states],
                 field_norw_branch_table[number_of_states],
                 static_branch_table[number_of_states];

  address* branch_table = is_static ? static_branch_table :
    (rc == may_rewrite ? field_rw_branch_table : field_norw_branch_table);

  // Stack (grows up):
  //  value
  //  obj

  // Load the field offset.
  resolve_cache_and_index_for_field(byte_no, Rcache, Rscratch);
  jvmti_post_field_mod(Rcache, Rscratch, is_static);
  load_resolved_field_entry(Rclass_or_obj, Rcache, Rtos_state, Roffset, Rflags, is_static); // Uses R11, R12

  // Load pointer to branch table.
  __ load_const_optimized(Rbtable, (address)branch_table, Rscratch);

  // Get volatile flag.
  __ rldicl(Rscratch, Rflags, 64-ResolvedFieldEntry::is_volatile_shift, 63); // Extract volatile bit.

#ifdef ASSERT
  // Debug builds: reject tos states that would index past the branch table.
  Label LFlagInvalid;
  __ cmpldi(CR0, Rtos_state, number_of_states);
  __ bge(CR0, LFlagInvalid);
#endif

  // Load from branch table and dispatch (volatile case: one instruction ahead).
  __ sldi(Rtos_state, Rtos_state, LogBytesPerWord);
  if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
    // Only needed for the trailing fence, which is emitted in this mode only.
    __ cmpwi(CR_is_vol, Rscratch, 1);  // Volatile?
  }
  __ sldi(Rscratch, Rscratch, exact_log2(BytesPerInstWord)); // Volatile? size of instruction 1 : 0.
  __ ldx(Rbtable, Rbtable, Rtos_state);

  __ subf(Rbtable, Rscratch, Rbtable); // Point to volatile/non-volatile entry point.
  __ mtctr(Rbtable);
  __ bctr();

  // From here on: one store sequence per TosState. Every sequence starts with
  // a release (the volatile entry point) followed by the table entry point.

#ifdef ASSERT
  __ bind(LFlagInvalid);
  __ stop("got invalid flag");

  // vtos is not a valid field type; debug builds still fill the table slot
  // and stop if it is ever reached.
  // __ bind(Lvtos);
  address pc_before_release = __ pc();
  __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
  // The dispatch above subtracts exactly one instruction for volatile fields.
  assert(__ pc() - pc_before_release == (ptrdiff_t)BytesPerInstWord, "must be single instruction");
  assert(branch_table[vtos] == nullptr, "can't compute twice");
  branch_table[vtos] = __ pc(); // non-volatile_entry point
  __ stop("vtos unexpected");
#endif

  // double
  __ align(32, 28, 28); // Align pop.
  // __ bind(Ldtos);
  __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[dtos] == nullptr, "can't compute twice");
  branch_table[dtos] = __ pc(); // non-volatile_entry point
  __ pop(dtos);
  if (!is_static) {
    pop_and_check_object(Rclass_or_obj);  // Kills R11_scratch1.
  }
  __ stfdx(F15_ftos, Rclass_or_obj, Roffset);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_dputfield, Rbc, Rscratch, true, byte_no);
  }
  if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ beq(CR_is_vol, Lvolatile); // Volatile?
  }
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  // float
  __ align(32, 28, 28); // Align pop.
  // __ bind(Lftos);
  __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[ftos] == nullptr, "can't compute twice");
  branch_table[ftos] = __ pc(); // non-volatile_entry point
  __ pop(ftos);
  if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
  __ stfsx(F15_ftos, Rclass_or_obj, Roffset);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_fputfield, Rbc, Rscratch, true, byte_no);
  }
  if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ beq(CR_is_vol, Lvolatile); // Volatile?
  }
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  // int
  __ align(32, 28, 28); // Align pop.
  // __ bind(Litos);
  __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[itos] == nullptr, "can't compute twice");
  branch_table[itos] = __ pc(); // non-volatile_entry point
  __ pop(itos);
  if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
  __ stwx(R17_tos, Rclass_or_obj, Roffset);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_iputfield, Rbc, Rscratch, true, byte_no);
  }
  if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ beq(CR_is_vol, Lvolatile); // Volatile?
  }
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  // long
  __ align(32, 28, 28); // Align pop.
  // __ bind(Lltos);
  __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[ltos] == nullptr, "can't compute twice");
  branch_table[ltos] = __ pc(); // non-volatile_entry point
  __ pop(ltos);
  if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
  __ stdx(R17_tos, Rclass_or_obj, Roffset);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_lputfield, Rbc, Rscratch, true, byte_no);
  }
  if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ beq(CR_is_vol, Lvolatile); // Volatile?
  }
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  // byte
  __ align(32, 28, 28); // Align pop.
  // __ bind(Lbtos);
  __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[btos] == nullptr, "can't compute twice");
  branch_table[btos] = __ pc(); // non-volatile_entry point
  __ pop(btos);
  if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
  __ stbx(R17_tos, Rclass_or_obj, Roffset);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_bputfield, Rbc, Rscratch, true, byte_no);
  }
  if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ beq(CR_is_vol, Lvolatile); // Volatile?
  }
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  // boolean
  __ align(32, 28, 28); // Align pop.
  // __ bind(Lztos);
  __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[ztos] == nullptr, "can't compute twice");
  branch_table[ztos] = __ pc(); // non-volatile_entry point
  __ pop(ztos);
  if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
  __ andi(R17_tos, R17_tos, 0x1); // Only the lowest bit of the value is stored.
  __ stbx(R17_tos, Rclass_or_obj, Roffset);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_zputfield, Rbc, Rscratch, true, byte_no);
  }
  if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ beq(CR_is_vol, Lvolatile); // Volatile?
  }
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  // char
  __ align(32, 28, 28); // Align pop.
  // __ bind(Lctos);
  __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[ctos] == nullptr, "can't compute twice");
  branch_table[ctos] = __ pc(); // non-volatile_entry point
  __ pop(ctos);
  if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
  __ sthx(R17_tos, Rclass_or_obj, Roffset);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_cputfield, Rbc, Rscratch, true, byte_no);
  }
  if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ beq(CR_is_vol, Lvolatile); // Volatile?
  }
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  // short
  __ align(32, 28, 28); // Align pop.
  // __ bind(Lstos);
  __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[stos] == nullptr, "can't compute twice");
  branch_table[stos] = __ pc(); // non-volatile_entry point
  __ pop(stos);
  if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
  __ sthx(R17_tos, Rclass_or_obj, Roffset);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_sputfield, Rbc, Rscratch, true, byte_no);
  }
  if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ beq(CR_is_vol, Lvolatile); // Volatile?
  }
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  // reference
  __ align(32, 28, 28); // Align pop.
  // __ bind(Latos);
  __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[atos] == nullptr, "can't compute twice");
  branch_table[atos] = __ pc(); // non-volatile_entry point
  __ pop(atos);
  if (!Arguments::is_valhalla_enabled()) {
    if (!is_static) { pop_and_check_object(Rclass_or_obj); } // kills R11_scratch1
    do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP);
    if (!is_static && rc == may_rewrite) {
      patch_bytecode(Bytecodes::_fast_aputfield, Rbc, Rscratch, true, byte_no);
    }
  } else { // Valhalla
    if (is_static) {
      Label is_nullable;
      // Presumably skips the null check unless the field is a null-free
      // inline type (inferred from the helper's name) - confirm.
      __ test_field_is_not_null_free_inline_type(Rflags, is_nullable);
      __ null_check_throw(R17_tos, -1, Rscratch);
      __ align(32, 12);
      __ bind(is_nullable);
      do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP);
    } else {
      // Three cases: plain reference (falls through), null-free reference and
      // flat field. The latter two are rewritten to _fast_vputfield.
      Label null_free_reference, is_flat, rewrite_inline;
      __ test_field_is_flat(Rflags, is_flat);
      __ test_field_is_null_free_inline_type(Rflags, null_free_reference);
      pop_and_check_object(Rclass_or_obj);
      // Store into the field
      do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP);
      if (rc == may_rewrite) {
        patch_bytecode(Bytecodes::_fast_aputfield, Rbc, Rscratch, true, byte_no);
      }
      // The plain reference path ends here; the other two paths fall through
      // to the common tail below.
      if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
        __ beq(CR_is_vol, Lvolatile); // Volatile?
      }
      __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

      // Implementation of the inline type semantic
      __ bind(null_free_reference);
      // The value to store is null-checked before the object is popped.
      __ null_check_throw(R17_tos, -1, Rscratch);
      pop_and_check_object(Rclass_or_obj);
      // Store into the field
      do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP);
      __ b(rewrite_inline);

      __ bind(is_flat);
      pop_and_check_object(Rclass_or_obj);
      __ write_flat_field(Rcache, Rscratch, Rscratch2, Rclass_or_obj, Roffset, R17_tos);
      __ bind(rewrite_inline);
      if (rc == may_rewrite) {
        patch_bytecode(Bytecodes::_fast_vputfield, Rbc, Rscratch, true, byte_no);
      }
    }
  } // Valhalla
  if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ beq(CR_is_vol, Lvolatile); // Volatile?
    __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

    // Shared tail for volatile stores: the fence placed after volatile
    // stores (volatile-store-volatile-load case).
    __ align(32, 12);
    __ bind(Lvolatile);
    __ fence();
  }
  // fallthru: __ b(Lexit);

#ifdef ASSERT
  // Every TosState must have received an entry point above.
  for (int i = 0; i<number_of_states; ++i) {
    assert(branch_table[i], "put initialization");
    //tty->print_cr("put: %s_branch_table[%d] = 0x%llx (opcode 0x%llx)",
    //              is_static ? "static" : "field", i, branch_table[i], *((unsigned int*)branch_table[i]));
  }
#endif
}
3270 
3271 void TemplateTable::putfield(int byte_no) {
3272   putfield_or_static(byte_no, false);
3273 }
3274 
3275 void TemplateTable::nofast_putfield(int byte_no) {
3276   putfield_or_static(byte_no, false, may_not_rewrite);
3277 }
3278 
3279 void TemplateTable::putstatic(int byte_no) {
3280   putfield_or_static(byte_no, true);
3281 }
3282 
// On PPC64, we have a different jvmti_post_field_mod which does the job.
// fast_storefield calls jvmti_post_field_mod directly, so the only code
// emitted here is a should_not_reach_here() trap.
void TemplateTable::jvmti_post_fast_field_mod() {
  __ should_not_reach_here();
}
3287 
3288 void TemplateTable::fast_storefield(TosState state) {
3289   transition(state, vtos);
3290 
3291   const Register Rcache        = R6_ARG4,  // Do not use ARG1-3 (causes trouble in jvmti_post_field_mod or write_flat_field).
3292                  Rclass_or_obj = R31,      // Needs to survive C call.
3293                  Roffset       = R22_tmp2, // Needs to survive C call.
3294                  Rflags        = R3_ARG1,
3295                  Rscratch      = R11_scratch1, // used by load_field_cp_cache_entry
3296                  Rscratch2     = R12_scratch2, // used by load_field_cp_cache_entry
3297                  Rscratch3     = R4_ARG2;
3298   const ConditionRegister CR_is_vol = CR2; // Non-volatile condition register (survives runtime call in do_oop_store).
3299 
3300   // Constant pool already resolved => Load flags and offset of field.
3301   __ load_field_entry(Rcache, Rscratch, 1, /* for_fast_bytecode */ true);
3302   jvmti_post_field_mod(Rcache, Rscratch, false /* not static */);
3303   load_resolved_field_entry(noreg, Rcache, noreg, Roffset, Rflags, false); // Uses R11, R12
3304 
3305   // Get the obj and the final store addr.
3306   pop_and_check_object(Rclass_or_obj); // Kills R11_scratch1.
3307 
3308   // Get volatile flag.
3309   __ rldicl_(Rscratch, Rflags, 64-ResolvedFieldEntry::is_volatile_shift, 63); // Extract volatile bit.
3310   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { __ cmpdi(CR_is_vol, Rscratch, 1); }
3311   {
3312     Label LnotVolatile;
3313     __ beq(CR0, LnotVolatile);
3314     __ release();
3315     __ align(32, 12);
3316     __ bind(LnotVolatile);
3317   }
3318 
3319   // Do the store and fencing.
3320   switch(bytecode()) {
3321     case Bytecodes::_fast_vputfield:
3322     {
3323       Label is_flat, done;
3324       __ test_field_is_flat(Rflags, is_flat);
3325       __ null_check_throw(Rclass_or_obj, -1, Rscratch);
3326       do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP);
3327       __ b(done);
3328       __ bind(is_flat);
3329       __ write_flat_field(Rcache, Rscratch, Rscratch2, Rclass_or_obj, Roffset, R17_tos);
3330       __ bind(done);
3331       break;
3332     }
3333 
3334     case Bytecodes::_fast_aputfield:
3335       // Store into the field.
3336       do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP);
3337       break;
3338 
3339     case Bytecodes::_fast_iputfield:
3340       __ stwx(R17_tos, Rclass_or_obj, Roffset);
3341       break;
3342 
3343     case Bytecodes::_fast_lputfield:
3344       __ stdx(R17_tos, Rclass_or_obj, Roffset);
3345       break;
3346 
3347     case Bytecodes::_fast_zputfield:
3348       __ andi(R17_tos, R17_tos, 0x1);  // boolean is true if LSB is 1
3349       // fall through to bputfield
3350     case Bytecodes::_fast_bputfield:
3351       __ stbx(R17_tos, Rclass_or_obj, Roffset);
3352       break;
3353 
3354     case Bytecodes::_fast_cputfield:
3355     case Bytecodes::_fast_sputfield:
3356       __ sthx(R17_tos, Rclass_or_obj, Roffset);
3357       break;
3358 
3359     case Bytecodes::_fast_fputfield:
3360       __ stfsx(F15_ftos, Rclass_or_obj, Roffset);
3361       break;
3362 
3363     case Bytecodes::_fast_dputfield:
3364       __ stfdx(F15_ftos, Rclass_or_obj, Roffset);
3365       break;
3366 
3367     default: ShouldNotReachHere();
3368   }
3369 
3370   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
3371     Label LVolatile;
3372     __ beq(CR_is_vol, LVolatile);
3373     __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
3374 
3375     __ align(32, 12);
3376     __ bind(LVolatile);
3377     __ fence();
3378   }
3379 }
3380 
3381 void TemplateTable::fast_accessfield(TosState state) {
3382   transition(atos, state);
3383 
3384   Label LisVolatile;
3385   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
3386 
3387   const Register Rcache        = R3_ARG1,
3388                  Rclass_or_obj = R17_tos,
3389                  Roffset       = R22_tmp2,
3390                  Rflags        = R23_tmp3,
3391                  Rscratch      = R11_scratch1; // used by load_field_cp_cache_entry
3392                  // R12_scratch2 used by load_field_cp_cache_entry
3393 
3394   // Constant pool already resolved. Get the field offset.
3395   __ load_field_entry(Rcache, Rscratch, 1, /* for_fast_bytecode */ true);
3396   load_resolved_field_entry(noreg, Rcache, noreg, Roffset, Rflags, false); // Uses R11, R12
3397 
3398   // JVMTI support
3399   jvmti_post_field_access(Rcache, Rscratch, false, true);
3400 
3401   // Get the load address.
3402   __ null_check_throw(Rclass_or_obj, -1, Rscratch);
3403 
3404   // Get volatile flag.
3405   __ rldicl_(Rscratch, Rflags, 64-ResolvedFieldEntry::is_volatile_shift, 63); // Extract volatile bit.
3406   __ bne(CR0, LisVolatile);
3407 
3408   switch(bytecode()) {
3409     case Bytecodes::_fast_vgetfield:
3410     {
3411       // field is flat
3412       __ read_flat_field(Rcache, R17_tos);
3413       __ verify_oop(R17_tos);
3414       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3415 
3416       __ bind(LisVolatile);
3417       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3418       __ read_flat_field(Rcache, R17_tos);
3419       __ verify_oop(R17_tos);
3420       // memory barrier in read_flat_field
3421       break;
3422     }
3423     case Bytecodes::_fast_agetfield:
3424     {
3425       do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
3426       __ verify_oop(R17_tos);
3427       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3428 
3429       __ bind(LisVolatile);
3430       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3431       do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
3432       __ verify_oop(R17_tos);
3433       __ twi_0(R17_tos);
3434       __ isync();
3435       break;
3436     }
3437     case Bytecodes::_fast_igetfield:
3438     {
3439       __ lwax(R17_tos, Rclass_or_obj, Roffset);
3440       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3441 
3442       __ bind(LisVolatile);
3443       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3444       __ lwax(R17_tos, Rclass_or_obj, Roffset);
3445       __ twi_0(R17_tos);
3446       __ isync();
3447       break;
3448     }
3449     case Bytecodes::_fast_lgetfield:
3450     {
3451       __ ldx(R17_tos, Rclass_or_obj, Roffset);
3452       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3453 
3454       __ bind(LisVolatile);
3455       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3456       __ ldx(R17_tos, Rclass_or_obj, Roffset);
3457       __ twi_0(R17_tos);
3458       __ isync();
3459       break;
3460     }
3461     case Bytecodes::_fast_bgetfield:
3462     {
3463       __ lbzx(R17_tos, Rclass_or_obj, Roffset);
3464       __ extsb(R17_tos, R17_tos);
3465       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3466 
3467       __ bind(LisVolatile);
3468       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3469       __ lbzx(R17_tos, Rclass_or_obj, Roffset);
3470       __ twi_0(R17_tos);
3471       __ extsb(R17_tos, R17_tos);
3472       __ isync();
3473       break;
3474     }
3475     case Bytecodes::_fast_cgetfield:
3476     {
3477       __ lhzx(R17_tos, Rclass_or_obj, Roffset);
3478       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3479 
3480       __ bind(LisVolatile);
3481       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3482       __ lhzx(R17_tos, Rclass_or_obj, Roffset);
3483       __ twi_0(R17_tos);
3484       __ isync();
3485       break;
3486     }
3487     case Bytecodes::_fast_sgetfield:
3488     {
3489       __ lhax(R17_tos, Rclass_or_obj, Roffset);
3490       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3491 
3492       __ bind(LisVolatile);
3493       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3494       __ lhax(R17_tos, Rclass_or_obj, Roffset);
3495       __ twi_0(R17_tos);
3496       __ isync();
3497       break;
3498     }
3499     case Bytecodes::_fast_fgetfield:
3500     {
3501       __ lfsx(F15_ftos, Rclass_or_obj, Roffset);
3502       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3503 
3504       __ bind(LisVolatile);
3505       Label Ldummy;
3506       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3507       __ lfsx(F15_ftos, Rclass_or_obj, Roffset);
3508       __ fcmpu(CR0, F15_ftos, F15_ftos); // Acquire by cmp-br-isync.
3509       __ bne_predict_not_taken(CR0, Ldummy);
3510       __ bind(Ldummy);
3511       __ isync();
3512       break;
3513     }
3514     case Bytecodes::_fast_dgetfield:
3515     {
3516       __ lfdx(F15_ftos, Rclass_or_obj, Roffset);
3517       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3518 
3519       __ bind(LisVolatile);
3520       Label Ldummy;
3521       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3522       __ lfdx(F15_ftos, Rclass_or_obj, Roffset);
3523       __ fcmpu(CR0, F15_ftos, F15_ftos); // Acquire by cmp-br-isync.
3524       __ bne_predict_not_taken(CR0, Ldummy);
3525       __ bind(Ldummy);
3526       __ isync();
3527       break;
3528     }
3529     default: ShouldNotReachHere();
3530   }
3531 }
3532 
3533 void TemplateTable::fast_xaccess(TosState state) {
3534   transition(vtos, state);
3535 
3536   Label LisVolatile;
3537   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
3538   const Register Rcache        = R3_ARG1,
3539                  Rclass_or_obj = R17_tos,
3540                  Roffset       = R22_tmp2,
3541                  Rflags        = R23_tmp3,
3542                  Rscratch      = R11_scratch1;
3543                  // R12_scratch2 used by load_field_cp_cache_entry
3544 
3545   __ ld(Rclass_or_obj, 0, R18_locals);
3546 
3547   // Constant pool already resolved. Get the field offset.
3548   __ load_field_entry(Rcache, Rscratch, 2, /* for_fast_bytecode */ true);
3549   load_resolved_field_entry(noreg, Rcache, noreg, Roffset, Rflags, false); // Uses R11, R12
3550 
3551   // JVMTI support not needed, since we switch back to single bytecode as soon as debugger attaches.
3552 
3553   // Needed to report exception at the correct bcp.
3554   __ addi(R14_bcp, R14_bcp, 1);
3555 
3556   // Get the load address.
3557   __ null_check_throw(Rclass_or_obj, -1, Rscratch);
3558 
3559   // Get volatile flag.
3560   __ rldicl_(Rscratch, Rflags, 64-ResolvedFieldEntry::is_volatile_shift, 63); // Extract volatile bit.
3561   __ bne(CR0, LisVolatile);
3562 
3563   switch(state) {
3564   case atos:
3565     {
3566       do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
3567       __ verify_oop(R17_tos);
3568       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()) - 1); // Undo bcp increment.
3569 
3570       __ bind(LisVolatile);
3571       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3572       do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
3573       __ verify_oop(R17_tos);
3574       __ twi_0(R17_tos);
3575       __ isync();
3576       break;
3577     }
3578   case itos:
3579     {
3580       __ lwax(R17_tos, Rclass_or_obj, Roffset);
3581       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()) - 1); // Undo bcp increment.
3582 
3583       __ bind(LisVolatile);
3584       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3585       __ lwax(R17_tos, Rclass_or_obj, Roffset);
3586       __ twi_0(R17_tos);
3587       __ isync();
3588       break;
3589     }
3590   case ftos:
3591     {
3592       __ lfsx(F15_ftos, Rclass_or_obj, Roffset);
3593       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()) - 1); // Undo bcp increment.
3594 
3595       __ bind(LisVolatile);
3596       Label Ldummy;
3597       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3598       __ lfsx(F15_ftos, Rclass_or_obj, Roffset);
3599       __ fcmpu(CR0, F15_ftos, F15_ftos); // Acquire by cmp-br-isync.
3600       __ bne_predict_not_taken(CR0, Ldummy);
3601       __ bind(Ldummy);
3602       __ isync();
3603       break;
3604     }
3605   default: ShouldNotReachHere();
3606   }
3607   __ addi(R14_bcp, R14_bcp, -1);
3608 }
3609 
3610 // ============================================================================
3611 // Calls
3612 
3613 void TemplateTable::prepare_invoke(Register Rcache,
3614                                    Register Rret_addr,// return address
3615                                    Register Rrecv,    // If caller wants to see it.
3616                                    Register Rscratch
3617                                    ) {
3618   // Determine flags.
3619   const Bytecodes::Code code = bytecode();
3620   const bool load_receiver = (Rrecv != noreg);
3621   assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
3622 
3623   // Load receiver if needed (after appendix is pushed so parameter size is correct).
3624   if (load_receiver) {
3625     Register Rparam_count = Rscratch;
3626     __ lhz(Rparam_count, in_bytes(ResolvedMethodEntry::num_parameters_offset()), Rcache);
3627     __ load_receiver(Rparam_count, Rrecv);
3628     __ verify_oop(Rrecv);
3629   }
3630 
3631   // Get return address.
3632   {
3633     Register Rtable_addr = Rscratch;
3634     Register Rret_type = Rret_addr;
3635     address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
3636 
3637     __ lbz(Rret_type, in_bytes(ResolvedMethodEntry::type_offset()), Rcache);
3638     __ load_dispatch_table(Rtable_addr, (address*)table_addr);
3639     __ sldi(Rret_type, Rret_type, LogBytesPerWord);
3640     // Get return address.
3641     __ ldx(Rret_addr, Rtable_addr, Rret_type);
3642   }
3643 }
3644 
3645 // Helper for virtual calls. Load target out of vtable and jump off!
3646 // Kills all passed registers.
3647 void TemplateTable::generate_vtable_call(Register Rrecv_klass, Register Rindex, Register Rret, Register Rtemp) {
3648 
3649   assert_different_registers(Rrecv_klass, Rtemp, Rret);
3650   const Register Rtarget_method = Rindex;
3651 
3652   // Get target method & entry point.
3653   const ByteSize base = Klass::vtable_start_offset();
3654   // Calc vtable addr scale the vtable index by 8.
3655   __ sldi(Rindex, Rindex, exact_log2(vtableEntry::size_in_bytes()));
3656   // Load target.
3657   __ addi(Rrecv_klass, Rrecv_klass, in_bytes(base + vtableEntry::method_offset()));
3658   __ ldx(Rtarget_method, Rindex, Rrecv_klass);
3659   // Argument and return type profiling.
3660   __ profile_arguments_type(Rtarget_method, Rrecv_klass /* scratch1 */, Rtemp /* scratch2 */, true);
3661   __ call_from_interpreter(Rtarget_method, Rret, Rrecv_klass /* scratch1 */, Rtemp /* scratch2 */);
3662 }
3663 
3664 // Virtual or final call. Final calls are rewritten on the fly to run through "fast_finalcall" next time.
3665 void TemplateTable::invokevirtual(int byte_no) {
3666   transition(vtos, vtos);
3667 
3668   Register Rret_addr = R5_ARG3,
3669            Rflags = R22_tmp2, // Should survive C call.
3670            Rrecv = R3_ARG1,
3671            Rrecv_klass = Rrecv,
3672            Rvtableindex_or_method = R31, // Should survive C call.
3673            Rnew_bc = R6_ARG4,
3674            Rcache = R7_ARG5;
3675 
3676   Label LnotFinal;
3677 
3678   load_resolved_method_entry_virtual(Rcache, noreg, Rflags);
3679 
3680   // Handle final method separately.
3681   __ testbitdi(CR0, R0, Rflags, ResolvedMethodEntry::is_vfinal_shift);
3682   __ bfalse(CR0, LnotFinal);
3683 
3684   if (RewriteBytecodes && !CDSConfig::is_using_archive() && !CDSConfig::is_dumping_static_archive()) {
3685     patch_bytecode(Bytecodes::_fast_invokevfinal, Rnew_bc, R12_scratch2);
3686   }
3687   invokevfinal_helper(Rcache, R11_scratch1, R12_scratch2, Rflags /* tmp */, Rrecv /* tmp */);
3688 
3689   __ align(32, 12);
3690   __ bind(LnotFinal);
3691   prepare_invoke(Rcache, Rret_addr, Rrecv, R11_scratch1);
3692 
3693   // Get vtable index.
3694   __ lhz(Rvtableindex_or_method, in_bytes(ResolvedMethodEntry::table_index_offset()), Rcache);
3695 
3696   // Get receiver klass.
3697   __ load_klass_check_null_throw(Rrecv_klass, Rrecv, R11_scratch1);
3698   __ verify_klass_ptr(Rrecv_klass);
3699   __ profile_virtual_call(Rrecv_klass, R11_scratch1, R12_scratch2);
3700 
3701   generate_vtable_call(Rrecv_klass, Rvtableindex_or_method, Rret_addr, R11_scratch1);
3702 }
3703 
3704 void TemplateTable::fast_invokevfinal(int byte_no) {
3705   transition(vtos, vtos);
3706 
3707   assert(byte_no == f2_byte, "use this argument");
3708   Register Rcache  = R31;
3709   __ load_method_entry(Rcache, R11_scratch1, 1, /* for_fast_bytecode */ true);
3710   invokevfinal_helper(Rcache, R11_scratch1, R12_scratch2, R22_tmp2, R23_tmp3);
3711 }
3712 
3713 void TemplateTable::invokevfinal_helper(Register Rcache,
3714                                         Register Rscratch1, Register Rscratch2, Register Rscratch3, Register Rscratch4) {
3715 
3716   assert_different_registers(Rcache, Rscratch1, Rscratch2, Rscratch3, Rscratch4);
3717 
3718   Register Rrecv     = Rscratch2,
3719            Rmethod   = Rscratch3,
3720            Rret_addr = Rscratch4;
3721   prepare_invoke(Rcache, Rret_addr, Rrecv, Rscratch1);
3722 
3723   // Receiver null check.
3724   __ null_check_throw(Rrecv, -1, Rscratch1);
3725 
3726   __ ld(Rmethod, in_bytes(ResolvedMethodEntry::method_offset()), Rcache);
3727 
3728   __ profile_final_call(Rrecv, Rscratch1);
3729   // Argument and return type profiling.
3730   __ profile_arguments_type(Rmethod, Rscratch1, Rscratch2, true);
3731 
3732   // Do the call.
3733   __ call_from_interpreter(Rmethod, Rret_addr, Rscratch1, Rscratch2);
3734 }
3735 
3736 void TemplateTable::invokespecial(int byte_no) {
3737   assert(byte_no == f1_byte, "use this argument");
3738   transition(vtos, vtos);
3739 
3740   Register Rcache      = R3_ARG1,
3741            Rret_addr   = R4_ARG2,
3742            Rreceiver   = R5_ARG3,
3743            Rmethod     = R31;
3744 
3745   load_resolved_method_entry_special_or_static(Rcache,  // ResolvedMethodEntry*
3746                                                Rmethod, // Method*
3747                                                noreg);  // flags
3748   prepare_invoke(Rcache, Rret_addr, Rreceiver, R11_scratch1);
3749 
3750   // Receiver null check.
3751   __ null_check_throw(Rreceiver, -1, R11_scratch1);
3752 
3753   __ profile_call(R11_scratch1, R12_scratch2);
3754   // Argument and return type profiling.
3755   __ profile_arguments_type(Rmethod, R11_scratch1, R12_scratch2, false);
3756   __ call_from_interpreter(Rmethod, Rret_addr, R11_scratch1, R12_scratch2);
3757 }
3758 
3759 void TemplateTable::invokestatic(int byte_no) {
3760   assert(byte_no == f1_byte, "use this argument");
3761   transition(vtos, vtos);
3762 
3763   Register Rcache    = R3_ARG1,
3764            Rret_addr = R4_ARG2;
3765 
3766   load_resolved_method_entry_special_or_static(Rcache,     // ResolvedMethodEntry*
3767                                                R19_method, // Method*
3768                                                noreg);     // flags
3769   prepare_invoke(Rcache, Rret_addr, noreg, R11_scratch1);
3770 
3771   __ profile_call(R11_scratch1, R12_scratch2);
3772   // Argument and return type profiling.
3773   __ profile_arguments_type(R19_method, R11_scratch1, R12_scratch2, false);
3774   __ call_from_interpreter(R19_method, Rret_addr, R11_scratch1, R12_scratch2);
3775 }
3776 
3777 void TemplateTable::invokeinterface_object_method(Register Rrecv_klass,
3778                                                   Register Rret,
3779                                                   Register Rflags,
3780                                                   Register Rcache,
3781                                                   Register Rtemp1,
3782                                                   Register Rtemp2) {
3783 
3784   assert_different_registers(Rcache, Rret, Rrecv_klass, Rflags, Rtemp1, Rtemp2);
3785   Label LnotFinal;
3786 
3787   // Check for vfinal.
3788   __ testbitdi(CR0, R0, Rflags, ResolvedMethodEntry::is_vfinal_shift);
3789   __ bfalse(CR0, LnotFinal);
3790 
3791   Register Rscratch = Rflags, // Rflags is dead now.
3792            Rmethod  = Rtemp2,
3793            Rindex   = Rtemp2;
3794 
3795   // Final call case.
3796   __ profile_final_call(Rtemp1, Rscratch);
3797   // Argument and return type profiling.
3798   __ ld(Rmethod, in_bytes(ResolvedMethodEntry::method_offset()), Rcache);
3799   __ profile_arguments_type(Rmethod, Rscratch, Rrecv_klass /* scratch */, true);
3800   // Do the final call - the index (f2) contains the method.
3801   __ call_from_interpreter(Rmethod, Rret, Rscratch, Rrecv_klass /* scratch */);
3802 
3803   // Non-final callc case.
3804   __ bind(LnotFinal);
3805   __ lhz(Rindex, in_bytes(ResolvedMethodEntry::table_index_offset()), Rcache);
3806   __ profile_virtual_call(Rrecv_klass, Rtemp1, Rscratch);
3807   generate_vtable_call(Rrecv_klass, Rindex, Rret, Rscratch);
3808 }
3809 
// invokeinterface template. After loading the resolved entry and the
// receiver, three cases are handled in this order:
//   1. forced-virtual entry (method of java.lang.Object): delegated to
//      invokeinterface_object_method,
//   2. vfinal entry (private interface method): subtype check of the receiver
//      klass against the interface klass, then a direct call,
//   3. regular interface method: interface lookup on the receiver klass and a
//      call of the method found.
// The two error exits call into the runtime to throw AbstractMethodError
// (lookup yielded a null method) or IncompatibleClassChangeError (receiver
// does not implement the interface).
void TemplateTable::invokeinterface(int byte_no) {
  assert(byte_no == f1_byte, "use this argument");
  transition(vtos, vtos);

  const Register Rscratch1        = R11_scratch1,
                 Rscratch2        = R12_scratch2,
                 Rreceiver        = R3_ARG1,
                 Rrecv_klass      = R4_ARG2,
                 Rinterface_klass = R5_ARG3,
                 Rmethod          = R6_ARG4,
                 Rmethod2         = R7_ARG5,
                 Rret_addr        = R8_ARG6,
                 Rindex           = R9_ARG7,
                 Rflags           = R10_ARG8,
                 Rcache           = R31;

  load_resolved_method_entry_interface(Rcache, noreg, noreg, Rflags);
  prepare_invoke(Rcache, Rret_addr, Rreceiver, Rscratch1);

  // First check for Object case, then private interface method,
  // then regular interface method.

  // Get receiver klass - this is also a null check
  __ load_klass_check_null_throw(Rrecv_klass, Rreceiver, Rscratch2);

  // Check corner case object method.
  // Special case of invokeinterface called for virtual method of
  // java.lang.Object. See ResolvedMethodEntry for details:
  // The invokeinterface was rewritten to a invokevirtual, hence we have
  // to handle this corner case.

  Label LnotObjectMethod, Lthrow_ame;
  __ testbitdi(CR0, R0, Rflags, ResolvedMethodEntry::is_forced_virtual_shift);
  __ bfalse(CR0, LnotObjectMethod);
  invokeinterface_object_method(Rrecv_klass, Rret_addr, Rflags, Rcache, Rscratch1, Rscratch2);
  __ bind(LnotObjectMethod);

  // Load interface klass and Method* from the resolved entry.
  __ ld(Rinterface_klass, in_bytes(ResolvedMethodEntry::klass_offset()), Rcache);
  __ ld(Rmethod, in_bytes(ResolvedMethodEntry::method_offset()), Rcache);

  // Check for private method invocation - indicated by vfinal
  Label LnotVFinal, L_no_such_interface, L_subtype;

  __ testbitdi(CR0, R0, Rflags, ResolvedMethodEntry::is_vfinal_shift);
  __ bfalse(CR0, LnotVFinal);

  // check_klass_subtype branches to L_subtype on success and falls through otherwise.
  __ check_klass_subtype(Rrecv_klass, Rinterface_klass, Rscratch1, Rscratch2, L_subtype);
  // If we get here the typecheck failed
  __ b(L_no_such_interface);
  __ bind(L_subtype);

  // do the call

  Register Rscratch = Rflags; // Rflags is dead now.

  __ profile_final_call(Rscratch1, Rscratch);
  __ profile_arguments_type(Rmethod, Rscratch, Rrecv_klass /* scratch */, true);

  __ call_from_interpreter(Rmethod, Rret_addr, Rscratch, Rrecv_klass /* scratch */);

  __ bind(LnotVFinal);
  // First lookup only checks that the receiver klass implements the interface
  // (return_method == false); failure branches to L_no_such_interface.
  __ lookup_interface_method(Rrecv_klass, Rinterface_klass, noreg, noreg, Rscratch1, Rscratch2,
                             L_no_such_interface, /*return_method=*/false);

  __ profile_virtual_call(Rrecv_klass, Rscratch1, Rscratch2);

  // Find entry point to call.

  // Get declaring interface class from method
  __ load_method_holder(Rinterface_klass, Rmethod);

  // Get itable index from method
  __ lwa(Rindex, in_bytes(Method::itable_index_offset()), Rmethod);
  // Rindex = Method::itable_index_max - Rindex
  __ subfic(Rindex, Rindex, Method::itable_index_max);

  // Second lookup, against the declaring interface, yields the target in Rmethod2.
  __ lookup_interface_method(Rrecv_klass, Rinterface_klass, Rindex, Rmethod2, Rscratch1, Rscratch2,
                             L_no_such_interface);

  __ cmpdi(CR0, Rmethod2, 0);
  __ beq(CR0, Lthrow_ame);
  // Found entry. Jump off!
  // Argument and return type profiling.
  __ profile_arguments_type(Rmethod2, Rscratch1, Rscratch2, true);
  __ call_from_interpreter(Rmethod2, Rret_addr, Rscratch1, Rscratch2);

  // Vtable entry was null => Throw abstract method error.
  __ bind(Lthrow_ame);
  // Pass arguments for generating a verbose error message.
  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose),
          Rrecv_klass, Rmethod);

  // Interface was not found => Throw incompatible class change error.
  __ bind(L_no_such_interface);
  // Pass arguments for generating a verbose error message.
  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose),
          Rrecv_klass, Rinterface_klass);
  DEBUG_ONLY( __ should_not_reach_here(); )
}
3908 
3909 void TemplateTable::invokedynamic(int byte_no) {
3910   transition(vtos, vtos);
3911 
3912   const Register Rret_addr = R3_RET;
3913   const Register Rmethod   = R22_tmp2;
3914   const Register Rscratch1 = R30;
3915   const Register Rscratch2 = R11_scratch1;
3916 
3917   // Returns target method in Rmethod and return address in R3_RET. Kills all argument registers.
3918   load_invokedynamic_entry(Rmethod);
3919 
3920   // Profile this call.
3921   __ profile_call(Rscratch1, Rscratch2);
3922 
3923   // Off we go. With the new method handles, we don't jump to a method handle
3924   // entry any more. Instead, we pushed an "appendix" in prepare invoke, which happens
3925   // to be the callsite object the bootstrap method returned. This is passed to a
3926   // "link" method which does the dispatch (Most likely just grabs the MH stored
3927   // inside the callsite and does an invokehandle).
3928   // Argument and return type profiling.
3929   __ profile_arguments_type(Rmethod, Rscratch1, Rscratch2, false);
3930   __ call_from_interpreter(Rmethod, Rret_addr, Rscratch1 /* scratch1 */, Rscratch2 /* scratch2 */);
3931 }
3932 
3933 void TemplateTable::invokehandle(int byte_no) {
3934   transition(vtos, vtos);
3935 
3936   const Register Rret_addr = R3_ARG1,
3937                  Rflags    = R12_scratch2,
3938                  Rrecv     = R5_ARG3,
3939                  Rmethod   = R22_tmp2,
3940                  Rscratch1 = R30,
3941                  Rscratch2 = R11_scratch1,
3942                  Rcache    = R31;
3943 
3944   load_resolved_method_entry_handle(Rcache,  // ResolvedMethodEntry*
3945                                     Rmethod, // Method*
3946                                     Rscratch1,
3947                                     Rflags);
3948   prepare_invoke(Rcache, Rret_addr, Rrecv, Rscratch1);
3949   __ verify_method_ptr(Rmethod);
3950   __ null_check_throw(Rrecv, -1, Rscratch2);
3951 
3952   __ profile_final_call(Rrecv, Rscratch1);
3953 
3954   // Still no call from handle => We call the method handle interpreter here.
3955   // Argument and return type profiling.
3956   __ profile_arguments_type(Rmethod, Rscratch1, Rscratch2, true);
3957   __ call_from_interpreter(Rmethod, Rret_addr, Rscratch1 /* scratch1 */, Rscratch2 /* scratch2 */);
3958 }
3959 
3960 // =============================================================================
3961 // Allocation
3962 
3963 // Puts allocated obj ref onto the expression stack.
3964 void TemplateTable::_new() {
3965   transition(vtos, atos);
3966 
3967   Label Lslow_case,
3968         Ldone;
3969 
3970   const Register RallocatedObject = R17_tos,
3971                  RinstanceKlass   = R9_ARG7,
3972                  Rscratch         = R11_scratch1,
3973                  Roffset          = R8_ARG6,
3974                  Rinstance_size   = Roffset,
3975                  Rcpool           = R4_ARG2,
3976                  Rtags            = R3_ARG1,
3977                  Rindex           = R5_ARG3;
3978 
3979   // --------------------------------------------------------------------------
3980   // Check if fast case is possible.
3981 
3982   // Load pointers to const pool and const pool's tags array.
3983   __ get_cpool_and_tags(Rcpool, Rtags);
3984   // Load index of constant pool entry.
3985   __ get_2_byte_integer_at_bcp(1, Rindex, InterpreterMacroAssembler::Unsigned);
3986 
3987   // Note: compared to other architectures, PPC's implementation always goes
3988   // to the slow path if TLAB is used and fails.
3989   if (UseTLAB) {
3990     // Make sure the class we're about to instantiate has been resolved
3991     // This is done before loading instanceKlass to be consistent with the order
3992     // how Constant Pool is updated (see ConstantPoolCache::klass_at_put).
3993     __ addi(Rtags, Rtags, Array<u1>::base_offset_in_bytes());
3994     __ lbzx(Rtags, Rindex, Rtags);
3995 
3996     __ cmpdi(CR0, Rtags, JVM_CONSTANT_Class);
3997     __ bne(CR0, Lslow_case);
3998 
3999     // Get instanceKlass
4000     __ sldi(Roffset, Rindex, LogBytesPerWord);
4001     __ load_resolved_klass_at_offset(Rcpool, Roffset, RinstanceKlass);
4002 
4003     // Make sure klass is initialized.
4004     assert(VM_Version::supports_fast_class_init_checks(), "Optimization requires support for fast class initialization checks");
4005     __ clinit_barrier(RinstanceKlass, R16_thread, nullptr /*L_fast_path*/, &Lslow_case);
4006 
4007     __ lwz(Rinstance_size, in_bytes(Klass::layout_helper_offset()), RinstanceKlass);
4008 
4009     // Make sure klass is not abstract, or interface or java/lang/Class.
4010     __ andi_(R0, Rinstance_size, Klass::_lh_instance_slow_path_bit); // slow path bit equals 0?
4011     __ bne(CR0, Lslow_case);
4012 
4013     // --------------------------------------------------------------------------
4014     // Fast case:
4015     // Allocate the instance.
4016     // 1) Try to allocate in the TLAB.
4017     // 2) If the above fails (or is not applicable), go to a slow case (creates a new TLAB, etc.).
4018 
4019     Register RoldTopValue = RallocatedObject; // Object will be allocated here if it fits.
4020     Register RnewTopValue = R6_ARG4;
4021     Register RendValue    = R7_ARG5;
4022 
4023     // Check if we can allocate in the TLAB.
4024     __ ld(RoldTopValue, in_bytes(JavaThread::tlab_top_offset()), R16_thread);
4025     __ ld(RendValue,    in_bytes(JavaThread::tlab_end_offset()), R16_thread);
4026 
4027     __ add(RnewTopValue, Rinstance_size, RoldTopValue);
4028 
4029     // If there is enough space, we do not CAS and do not clear.
4030     __ cmpld(CR0, RnewTopValue, RendValue);
4031     __ bgt(CR0, Lslow_case);
4032 
4033     __ std(RnewTopValue, in_bytes(JavaThread::tlab_top_offset()), R16_thread);
4034 
4035     if (!ZeroTLAB) {
4036       // --------------------------------------------------------------------------
4037       // Init1: Zero out newly allocated memory.
4038       // Initialize remaining object fields.
4039       Register Rbase = Rtags;
4040       int header_size = oopDesc::header_size() * HeapWordSize;
4041       __ addi(Rinstance_size, Rinstance_size, 7 - header_size);
4042       __ addi(Rbase, RallocatedObject, header_size);
4043       __ srdi(Rinstance_size, Rinstance_size, 3);
4044 
4045       // Clear out object skipping header. Takes also care of the zero length case.
4046       __ clear_memory_doubleword(Rbase, Rinstance_size);
4047     }
4048 
4049     // --------------------------------------------------------------------------
4050     // Init2: Initialize the header: mark, klass
4051     // Init mark.
4052     if (UseCompactObjectHeaders || Arguments::is_valhalla_enabled()) {
4053       __ ld(Rscratch, in_bytes(Klass::prototype_header_offset()), RinstanceKlass);
4054     } else {
4055       __ load_const_optimized(Rscratch, markWord::prototype().value(), R0);
4056     }
4057     __ std(Rscratch, oopDesc::mark_offset_in_bytes(), RallocatedObject);
4058 
4059     if (!UseCompactObjectHeaders) {
4060       __ store_klass_gap(RallocatedObject);
4061       __ store_klass(RallocatedObject, RinstanceKlass, Rscratch);
4062     }
4063 
4064     __ b(Ldone);
4065   }
4066 
4067   // --------------------------------------------------------------------------
4068   // slow case
4069   __ bind(Lslow_case);
4070   __ call_VM_preemptable(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), Rcpool, Rindex);
4071 
4072   // continue
4073   __ bind(Ldone);
4074 
4075   // Must prevent reordering of stores for object initialization with stores that publish the new object.
4076   __ membar(Assembler::StoreStore);
4077 }
4078 
4079 void TemplateTable::newarray() {
4080   transition(itos, atos);
4081 
4082   __ lbz(R4, 1, R14_bcp);
4083   __ extsw(R5, R17_tos);
4084   call_VM(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), R4, R5 /* size */);
4085 
4086   // Must prevent reordering of stores for object initialization with stores that publish the new object.
4087   __ membar(Assembler::StoreStore);
4088 }
4089 
4090 void TemplateTable::anewarray() {
4091   transition(itos, atos);
4092 
4093   __ get_constant_pool(R4);
4094   __ get_2_byte_integer_at_bcp(1, R5, InterpreterMacroAssembler::Unsigned);
4095   __ extsw(R6, R17_tos); // size
4096   call_VM(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), R4 /* pool */, R5 /* index */, R6 /* size */);
4097 
4098   // Must prevent reordering of stores for object initialization with stores that publish the new object.
4099   __ membar(Assembler::StoreStore);
4100 }
4101 
4102 // Allocate a multi dimensional array
4103 void TemplateTable::multianewarray() {
4104   transition(vtos, atos);
4105 
4106   Register Rptr = R31; // Needs to survive C call.
4107 
4108   // Put ndims * wordSize into frame temp slot
4109   __ lbz(Rptr, 3, R14_bcp);
4110   __ sldi(Rptr, Rptr, Interpreter::logStackElementSize);
4111   // Esp points past last_dim, so set to R4 to first_dim address.
4112   __ add(R4, Rptr, R15_esp);
4113   call_VM(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), R4 /* first_size_address */);
4114   // Pop all dimensions off the stack.
4115   __ add(R15_esp, Rptr, R15_esp);
4116 
4117   // Must prevent reordering of stores for object initialization with stores that publish the new object.
4118   __ membar(Assembler::StoreStore);
4119 }
4120 
4121 void TemplateTable::arraylength() {
4122   transition(atos, itos);
4123 
4124   __ verify_oop(R17_tos);
4125   __ null_check_throw(R17_tos, arrayOopDesc::length_offset_in_bytes(), R11_scratch1);
4126   __ lwa(R17_tos, arrayOopDesc::length_offset_in_bytes(), R17_tos);
4127 }
4128 
4129 // ============================================================================
4130 // Typechecks
4131 
4132 void TemplateTable::checkcast() {
4133   transition(atos, atos);
4134 
4135   Label Ldone, Lis_null, Lquicked, Lresolved;
4136   Register Roffset         = R6_ARG4,
4137            RobjKlass       = R4_ARG2,
4138            RspecifiedKlass = R5_ARG3, // Generate_ClassCastException_verbose_handler will read value from this register.
4139            Rcpool          = R11_scratch1,
4140            Rtags           = R12_scratch2;
4141 
4142   // Null does not pass.
4143   __ cmpdi(CR0, R17_tos, 0);
4144   __ beq(CR0, Lis_null);
4145 
4146   // Get constant pool tag to find out if the bytecode has already been "quickened".
4147   __ get_cpool_and_tags(Rcpool, Rtags);
4148 
4149   __ get_2_byte_integer_at_bcp(1, Roffset, InterpreterMacroAssembler::Unsigned);
4150 
4151   __ addi(Rtags, Rtags, Array<u1>::base_offset_in_bytes());
4152   __ lbzx(Rtags, Rtags, Roffset);
4153 
4154   __ cmpdi(CR0, Rtags, JVM_CONSTANT_Class);
4155   __ beq(CR0, Lquicked);
4156 
4157   // Call into the VM to "quicken" instanceof.
4158   __ push_ptr();  // for GC
4159   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
4160   __ get_vm_result_metadata(RspecifiedKlass);
4161   __ pop_ptr();   // Restore receiver.
4162   __ b(Lresolved);
4163 
4164   // Extract target class from constant pool.
4165   __ bind(Lquicked);
4166   __ sldi(Roffset, Roffset, LogBytesPerWord);
4167   __ load_resolved_klass_at_offset(Rcpool, Roffset, RspecifiedKlass);
4168 
4169   // Do the checkcast.
4170   __ bind(Lresolved);
4171   // Get value klass in RobjKlass.
4172   __ load_klass(RobjKlass, R17_tos);
4173   // Generate a fast subtype check. Branch to cast_ok if no failure. Return 0 if failure.
4174   __ gen_subtype_check(RobjKlass, RspecifiedKlass, /*3 temp regs*/ Roffset, Rcpool, Rtags, /*target if subtype*/ Ldone);
4175 
4176   // Not a subtype; so must throw exception
4177   // Target class oop is in register R6_ARG4 == RspecifiedKlass by convention.
4178   __ load_dispatch_table(R11_scratch1, (address*)Interpreter::_throw_ClassCastException_entry);
4179   __ mtctr(R11_scratch1);
4180   __ bctr();
4181 
4182   // Profile the null case.
4183   __ align(32, 12);
4184   __ bind(Lis_null);
4185   __ profile_null_seen(R11_scratch1, Rtags); // Rtags used as scratch.
4186 
4187   __ align(32, 12);
4188   __ bind(Ldone);
4189 }
4190 
4191 // Output:
4192 //   - tos == 0: Obj was null or not an instance of class.
4193 //   - tos == 1: Obj was an instance of class.
4194 void TemplateTable::instanceof() {
4195   transition(atos, itos);
4196 
4197   Label Ldone, Lis_null, Lquicked, Lresolved;
4198   Register Roffset         = R6_ARG4,
4199            RobjKlass       = R4_ARG2,
4200            RspecifiedKlass = R5_ARG3,
4201            Rcpool          = R11_scratch1,
4202            Rtags           = R12_scratch2;
4203 
4204   // Null does not pass.
4205   __ cmpdi(CR0, R17_tos, 0);
4206   __ beq(CR0, Lis_null);
4207 
4208   // Get constant pool tag to find out if the bytecode has already been "quickened".
4209   __ get_cpool_and_tags(Rcpool, Rtags);
4210 
4211   __ get_2_byte_integer_at_bcp(1, Roffset, InterpreterMacroAssembler::Unsigned);
4212 
4213   __ addi(Rtags, Rtags, Array<u1>::base_offset_in_bytes());
4214   __ lbzx(Rtags, Rtags, Roffset);
4215 
4216   __ cmpdi(CR0, Rtags, JVM_CONSTANT_Class);
4217   __ beq(CR0, Lquicked);
4218 
4219   // Call into the VM to "quicken" instanceof.
4220   __ push_ptr();  // for GC
4221   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
4222   __ get_vm_result_metadata(RspecifiedKlass);
4223   __ pop_ptr();   // Restore receiver.
4224   __ b(Lresolved);
4225 
4226   // Extract target class from constant pool.
4227   __ bind(Lquicked);
4228   __ sldi(Roffset, Roffset, LogBytesPerWord);
4229   __ load_resolved_klass_at_offset(Rcpool, Roffset, RspecifiedKlass);
4230 
4231   // Do the checkcast.
4232   __ bind(Lresolved);
4233   // Get value klass in RobjKlass.
4234   __ load_klass(RobjKlass, R17_tos);
4235   // Generate a fast subtype check. Branch to cast_ok if no failure. Return 0 if failure.
4236   __ li(R17_tos, 1);
4237   __ gen_subtype_check(RobjKlass, RspecifiedKlass, /*3 temp regs*/ Roffset, Rcpool, Rtags, /*target if subtype*/ Ldone);
4238   __ li(R17_tos, 0);
4239 
4240   if (ProfileInterpreter) {
4241     __ b(Ldone);
4242   }
4243 
4244   // Profile the null case.
4245   __ align(32, 12);
4246   __ bind(Lis_null);
4247   __ profile_null_seen(Rcpool, Rtags); // Rcpool and Rtags used as scratch.
4248 
4249   __ align(32, 12);
4250   __ bind(Ldone);
4251 }
4252 
4253 // =============================================================================
4254 // Breakpoints
4255 
4256 void TemplateTable::_breakpoint() {
4257   transition(vtos, vtos);
4258 
4259   // Get the unpatched byte code.
4260   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at), R19_method, R14_bcp);
4261   __ mr(R31, R3_RET);
4262 
4263   // Post the breakpoint event.
4264   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), R19_method, R14_bcp);
4265 
4266   // Complete the execution of original bytecode.
4267   __ dispatch_Lbyte_code(vtos, R31, Interpreter::normal_table(vtos));
4268 }
4269 
4270 // =============================================================================
4271 // Exceptions
4272 
4273 void TemplateTable::athrow() {
4274   transition(atos, vtos);
4275 
4276   // Exception oop is in tos
4277   __ verify_oop(R17_tos);
4278 
4279   __ null_check_throw(R17_tos, -1, R11_scratch1);
4280 
4281   // Throw exception interpreter entry expects exception oop to be in R3.
4282   __ mr(R3_RET, R17_tos);
4283   __ load_dispatch_table(R11_scratch1, (address*)Interpreter::throw_exception_entry());
4284   __ mtctr(R11_scratch1);
4285   __ bctr();
4286 }
4287 
4288 // =============================================================================
4289 // Synchronization
4290 // Searches the basic object lock list on the stack for a free slot
4291 // and uses it to lock the object in tos.
4292 //
4293 // Recursive locking is enabled by exiting the search if the same
4294 // object is already found in the list. Thus, a new basic lock obj lock
4295 // is allocated "higher up" in the stack and thus is found first
4296 // at next monitor exit.
4297 void TemplateTable::monitorenter() {
4298   transition(atos, vtos);
4299   __ verify_oop(R17_tos);
4300 
4301   Register Rcurrent_monitor  = R3_ARG1,
4302            Rcurrent_obj      = R4_ARG2,
4303            Robj_to_lock      = R17_tos,
4304            Rscratch1         = R11_scratch1,
4305            Rscratch2         = R12_scratch2,
4306            Rbot              = R5_ARG3,
4307            Rfree_slot        = R6_ARG4;
4308 
4309   Label Lfound, Lallocate_new;
4310 
4311   __ ld(Rscratch1, _abi0(callers_sp), R1_SP); // load FP
4312   __ li(Rfree_slot, 0); // Points to free slot or null.
4313 
4314   // Set up search loop - start with topmost monitor.
4315   __ mr(Rcurrent_monitor, R26_monitor);
4316   __ addi(Rbot, Rscratch1, -frame::ijava_state_size);
4317 
4318   // ------------------------------------------------------------------------------
4319   // Null pointer exception.
4320   __ null_check_throw(Robj_to_lock, -1, Rscratch1);
4321 
4322   Label is_inline_type;
4323   __ ld(Rscratch1, oopDesc::mark_offset_in_bytes(), Robj_to_lock);
4324   __ test_markword_is_inline_type(Rscratch1, is_inline_type);
4325 
4326   // Check if any slot is present => short cut to allocation if not.
4327   __ cmpld(CR0, Rcurrent_monitor, Rbot);
4328   __ beq(CR0, Lallocate_new);
4329 
4330   // ------------------------------------------------------------------------------
4331   // Find a free slot in the monitor block.
4332   // Note: The order of the monitors is important for C2 OSR which derives the
4333   //       unlock order from it (see comments for interpreter_frame_monitor_*).
4334   {
4335     Label Lloop, LnotFree, Lexit;
4336 
4337     __ bind(Lloop);
4338     __ ld(Rcurrent_obj, in_bytes(BasicObjectLock::obj_offset()), Rcurrent_monitor);
4339     // Exit if current entry is for same object; this guarantees, that new monitor
4340     // used for recursive lock is above the older one.
4341     __ cmpd(CR0, Rcurrent_obj, Robj_to_lock);
4342     __ beq(CR0, Lexit); // recursive locking
4343 
4344     __ cmpdi(CR0, Rcurrent_obj, 0);
4345     __ bne(CR0, LnotFree);
4346     __ mr(Rfree_slot, Rcurrent_monitor); // remember free slot closest to the bottom
4347     __ bind(LnotFree);
4348 
4349     __ addi(Rcurrent_monitor, Rcurrent_monitor, frame::interpreter_frame_monitor_size_in_bytes());
4350     __ cmpld(CR0, Rcurrent_monitor, Rbot);
4351     __ bne(CR0, Lloop);
4352     __ bind(Lexit);
4353   }
4354 
4355   // ------------------------------------------------------------------------------
4356   // Check if we found a free slot.
4357   __ cmpdi(CR0, Rfree_slot, 0);
4358   __ bne(CR0, Lfound);
4359 
4360   // We didn't find a free BasicObjLock => allocate one.
4361   __ bind(Lallocate_new);
4362   __ add_monitor_to_stack(false, Rscratch1, Rscratch2);
4363   __ mr(Rfree_slot, R26_monitor);
4364 
4365   // ------------------------------------------------------------------------------
4366   // We now have a slot to lock.
4367   __ bind(Lfound);
4368 
4369   // Increment bcp to point to the next bytecode, so exception handling for async. exceptions work correctly.
4370   // The object has already been popped from the stack, so the expression stack looks correct.
4371   __ addi(R14_bcp, R14_bcp, 1);
4372 
4373   __ std(Robj_to_lock, in_bytes(BasicObjectLock::obj_offset()), Rfree_slot);
4374   __ lock_object(Rfree_slot, Robj_to_lock);
4375 
4376   // Check if there's enough space on the stack for the monitors after locking.
4377   // This emits a single store.
4378   __ generate_stack_overflow_check(0);
4379 
4380   // The bcp has already been incremented. Just need to dispatch to next instruction.
4381   __ dispatch_next(vtos);
4382 
4383   __ bind(is_inline_type);
4384   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
4385                     InterpreterRuntime::throw_identity_exception), Robj_to_lock);
4386   __ should_not_reach_here();
4387 }
4388 
4389 void TemplateTable::monitorexit() {
4390   transition(atos, vtos);
4391   __ verify_oop(R17_tos);
4392 
4393   Register Rcurrent_monitor  = R3_ARG1,
4394            Rcurrent_obj      = R4_ARG2,
4395            Robj_to_lock      = R17_tos,
4396            Rscratch          = R11_scratch1,
4397            Rbot              = R12_scratch2;
4398 
4399   Label Lfound, Lillegal_monitor_state;
4400 
4401   __ ld(Rscratch, _abi0(callers_sp), R1_SP); // load FP
4402 
4403   // Set up search loop - start with topmost monitor.
4404   __ mr(Rcurrent_monitor, R26_monitor);
4405   __ addi(Rbot, Rscratch, -frame::ijava_state_size);
4406 
4407   // Null pointer check.
4408   __ null_check_throw(Robj_to_lock, -1, Rscratch);
4409 
4410   const int is_inline_type_mask = markWord::inline_type_pattern;
4411   __ ld(Rscratch, oopDesc::mark_offset_in_bytes(), Robj_to_lock);
4412   __ andi(Rscratch, Rscratch, is_inline_type_mask);
4413   __ cmpwi(CR0, Rscratch, is_inline_type_mask);
4414   __ beq(CR0, Lillegal_monitor_state);
4415 
4416   // Check corner case: unbalanced monitorEnter / Exit.
4417   __ cmpld(CR0, Rcurrent_monitor, Rbot);
4418   __ beq(CR0, Lillegal_monitor_state);
4419 
4420   // Find the corresponding slot in the monitors stack section.
4421   {
4422     Label Lloop;
4423 
4424     __ bind(Lloop);
4425     __ ld(Rcurrent_obj, in_bytes(BasicObjectLock::obj_offset()), Rcurrent_monitor);
4426     // Is this entry for same obj?
4427     __ cmpd(CR0, Rcurrent_obj, Robj_to_lock);
4428     __ beq(CR0, Lfound);
4429 
4430     __ addi(Rcurrent_monitor, Rcurrent_monitor, frame::interpreter_frame_monitor_size_in_bytes());
4431     __ cmpld(CR0, Rcurrent_monitor, Rbot);
4432     __ bne(CR0, Lloop);
4433   }
4434 
4435   // Fell through without finding the basic obj lock => throw up!
4436   __ bind(Lillegal_monitor_state);
4437   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
4438   __ should_not_reach_here();
4439 
4440   __ align(32, 12);
4441   __ bind(Lfound);
4442   __ unlock_object(Rcurrent_monitor);
4443 }
4444 
4445 // ============================================================================
4446 // Wide bytecodes
4447 
4448 // Wide instructions. Simply redirects to the wide entry point for that instruction.
4449 void TemplateTable::wide() {
4450   transition(vtos, vtos);
4451 
4452   const Register Rtable = R11_scratch1,
4453                  Rindex = R12_scratch2,
4454                  Rtmp   = R0;
4455 
4456   __ lbz(Rindex, 1, R14_bcp);
4457 
4458   __ load_dispatch_table(Rtable, Interpreter::_wentry_point);
4459 
4460   __ slwi(Rindex, Rindex, LogBytesPerWord);
4461   __ ldx(Rtmp, Rtable, Rindex);
4462   __ mtctr(Rtmp);
4463   __ bctr();
4464   // Note: the bcp increment step is part of the individual wide bytecode implementations.
4465 }