1 /*
   2  * Copyright (c) 2014, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2013, 2025 SAP SE. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "cds/cdsConfig.hpp"
  28 #include "compiler/disassembler.hpp"
  29 #include "gc/shared/barrierSetAssembler.hpp"
  30 #include "gc/shared/tlab_globals.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "interpreter/interpreterRuntime.hpp"
  33 #include "interpreter/interp_masm.hpp"
  34 #include "interpreter/templateInterpreter.hpp"
  35 #include "interpreter/templateTable.hpp"
  36 #include "memory/universe.hpp"
  37 #include "oops/klass.inline.hpp"
  38 #include "oops/methodCounters.hpp"
  39 #include "oops/methodData.hpp"
  40 #include "oops/objArrayKlass.hpp"
  41 #include "oops/oop.inline.hpp"
  42 #include "oops/resolvedFieldEntry.hpp"
  43 #include "oops/resolvedIndyEntry.hpp"
  44 #include "oops/resolvedMethodEntry.hpp"
  45 #include "prims/jvmtiExport.hpp"
  46 #include "prims/methodHandles.hpp"
  47 #include "runtime/frame.inline.hpp"
  48 #include "runtime/safepointMechanism.hpp"
  49 #include "runtime/sharedRuntime.hpp"
  50 #include "runtime/stubRoutines.hpp"
  51 #include "runtime/synchronizer.hpp"
  52 #include "runtime/vm_version.hpp"
  53 #include "utilities/macros.hpp"
  54 #include "utilities/powerOfTwo.hpp"
  55 
// Code-emission shorthand used throughout this file: "__ insn(...)" expands to
// a call on the object returned by Disassembler::hook, which is handed the
// current source file, line number and _masm.
#undef __
#define __ Disassembler::hook<InterpreterMacroAssembler>(__FILE__, __LINE__, _masm)->
  58 
  59 // ============================================================================
  60 // Misc helpers
  61 
// Do an oop store like *(base + offset) = val, where offset is either a
// register or a constant (RegisterOrConstant).
// val can be noreg, which means that null is stored.
// Kills:
//   base, tmp1, tmp2, tmp3 (NOTE(review): confirm against store_heap_oop)
  67 static void do_oop_store(InterpreterMacroAssembler* _masm,
  68                          Register           base,
  69                          RegisterOrConstant offset,
  70                          Register           val,         // Noreg means always null.
  71                          Register           tmp1,
  72                          Register           tmp2,
  73                          Register           tmp3,
  74                          DecoratorSet       decorators) {
  75   assert_different_registers(tmp1, tmp2, tmp3, val, base);
  76   __ store_heap_oop(val, offset, base, tmp1, tmp2, tmp3, MacroAssembler::PRESERVATION_NONE, decorators);
  77 }
  78 
  79 static void do_oop_load(InterpreterMacroAssembler* _masm,
  80                         Register base,
  81                         RegisterOrConstant offset,
  82                         Register dst,
  83                         Register tmp1,
  84                         Register tmp2,
  85                         DecoratorSet decorators) {
  86   assert_different_registers(base, tmp1, tmp2);
  87   assert_different_registers(dst, tmp1, tmp2);
  88   __ load_heap_oop(dst, offset, base, tmp1, tmp2, MacroAssembler::PRESERVATION_NONE, decorators);
  89 }
  90 
  91 Address TemplateTable::at_bcp(int offset) {
  92   // Not used on ppc.
  93   ShouldNotReachHere();
  94   return Address();
  95 }
  96 
  97 // Patches the current bytecode (ptr to it located in bcp)
  98 // in the bytecode stream with a new one.
  99 void TemplateTable::patch_bytecode(Bytecodes::Code new_bc, Register Rnew_bc, Register Rtemp, bool load_bc_into_bc_reg /*=true*/, int byte_no) {
 100   // With sharing on, may need to test method flag.
 101   if (!RewriteBytecodes) return;
 102   Label L_patch_done;
 103 
 104   switch (new_bc) {
 105     case Bytecodes::_fast_aputfield:
 106     case Bytecodes::_fast_bputfield:
 107     case Bytecodes::_fast_zputfield:
 108     case Bytecodes::_fast_cputfield:
 109     case Bytecodes::_fast_dputfield:
 110     case Bytecodes::_fast_fputfield:
 111     case Bytecodes::_fast_iputfield:
 112     case Bytecodes::_fast_lputfield:
 113     case Bytecodes::_fast_sputfield:
 114     {
 115       // We skip bytecode quickening for putfield instructions when
 116       // the put_code written to the constant pool cache is zero.
 117       // This is required so that every execution of this instruction
 118       // calls out to InterpreterRuntime::resolve_get_put to do
 119       // additional, required work.
 120       assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
 121       assert(load_bc_into_bc_reg, "we use bc_reg as temp");
 122       __ load_field_entry(Rtemp, Rnew_bc);
 123       int code_offset = (byte_no == f1_byte) ? in_bytes(ResolvedFieldEntry::get_code_offset())
 124                                              : in_bytes(ResolvedFieldEntry::put_code_offset());
 125       __ lbz(Rnew_bc, code_offset, Rtemp);
 126       __ cmpwi(CR0, Rnew_bc, 0);
 127       __ li(Rnew_bc, (unsigned int)(unsigned char)new_bc);
 128       __ beq(CR0, L_patch_done);
 129       // __ isync(); // acquire not needed
 130       break;
 131     }
 132 
 133     default:
 134       assert(byte_no == -1, "sanity");
 135       if (load_bc_into_bc_reg) {
 136         __ li(Rnew_bc, (unsigned int)(unsigned char)new_bc);
 137       }
 138   }
 139 
 140   if (JvmtiExport::can_post_breakpoint()) {
 141     Label L_fast_patch;
 142     __ lbz(Rtemp, 0, R14_bcp);
 143     __ cmpwi(CR0, Rtemp, (unsigned int)(unsigned char)Bytecodes::_breakpoint);
 144     __ bne(CR0, L_fast_patch);
 145     // Perform the quickening, slowly, in the bowels of the breakpoint table.
 146     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), R19_method, R14_bcp, Rnew_bc);
 147     __ b(L_patch_done);
 148     __ bind(L_fast_patch);
 149   }
 150 
 151   // Patch bytecode with release store to coordinate with ResolvedFieldEntry
 152   // and ResolvedMethodEntry loads in fast bytecode codelets.
 153   __ release();
 154   __ stb(Rnew_bc, 0, R14_bcp);
 155 
 156   __ bind(L_patch_done);
 157 }
 158 
 159 // ============================================================================
 160 // Individual instructions
 161 
 162 void TemplateTable::nop() {
 163   transition(vtos, vtos);
 164   // Nothing to do.
 165 }
 166 
 167 void TemplateTable::shouldnotreachhere() {
 168   transition(vtos, vtos);
 169   __ stop("shouldnotreachhere bytecode");
 170 }
 171 
 172 void TemplateTable::aconst_null() {
 173   transition(vtos, atos);
 174   __ li(R17_tos, 0);
 175 }
 176 
 177 void TemplateTable::iconst(int value) {
 178   transition(vtos, itos);
 179   assert(value >= -1 && value <= 5, "");
 180   __ li(R17_tos, value);
 181 }
 182 
 183 void TemplateTable::lconst(int value) {
 184   transition(vtos, ltos);
 185   assert(value >= -1 && value <= 5, "");
 186   __ li(R17_tos, value);
 187 }
 188 
 189 void TemplateTable::fconst(int value) {
 190   transition(vtos, ftos);
 191   static float zero = 0.0;
 192   static float one  = 1.0;
 193   static float two  = 2.0;
 194   switch (value) {
 195     default: ShouldNotReachHere();
 196     case 0: {
 197       int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&zero, R0, true);
 198       __ lfs(F15_ftos, simm16_offset, R11_scratch1);
 199       break;
 200     }
 201     case 1: {
 202       int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&one, R0, true);
 203       __ lfs(F15_ftos, simm16_offset, R11_scratch1);
 204       break;
 205     }
 206     case 2: {
 207       int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&two, R0, true);
 208       __ lfs(F15_ftos, simm16_offset, R11_scratch1);
 209       break;
 210     }
 211   }
 212 }
 213 
 214 void TemplateTable::dconst(int value) {
 215   transition(vtos, dtos);
 216   static double zero = 0.0;
 217   static double one  = 1.0;
 218   switch (value) {
 219     case 0: {
 220       int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&zero, R0, true);
 221       __ lfd(F15_ftos, simm16_offset, R11_scratch1);
 222       break;
 223     }
 224     case 1: {
 225       int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&one, R0, true);
 226       __ lfd(F15_ftos, simm16_offset, R11_scratch1);
 227       break;
 228     }
 229     default: ShouldNotReachHere();
 230   }
 231 }
 232 
 233 void TemplateTable::bipush() {
 234   transition(vtos, itos);
 235   __ lbz(R17_tos, 1, R14_bcp);
 236   __ extsb(R17_tos, R17_tos);
 237 }
 238 
 239 void TemplateTable::sipush() {
 240   transition(vtos, itos);
 241   __ get_2_byte_integer_at_bcp(1, R17_tos, InterpreterMacroAssembler::Signed);
 242 }
 243 
 244 void TemplateTable::ldc(LdcType type) {
 245   Register Rscratch1 = R11_scratch1,
 246            Rscratch2 = R12_scratch2,
 247            Rcpool    = R3_ARG1;
 248 
 249   transition(vtos, vtos);
 250   Label notInt, notFloat, notClass, exit;
 251 
 252   __ get_cpool_and_tags(Rcpool, Rscratch2); // Set Rscratch2 = &tags.
 253   if (is_ldc_wide(type)) { // Read index.
 254     __ get_2_byte_integer_at_bcp(1, Rscratch1, InterpreterMacroAssembler::Unsigned);
 255   } else {
 256     __ lbz(Rscratch1, 1, R14_bcp);
 257   }
 258 
 259   const int base_offset = ConstantPool::header_size() * wordSize;
 260   const int tags_offset = Array<u1>::base_offset_in_bytes();
 261 
 262   // Get type from tags.
 263   __ addi(Rscratch2, Rscratch2, tags_offset);
 264   __ lbzx(Rscratch2, Rscratch2, Rscratch1);
 265 
 266   __ cmpwi(CR0, Rscratch2, JVM_CONSTANT_UnresolvedClass); // Unresolved class?
 267   __ cmpwi(CR1, Rscratch2, JVM_CONSTANT_UnresolvedClassInError); // Unresolved class in error state?
 268   __ cror(CR0, Assembler::equal, CR1, Assembler::equal);
 269 
 270   // Resolved class - need to call vm to get java mirror of the class.
 271   __ cmpwi(CR1, Rscratch2, JVM_CONSTANT_Class);
 272   __ crnor(CR0, Assembler::equal, CR1, Assembler::equal); // Neither resolved class nor unresolved case from above?
 273   __ beq(CR0, notClass);
 274 
 275   __ li(R4, is_ldc_wide(type) ? 1 : 0);
 276   call_VM(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), R4);
 277   __ push(atos);
 278   __ b(exit);
 279 
 280   __ align(32, 12);
 281   __ bind(notClass);
 282   __ addi(Rcpool, Rcpool, base_offset);
 283   __ sldi(Rscratch1, Rscratch1, LogBytesPerWord);
 284   __ cmpdi(CR0, Rscratch2, JVM_CONSTANT_Integer);
 285   __ bne(CR0, notInt);
 286   __ lwax(R17_tos, Rcpool, Rscratch1);
 287   __ push(itos);
 288   __ b(exit);
 289 
 290   __ align(32, 12);
 291   __ bind(notInt);
 292   __ cmpdi(CR0, Rscratch2, JVM_CONSTANT_Float);
 293   __ bne(CR0, notFloat);
 294   __ lfsx(F15_ftos, Rcpool, Rscratch1);
 295   __ push(ftos);
 296   __ b(exit);
 297 
 298   __ align(32, 12);
 299   // assume the tag is for condy; if not, the VM runtime will tell us
 300   __ bind(notFloat);
 301   condy_helper(exit);
 302 
 303   __ align(32, 12);
 304   __ bind(exit);
 305 }
 306 
 307 // Fast path for caching oop constants.
 308 void TemplateTable::fast_aldc(LdcType type) {
 309   transition(vtos, atos);
 310 
 311   int index_size = is_ldc_wide(type) ? sizeof(u2) : sizeof(u1);
 312   Label is_null;
 313 
 314   // We are resolved if the resolved reference cache entry contains a
 315   // non-null object (CallSite, etc.)
 316   __ get_cache_index_at_bcp(R31, 1, index_size);  // Load index.
 317   // Only rewritten during link time. So, no need for memory barriers for accessing resolved info.
 318   __ load_resolved_reference_at_index(R17_tos, R31, R11_scratch1, R12_scratch2, &is_null);
 319 
 320   // Convert null sentinel to null
 321   int simm16_rest = __ load_const_optimized(R11_scratch1, Universe::the_null_sentinel_addr(), R0, true);
 322   __ ld(R31, simm16_rest, R11_scratch1);
 323   __ resolve_oop_handle(R31, R11_scratch1, R12_scratch2, MacroAssembler::PRESERVATION_NONE);
 324   __ cmpld(CR0, R17_tos, R31);
 325   __ isel_0(R17_tos, CR0, Assembler::equal);
 326   __ verify_oop(R17_tos);
 327   __ dispatch_epilog(atos, Bytecodes::length_for(bytecode()));
 328 
 329   __ bind(is_null);
 330   __ load_const_optimized(R3_ARG1, (int)bytecode());
 331 
 332   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
 333 
 334   // First time invocation - must resolve first.
 335   __ call_VM(R17_tos, entry, R3_ARG1);
 336   __ verify_oop(R17_tos);
 337 }
 338 
 339 void TemplateTable::ldc2_w() {
 340   transition(vtos, vtos);
 341   Label not_double, not_long, exit;
 342 
 343   Register Rindex = R11_scratch1,
 344            Rcpool = R12_scratch2,
 345            Rtag   = R3_ARG1;
 346   __ get_cpool_and_tags(Rcpool, Rtag);
 347   __ get_2_byte_integer_at_bcp(1, Rindex, InterpreterMacroAssembler::Unsigned);
 348 
 349   const int base_offset = ConstantPool::header_size() * wordSize;
 350   const int tags_offset = Array<u1>::base_offset_in_bytes();
 351   // Get type from tags.
 352   __ addi(Rcpool, Rcpool, base_offset);
 353   __ addi(Rtag, Rtag, tags_offset);
 354 
 355   __ lbzx(Rtag, Rtag, Rindex);
 356   __ sldi(Rindex, Rindex, LogBytesPerWord);
 357 
 358   __ cmpdi(CR0, Rtag, JVM_CONSTANT_Double);
 359   __ bne(CR0, not_double);
 360   __ lfdx(F15_ftos, Rcpool, Rindex);
 361   __ push(dtos);
 362   __ b(exit);
 363 
 364   __ bind(not_double);
 365   __ cmpdi(CR0, Rtag, JVM_CONSTANT_Long);
 366   __ bne(CR0, not_long);
 367   __ ldx(R17_tos, Rcpool, Rindex);
 368   __ push(ltos);
 369   __ b(exit);
 370 
 371   __ bind(not_long);
 372   condy_helper(exit);
 373 
 374   __ align(32, 12);
 375   __ bind(exit);
 376 }
 377 
 378 void TemplateTable::condy_helper(Label& Done) {
 379   const Register obj   = R31;
 380   const Register off   = R11_scratch1;
 381   const Register flags = R12_scratch2;
 382   const Register rarg  = R4_ARG2;
 383   __ li(rarg, (int)bytecode());
 384   call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg);
 385   __ get_vm_result_metadata(flags);
 386 
 387   // VMr = obj = base address to find primitive value to push
 388   // VMr2 = flags = (tos, off) using format of CPCE::_flags
 389   __ andi(off, flags, ConstantPoolCache::field_index_mask);
 390 
 391   // What sort of thing are we loading?
 392   __ rldicl(flags, flags, 64-ConstantPoolCache::tos_state_shift, 64-ConstantPoolCache::tos_state_bits);
 393 
 394   switch (bytecode()) {
 395   case Bytecodes::_ldc:
 396   case Bytecodes::_ldc_w:
 397     {
 398       // tos in (itos, ftos, stos, btos, ctos, ztos)
 399       Label notInt, notFloat, notShort, notByte, notChar, notBool;
 400       __ cmplwi(CR0, flags, itos);
 401       __ bne(CR0, notInt);
 402       // itos
 403       __ lwax(R17_tos, obj, off);
 404       __ push(itos);
 405       __ b(Done);
 406 
 407       __ bind(notInt);
 408       __ cmplwi(CR0, flags, ftos);
 409       __ bne(CR0, notFloat);
 410       // ftos
 411       __ lfsx(F15_ftos, obj, off);
 412       __ push(ftos);
 413       __ b(Done);
 414 
 415       __ bind(notFloat);
 416       __ cmplwi(CR0, flags, stos);
 417       __ bne(CR0, notShort);
 418       // stos
 419       __ lhax(R17_tos, obj, off);
 420       __ push(stos);
 421       __ b(Done);
 422 
 423       __ bind(notShort);
 424       __ cmplwi(CR0, flags, btos);
 425       __ bne(CR0, notByte);
 426       // btos
 427       __ lbzx(R17_tos, obj, off);
 428       __ extsb(R17_tos, R17_tos);
 429       __ push(btos);
 430       __ b(Done);
 431 
 432       __ bind(notByte);
 433       __ cmplwi(CR0, flags, ctos);
 434       __ bne(CR0, notChar);
 435       // ctos
 436       __ lhzx(R17_tos, obj, off);
 437       __ push(ctos);
 438       __ b(Done);
 439 
 440       __ bind(notChar);
 441       __ cmplwi(CR0, flags, ztos);
 442       __ bne(CR0, notBool);
 443       // ztos
 444       __ lbzx(R17_tos, obj, off);
 445       __ push(ztos);
 446       __ b(Done);
 447 
 448       __ bind(notBool);
 449       break;
 450     }
 451 
 452   case Bytecodes::_ldc2_w:
 453     {
 454       Label notLong, notDouble;
 455       __ cmplwi(CR0, flags, ltos);
 456       __ bne(CR0, notLong);
 457       // ltos
 458       __ ldx(R17_tos, obj, off);
 459       __ push(ltos);
 460       __ b(Done);
 461 
 462       __ bind(notLong);
 463       __ cmplwi(CR0, flags, dtos);
 464       __ bne(CR0, notDouble);
 465       // dtos
 466       __ lfdx(F15_ftos, obj, off);
 467       __ push(dtos);
 468       __ b(Done);
 469 
 470       __ bind(notDouble);
 471       break;
 472     }
 473 
 474   default:
 475     ShouldNotReachHere();
 476   }
 477 
 478   __ stop("bad ldc/condy");
 479 }
 480 
 481 // Get the locals index located in the bytecode stream at bcp + offset.
 482 void TemplateTable::locals_index(Register Rdst, int offset) {
 483   __ lbz(Rdst, offset, R14_bcp);
 484 }
 485 
 486 void TemplateTable::iload() {
 487   iload_internal();
 488 }
 489 
 490 void TemplateTable::nofast_iload() {
 491   iload_internal(may_not_rewrite);
 492 }
 493 
 494 void TemplateTable::iload_internal(RewriteControl rc) {
 495   transition(vtos, itos);
 496 
 497   // Get the local value into tos
 498   const Register Rindex = R22_tmp2;
 499   locals_index(Rindex);
 500 
 501   // Rewrite iload,iload  pair into fast_iload2
 502   //         iload,caload pair into fast_icaload
 503   if (RewriteFrequentPairs && rc == may_rewrite) {
 504     Label Lrewrite, Ldone;
 505     Register Rnext_byte  = R3_ARG1,
 506              Rrewrite_to = R6_ARG4,
 507              Rscratch    = R11_scratch1;
 508 
 509     // get next byte
 510     __ lbz(Rnext_byte, Bytecodes::length_for(Bytecodes::_iload), R14_bcp);
 511 
 512     // if _iload, wait to rewrite to iload2. We only want to rewrite the
 513     // last two iloads in a pair. Comparing against fast_iload means that
 514     // the next bytecode is neither an iload or a caload, and therefore
 515     // an iload pair.
 516     __ cmpwi(CR0, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_iload);
 517     __ beq(CR0, Ldone);
 518 
 519     __ cmpwi(CR1, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_fast_iload);
 520     __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_iload2);
 521     __ beq(CR1, Lrewrite);
 522 
 523     __ cmpwi(CR0, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_caload);
 524     __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_icaload);
 525     __ beq(CR0, Lrewrite);
 526 
 527     __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_iload);
 528 
 529     __ bind(Lrewrite);
 530     patch_bytecode(Bytecodes::_iload, Rrewrite_to, Rscratch, false);
 531     __ bind(Ldone);
 532   }
 533 
 534   __ load_local_int(R17_tos, Rindex, Rindex);
 535 }
 536 
 537 // Load 2 integers in a row without dispatching
 538 void TemplateTable::fast_iload2() {
 539   transition(vtos, itos);
 540 
 541   __ lbz(R3_ARG1, 1, R14_bcp);
 542   __ lbz(R17_tos, Bytecodes::length_for(Bytecodes::_iload) + 1, R14_bcp);
 543 
 544   __ load_local_int(R3_ARG1, R11_scratch1, R3_ARG1);
 545   __ load_local_int(R17_tos, R12_scratch2, R17_tos);
 546   __ push_i(R3_ARG1);
 547 }
 548 
 549 void TemplateTable::fast_iload() {
 550   transition(vtos, itos);
 551   // Get the local value into tos
 552 
 553   const Register Rindex = R11_scratch1;
 554   locals_index(Rindex);
 555   __ load_local_int(R17_tos, Rindex, Rindex);
 556 }
 557 
// Load a local variable of type long from the locals area into the TOS cache
// register. The local index resides in the bytecode stream.
 560 void TemplateTable::lload() {
 561   transition(vtos, ltos);
 562 
 563   const Register Rindex = R11_scratch1;
 564   locals_index(Rindex);
 565   __ load_local_long(R17_tos, Rindex, Rindex);
 566 }
 567 
 568 void TemplateTable::fload() {
 569   transition(vtos, ftos);
 570 
 571   const Register Rindex = R11_scratch1;
 572   locals_index(Rindex);
 573   __ load_local_float(F15_ftos, Rindex, Rindex);
 574 }
 575 
 576 void TemplateTable::dload() {
 577   transition(vtos, dtos);
 578 
 579   const Register Rindex = R11_scratch1;
 580   locals_index(Rindex);
 581   __ load_local_double(F15_ftos, Rindex, Rindex);
 582 }
 583 
 584 void TemplateTable::aload() {
 585   transition(vtos, atos);
 586 
 587   const Register Rindex = R11_scratch1;
 588   locals_index(Rindex);
 589   __ load_local_ptr(R17_tos, Rindex, Rindex);
 590 }
 591 
 592 void TemplateTable::locals_index_wide(Register Rdst) {
 593   // Offset is 2, not 1, because Lbcp points to wide prefix code.
 594   __ get_2_byte_integer_at_bcp(2, Rdst, InterpreterMacroAssembler::Unsigned);
 595 }
 596 
 597 void TemplateTable::wide_iload() {
 598   // Get the local value into tos.
 599 
 600   const Register Rindex = R11_scratch1;
 601   locals_index_wide(Rindex);
 602   __ load_local_int(R17_tos, Rindex, Rindex);
 603 }
 604 
 605 void TemplateTable::wide_lload() {
 606   transition(vtos, ltos);
 607 
 608   const Register Rindex = R11_scratch1;
 609   locals_index_wide(Rindex);
 610   __ load_local_long(R17_tos, Rindex, Rindex);
 611 }
 612 
 613 void TemplateTable::wide_fload() {
 614   transition(vtos, ftos);
 615 
 616   const Register Rindex = R11_scratch1;
 617   locals_index_wide(Rindex);
 618   __ load_local_float(F15_ftos, Rindex, Rindex);
 619 }
 620 
 621 void TemplateTable::wide_dload() {
 622   transition(vtos, dtos);
 623 
 624   const Register Rindex = R11_scratch1;
 625   locals_index_wide(Rindex);
 626   __ load_local_double(F15_ftos, Rindex, Rindex);
 627 }
 628 
 629 void TemplateTable::wide_aload() {
 630   transition(vtos, atos);
 631 
 632   const Register Rindex = R11_scratch1;
 633   locals_index_wide(Rindex);
 634   __ load_local_ptr(R17_tos, Rindex, Rindex);
 635 }
 636 
 637 void TemplateTable::iaload() {
 638   transition(itos, itos);
 639 
 640   const Register Rload_addr = R3_ARG1,
 641                  Rarray     = R4_ARG2,
 642                  Rtemp      = R5_ARG3;
 643   __ index_check(Rarray, R17_tos /* index */, LogBytesPerInt, Rtemp, Rload_addr);
 644   __ lwa(R17_tos, arrayOopDesc::base_offset_in_bytes(T_INT), Rload_addr);
 645 }
 646 
 647 void TemplateTable::laload() {
 648   transition(itos, ltos);
 649 
 650   const Register Rload_addr = R3_ARG1,
 651                  Rarray     = R4_ARG2,
 652                  Rtemp      = R5_ARG3;
 653   __ index_check(Rarray, R17_tos /* index */, LogBytesPerLong, Rtemp, Rload_addr);
 654   __ ld(R17_tos, arrayOopDesc::base_offset_in_bytes(T_LONG), Rload_addr);
 655 }
 656 
 657 void TemplateTable::faload() {
 658   transition(itos, ftos);
 659 
 660   const Register Rload_addr = R3_ARG1,
 661                  Rarray     = R4_ARG2,
 662                  Rtemp      = R5_ARG3;
 663   __ index_check(Rarray, R17_tos /* index */, LogBytesPerInt, Rtemp, Rload_addr);
 664   __ lfs(F15_ftos, arrayOopDesc::base_offset_in_bytes(T_FLOAT), Rload_addr);
 665 }
 666 
 667 void TemplateTable::daload() {
 668   transition(itos, dtos);
 669 
 670   const Register Rload_addr = R3_ARG1,
 671                  Rarray     = R4_ARG2,
 672                  Rtemp      = R5_ARG3;
 673   __ index_check(Rarray, R17_tos /* index */, LogBytesPerLong, Rtemp, Rload_addr);
 674   __ lfd(F15_ftos, arrayOopDesc::base_offset_in_bytes(T_DOUBLE), Rload_addr);
 675 }
 676 
 677 void TemplateTable::aaload() {
 678   transition(itos, atos);
 679 
 680   // tos: index
 681   // result tos: array
 682   const Register Rload_addr = R3_ARG1,
 683                  Rarray     = R4_ARG2,
 684                  Rtemp      = R5_ARG3,
 685                  Rtemp2     = R31;
 686   __ index_check(Rarray, R17_tos /* index */, UseCompressedOops ? 2 : LogBytesPerWord, Rtemp, Rload_addr);
 687   do_oop_load(_masm, Rload_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), R17_tos, Rtemp, Rtemp2,
 688               IS_ARRAY);
 689   __ verify_oop(R17_tos);
 690   //__ dcbt(R17_tos); // prefetch
 691 }
 692 
 693 void TemplateTable::baload() {
 694   transition(itos, itos);
 695 
 696   const Register Rload_addr = R3_ARG1,
 697                  Rarray     = R4_ARG2,
 698                  Rtemp      = R5_ARG3;
 699   __ index_check(Rarray, R17_tos /* index */, 0, Rtemp, Rload_addr);
 700   __ lbz(R17_tos, arrayOopDesc::base_offset_in_bytes(T_BYTE), Rload_addr);
 701   __ extsb(R17_tos, R17_tos);
 702 }
 703 
 704 void TemplateTable::caload() {
 705   transition(itos, itos);
 706 
 707   const Register Rload_addr = R3_ARG1,
 708                  Rarray     = R4_ARG2,
 709                  Rtemp      = R5_ARG3;
 710   __ index_check(Rarray, R17_tos /* index */, LogBytesPerShort, Rtemp, Rload_addr);
 711   __ lhz(R17_tos, arrayOopDesc::base_offset_in_bytes(T_CHAR), Rload_addr);
 712 }
 713 
 714 // Iload followed by caload frequent pair.
 715 void TemplateTable::fast_icaload() {
 716   transition(vtos, itos);
 717 
 718   const Register Rload_addr = R3_ARG1,
 719                  Rarray     = R4_ARG2,
 720                  Rtemp      = R11_scratch1;
 721 
 722   locals_index(R17_tos);
 723   __ load_local_int(R17_tos, Rtemp, R17_tos);
 724   __ index_check(Rarray, R17_tos /* index */, LogBytesPerShort, Rtemp, Rload_addr);
 725   __ lhz(R17_tos, arrayOopDesc::base_offset_in_bytes(T_CHAR), Rload_addr);
 726 }
 727 
 728 void TemplateTable::saload() {
 729   transition(itos, itos);
 730 
 731   const Register Rload_addr = R11_scratch1,
 732                  Rarray     = R12_scratch2,
 733                  Rtemp      = R3_ARG1;
 734   __ index_check(Rarray, R17_tos /* index */, LogBytesPerShort, Rtemp, Rload_addr);
 735   __ lha(R17_tos, arrayOopDesc::base_offset_in_bytes(T_SHORT), Rload_addr);
 736 }
 737 
 738 void TemplateTable::iload(int n) {
 739   transition(vtos, itos);
 740 
 741   __ lwz(R17_tos, Interpreter::local_offset_in_bytes(n), R18_locals);
 742 }
 743 
 744 void TemplateTable::lload(int n) {
 745   transition(vtos, ltos);
 746 
 747   __ ld(R17_tos, Interpreter::local_offset_in_bytes(n + 1), R18_locals);
 748 }
 749 
 750 void TemplateTable::fload(int n) {
 751   transition(vtos, ftos);
 752 
 753   __ lfs(F15_ftos, Interpreter::local_offset_in_bytes(n), R18_locals);
 754 }
 755 
 756 void TemplateTable::dload(int n) {
 757   transition(vtos, dtos);
 758 
 759   __ lfd(F15_ftos, Interpreter::local_offset_in_bytes(n + 1), R18_locals);
 760 }
 761 
 762 void TemplateTable::aload(int n) {
 763   transition(vtos, atos);
 764 
 765   __ ld(R17_tos, Interpreter::local_offset_in_bytes(n), R18_locals);
 766 }
 767 
 768 void TemplateTable::aload_0() {
 769   aload_0_internal();
 770 }
 771 
 772 void TemplateTable::nofast_aload_0() {
 773   aload_0_internal(may_not_rewrite);
 774 }
 775 
 776 void TemplateTable::aload_0_internal(RewriteControl rc) {
 777   transition(vtos, atos);
 778   // According to bytecode histograms, the pairs:
 779   //
 780   // _aload_0, _fast_igetfield
 781   // _aload_0, _fast_agetfield
 782   // _aload_0, _fast_fgetfield
 783   //
 784   // occur frequently. If RewriteFrequentPairs is set, the (slow)
 785   // _aload_0 bytecode checks if the next bytecode is either
 786   // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
 787   // rewrites the current bytecode into a pair bytecode; otherwise it
 788   // rewrites the current bytecode into _0 that doesn't do
 789   // the pair check anymore.
 790   //
 791   // Note: If the next bytecode is _getfield, the rewrite must be
 792   //       delayed, otherwise we may miss an opportunity for a pair.
 793   //
 794   // Also rewrite frequent pairs
 795   //   aload_0, aload_1
 796   //   aload_0, iload_1
 797   // These bytecodes with a small amount of code are most profitable
 798   // to rewrite.
 799 
 800   if (RewriteFrequentPairs && rc == may_rewrite) {
 801 
 802     Label Lrewrite, Ldont_rewrite;
 803     Register Rnext_byte  = R3_ARG1,
 804              Rrewrite_to = R6_ARG4,
 805              Rscratch    = R11_scratch1;
 806 
 807     // Get next byte.
 808     __ lbz(Rnext_byte, Bytecodes::length_for(Bytecodes::_aload_0), R14_bcp);
 809 
 810     // If _getfield, wait to rewrite. We only want to rewrite the last two bytecodes in a pair.
 811     __ cmpwi(CR0, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_getfield);
 812     __ beq(CR0, Ldont_rewrite);
 813 
 814     __ cmpwi(CR1, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_fast_igetfield);
 815     __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_iaccess_0);
 816     __ beq(CR1, Lrewrite);
 817 
 818     __ cmpwi(CR0, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_fast_agetfield);
 819     __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_aaccess_0);
 820     __ beq(CR0, Lrewrite);
 821 
 822     __ cmpwi(CR1, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_fast_fgetfield);
 823     __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_faccess_0);
 824     __ beq(CR1, Lrewrite);
 825 
 826     __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_aload_0);
 827 
 828     __ bind(Lrewrite);
 829     patch_bytecode(Bytecodes::_aload_0, Rrewrite_to, Rscratch, false);
 830     __ bind(Ldont_rewrite);
 831   }
 832 
 833   // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop).
 834   aload(0);
 835 }
 836 
 837 void TemplateTable::istore() {
 838   transition(itos, vtos);
 839 
 840   const Register Rindex = R11_scratch1;
 841   locals_index(Rindex);
 842   __ store_local_int(R17_tos, Rindex);
 843 }
 844 
 845 void TemplateTable::lstore() {
 846   transition(ltos, vtos);
 847   const Register Rindex = R11_scratch1;
 848   locals_index(Rindex);
 849   __ store_local_long(R17_tos, Rindex);
 850 }
 851 
 852 void TemplateTable::fstore() {
 853   transition(ftos, vtos);
 854 
 855   const Register Rindex = R11_scratch1;
 856   locals_index(Rindex);
 857   __ store_local_float(F15_ftos, Rindex);
 858 }
 859 
 860 void TemplateTable::dstore() {
 861   transition(dtos, vtos);
 862 
 863   const Register Rindex = R11_scratch1;
 864   locals_index(Rindex);
 865   __ store_local_double(F15_ftos, Rindex);
 866 }
 867 
 868 void TemplateTable::astore() {
 869   transition(vtos, vtos);
 870 
 871   const Register Rindex = R11_scratch1;
 872   __ pop_ptr();
 873   __ verify_oop_or_return_address(R17_tos, Rindex);
 874   locals_index(Rindex);
 875   __ store_local_ptr(R17_tos, Rindex);
 876 }
 877 
 878 void TemplateTable::wide_istore() {
 879   transition(vtos, vtos);
 880 
 881   const Register Rindex = R11_scratch1;
 882   __ pop_i();
 883   locals_index_wide(Rindex);
 884   __ store_local_int(R17_tos, Rindex);
 885 }
 886 
 887 void TemplateTable::wide_lstore() {
 888   transition(vtos, vtos);
 889 
 890   const Register Rindex = R11_scratch1;
 891   __ pop_l();
 892   locals_index_wide(Rindex);
 893   __ store_local_long(R17_tos, Rindex);
 894 }
 895 
 896 void TemplateTable::wide_fstore() {
 897   transition(vtos, vtos);
 898 
 899   const Register Rindex = R11_scratch1;
 900   __ pop_f();
 901   locals_index_wide(Rindex);
 902   __ store_local_float(F15_ftos, Rindex);
 903 }
 904 
 905 void TemplateTable::wide_dstore() {
 906   transition(vtos, vtos);
 907 
 908   const Register Rindex = R11_scratch1;
 909   __ pop_d();
 910   locals_index_wide(Rindex);
 911   __ store_local_double(F15_ftos, Rindex);
 912 }
 913 
 914 void TemplateTable::wide_astore() {
 915   transition(vtos, vtos);
 916 
 917   const Register Rindex = R11_scratch1;
 918   __ pop_ptr();
 919   __ verify_oop_or_return_address(R17_tos, Rindex);
 920   locals_index_wide(Rindex);
 921   __ store_local_ptr(R17_tos, Rindex);
 922 }
 923 
 924 void TemplateTable::iastore() {
 925   transition(itos, vtos);
 926 
 927   const Register Rindex      = R3_ARG1,
 928                  Rstore_addr = R4_ARG2,
 929                  Rarray      = R5_ARG3,
 930                  Rtemp       = R6_ARG4;
 931   __ pop_i(Rindex);
 932   __ index_check(Rarray, Rindex, LogBytesPerInt, Rtemp, Rstore_addr);
 933   __ stw(R17_tos, arrayOopDesc::base_offset_in_bytes(T_INT), Rstore_addr);
 934   }
 935 
 936 void TemplateTable::lastore() {
 937   transition(ltos, vtos);
 938 
 939   const Register Rindex      = R3_ARG1,
 940                  Rstore_addr = R4_ARG2,
 941                  Rarray      = R5_ARG3,
 942                  Rtemp       = R6_ARG4;
 943   __ pop_i(Rindex);
 944   __ index_check(Rarray, Rindex, LogBytesPerLong, Rtemp, Rstore_addr);
 945   __ std(R17_tos, arrayOopDesc::base_offset_in_bytes(T_LONG), Rstore_addr);
 946   }
 947 
 948 void TemplateTable::fastore() {
 949   transition(ftos, vtos);
 950 
 951   const Register Rindex      = R3_ARG1,
 952                  Rstore_addr = R4_ARG2,
 953                  Rarray      = R5_ARG3,
 954                  Rtemp       = R6_ARG4;
 955   __ pop_i(Rindex);
 956   __ index_check(Rarray, Rindex, LogBytesPerInt, Rtemp, Rstore_addr);
 957   __ stfs(F15_ftos, arrayOopDesc::base_offset_in_bytes(T_FLOAT), Rstore_addr);
 958   }
 959 
 960 void TemplateTable::dastore() {
 961   transition(dtos, vtos);
 962 
 963   const Register Rindex      = R3_ARG1,
 964                  Rstore_addr = R4_ARG2,
 965                  Rarray      = R5_ARG3,
 966                  Rtemp       = R6_ARG4;
 967   __ pop_i(Rindex);
 968   __ index_check(Rarray, Rindex, LogBytesPerLong, Rtemp, Rstore_addr);
 969   __ stfd(F15_ftos, arrayOopDesc::base_offset_in_bytes(T_DOUBLE), Rstore_addr);
 970   }
 971 
 972 // Pop 3 values from the stack and...
 973 void TemplateTable::aastore() {
 974   transition(vtos, vtos);
 975 
 976   Label Lstore_ok, Lis_null, Ldone;
 977   const Register Rindex    = R3_ARG1,
 978                  Rarray    = R4_ARG2,
 979                  Rscratch  = R11_scratch1,
 980                  Rscratch2 = R12_scratch2,
 981                  Rarray_klass = R5_ARG3,
 982                  Rarray_element_klass = Rarray_klass,
 983                  Rvalue_klass = R6_ARG4,
 984                  Rstore_addr = R31;    // Use register which survives VM call.
 985 
 986   __ ld(R17_tos, Interpreter::expr_offset_in_bytes(0), R15_esp); // Get value to store.
 987   __ lwz(Rindex, Interpreter::expr_offset_in_bytes(1), R15_esp); // Get index.
 988   __ ld(Rarray, Interpreter::expr_offset_in_bytes(2), R15_esp);  // Get array.
 989 
 990   __ verify_oop(R17_tos);
 991   __ index_check_without_pop(Rarray, Rindex, UseCompressedOops ? 2 : LogBytesPerWord, Rscratch, Rstore_addr);
 992   // Rindex is dead!
 993   Register Rscratch3 = Rindex;
 994 
 995   // Do array store check - check for null value first.
 996   __ cmpdi(CR0, R17_tos, 0);
 997   __ beq(CR0, Lis_null);
 998 
 999   __ load_klass(Rarray_klass, Rarray);
1000   __ load_klass(Rvalue_klass, R17_tos);
1001 
1002   // Do fast instanceof cache test.
1003   __ ld(Rarray_element_klass, in_bytes(ObjArrayKlass::element_klass_offset()), Rarray_klass);
1004 
1005   // Generate a fast subtype check. Branch to store_ok if no failure. Throw if failure.
1006   __ gen_subtype_check(Rvalue_klass /*subklass*/, Rarray_element_klass /*superklass*/, Rscratch, Rscratch2, Rscratch3, Lstore_ok);
1007 
1008   // Fell through: subtype check failed => throw an exception.
1009   __ load_dispatch_table(R11_scratch1, (address*)Interpreter::_throw_ArrayStoreException_entry);
1010   __ mtctr(R11_scratch1);
1011   __ bctr();
1012 
1013   __ bind(Lis_null);
1014   do_oop_store(_masm, Rstore_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), noreg /* 0 */,
1015                Rscratch, Rscratch2, Rscratch3, IS_ARRAY);
1016   __ profile_null_seen(Rscratch, Rscratch2);
1017   __ b(Ldone);
1018 
1019   // Store is OK.
1020   __ bind(Lstore_ok);
1021   do_oop_store(_masm, Rstore_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), R17_tos /* value */,
1022                Rscratch, Rscratch2, Rscratch3, IS_ARRAY | IS_NOT_NULL);
1023 
1024   __ bind(Ldone);
1025   // Adjust sp (pops array, index and value).
1026   __ addi(R15_esp, R15_esp, 3 * Interpreter::stackElementSize);
1027 }
1028 
1029 void TemplateTable::bastore() {
1030   transition(itos, vtos);
1031 
1032   const Register Rindex   = R11_scratch1,
1033                  Rarray   = R12_scratch2,
1034                  Rscratch = R3_ARG1;
1035   __ pop_i(Rindex);
1036   __ pop_ptr(Rarray);
1037   // tos: val
1038 
1039   // Need to check whether array is boolean or byte
1040   // since both types share the bastore bytecode.
1041   __ load_klass_check_null_throw(Rscratch, Rarray, Rscratch);
1042   __ lwz(Rscratch, in_bytes(Klass::layout_helper_offset()), Rscratch);
1043   int diffbit = exact_log2(Klass::layout_helper_boolean_diffbit());
1044   __ testbitdi(CR0, R0, Rscratch, diffbit);
1045   Label L_skip;
1046   __ bfalse(CR0, L_skip);
1047   __ andi(R17_tos, R17_tos, 1);  // if it is a T_BOOLEAN array, mask the stored value to 0/1
1048   __ bind(L_skip);
1049 
1050   __ index_check_without_pop(Rarray, Rindex, 0, Rscratch, Rarray);
1051   __ stb(R17_tos, arrayOopDesc::base_offset_in_bytes(T_BYTE), Rarray);
1052 }
1053 
1054 void TemplateTable::castore() {
1055   transition(itos, vtos);
1056 
1057   const Register Rindex   = R11_scratch1,
1058                  Rarray   = R12_scratch2,
1059                  Rscratch = R3_ARG1;
1060   __ pop_i(Rindex);
1061   // tos: val
1062   // Rarray: array ptr (popped by index_check)
1063   __ index_check(Rarray, Rindex, LogBytesPerShort, Rscratch, Rarray);
1064   __ sth(R17_tos, arrayOopDesc::base_offset_in_bytes(T_CHAR), Rarray);
1065 }
1066 
// sastore: delegates to castore(), which emits a halfword (sth) store.
void TemplateTable::sastore() {
  castore();
}
1070 
1071 void TemplateTable::istore(int n) {
1072   transition(itos, vtos);
1073   __ stw(R17_tos, Interpreter::local_offset_in_bytes(n), R18_locals);
1074 }
1075 
1076 void TemplateTable::lstore(int n) {
1077   transition(ltos, vtos);
1078   __ std(R17_tos, Interpreter::local_offset_in_bytes(n + 1), R18_locals);
1079 }
1080 
1081 void TemplateTable::fstore(int n) {
1082   transition(ftos, vtos);
1083   __ stfs(F15_ftos, Interpreter::local_offset_in_bytes(n), R18_locals);
1084 }
1085 
1086 void TemplateTable::dstore(int n) {
1087   transition(dtos, vtos);
1088   __ stfd(F15_ftos, Interpreter::local_offset_in_bytes(n + 1), R18_locals);
1089 }
1090 
1091 void TemplateTable::astore(int n) {
1092   transition(vtos, vtos);
1093 
1094   __ pop_ptr();
1095   __ verify_oop_or_return_address(R17_tos, R11_scratch1);
1096   __ std(R17_tos, Interpreter::local_offset_in_bytes(n), R18_locals);
1097 }
1098 
1099 void TemplateTable::pop() {
1100   transition(vtos, vtos);
1101 
1102   __ addi(R15_esp, R15_esp, Interpreter::stackElementSize);
1103 }
1104 
1105 void TemplateTable::pop2() {
1106   transition(vtos, vtos);
1107 
1108   __ addi(R15_esp, R15_esp, Interpreter::stackElementSize * 2);
1109 }
1110 
1111 void TemplateTable::dup() {
1112   transition(vtos, vtos);
1113 
1114   __ ld(R11_scratch1, Interpreter::stackElementSize, R15_esp);
1115   __ push_ptr(R11_scratch1);
1116 }
1117 
1118 void TemplateTable::dup_x1() {
1119   transition(vtos, vtos);
1120 
1121   Register Ra = R11_scratch1,
1122            Rb = R12_scratch2;
1123   // stack: ..., a, b
1124   __ ld(Rb, Interpreter::stackElementSize,     R15_esp);
1125   __ ld(Ra, Interpreter::stackElementSize * 2, R15_esp);
1126   __ std(Rb, Interpreter::stackElementSize * 2, R15_esp);
1127   __ std(Ra, Interpreter::stackElementSize,     R15_esp);
1128   __ push_ptr(Rb);
1129   // stack: ..., b, a, b
1130 }
1131 
1132 void TemplateTable::dup_x2() {
1133   transition(vtos, vtos);
1134 
1135   Register Ra = R11_scratch1,
1136            Rb = R12_scratch2,
1137            Rc = R3_ARG1;
1138 
1139   // stack: ..., a, b, c
1140   __ ld(Rc, Interpreter::stackElementSize,     R15_esp);  // load c
1141   __ ld(Ra, Interpreter::stackElementSize * 3, R15_esp);  // load a
1142   __ std(Rc, Interpreter::stackElementSize * 3, R15_esp); // store c in a
1143   __ ld(Rb, Interpreter::stackElementSize * 2, R15_esp);  // load b
1144   // stack: ..., c, b, c
1145   __ std(Ra, Interpreter::stackElementSize * 2, R15_esp); // store a in b
1146   // stack: ..., c, a, c
1147   __ std(Rb, Interpreter::stackElementSize,     R15_esp); // store b in c
1148   __ push_ptr(Rc);                                        // push c
1149   // stack: ..., c, a, b, c
1150 }
1151 
1152 void TemplateTable::dup2() {
1153   transition(vtos, vtos);
1154 
1155   Register Ra = R11_scratch1,
1156            Rb = R12_scratch2;
1157   // stack: ..., a, b
1158   __ ld(Rb, Interpreter::stackElementSize,     R15_esp);
1159   __ ld(Ra, Interpreter::stackElementSize * 2, R15_esp);
1160   __ push_2ptrs(Ra, Rb);
1161   // stack: ..., a, b, a, b
1162 }
1163 
1164 void TemplateTable::dup2_x1() {
1165   transition(vtos, vtos);
1166 
1167   Register Ra = R11_scratch1,
1168            Rb = R12_scratch2,
1169            Rc = R3_ARG1;
1170   // stack: ..., a, b, c
1171   __ ld(Rc, Interpreter::stackElementSize,     R15_esp);
1172   __ ld(Rb, Interpreter::stackElementSize * 2, R15_esp);
1173   __ std(Rc, Interpreter::stackElementSize * 2, R15_esp);
1174   __ ld(Ra, Interpreter::stackElementSize * 3, R15_esp);
1175   __ std(Ra, Interpreter::stackElementSize,     R15_esp);
1176   __ std(Rb, Interpreter::stackElementSize * 3, R15_esp);
1177   // stack: ..., b, c, a
1178   __ push_2ptrs(Rb, Rc);
1179   // stack: ..., b, c, a, b, c
1180 }
1181 
1182 void TemplateTable::dup2_x2() {
1183   transition(vtos, vtos);
1184 
1185   Register Ra = R11_scratch1,
1186            Rb = R12_scratch2,
1187            Rc = R3_ARG1,
1188            Rd = R4_ARG2;
1189   // stack: ..., a, b, c, d
1190   __ ld(Rb, Interpreter::stackElementSize * 3, R15_esp);
1191   __ ld(Rd, Interpreter::stackElementSize,     R15_esp);
1192   __ std(Rb, Interpreter::stackElementSize,     R15_esp);  // store b in d
1193   __ std(Rd, Interpreter::stackElementSize * 3, R15_esp);  // store d in b
1194   __ ld(Ra, Interpreter::stackElementSize * 4, R15_esp);
1195   __ ld(Rc, Interpreter::stackElementSize * 2, R15_esp);
1196   __ std(Ra, Interpreter::stackElementSize * 2, R15_esp);  // store a in c
1197   __ std(Rc, Interpreter::stackElementSize * 4, R15_esp);  // store c in a
1198   // stack: ..., c, d, a, b
1199   __ push_2ptrs(Rc, Rd);
1200   // stack: ..., c, d, a, b, c, d
1201 }
1202 
1203 void TemplateTable::swap() {
1204   transition(vtos, vtos);
1205   // stack: ..., a, b
1206 
1207   Register Ra = R11_scratch1,
1208            Rb = R12_scratch2;
1209   // stack: ..., a, b
1210   __ ld(Rb, Interpreter::stackElementSize,     R15_esp);
1211   __ ld(Ra, Interpreter::stackElementSize * 2, R15_esp);
1212   __ std(Rb, Interpreter::stackElementSize * 2, R15_esp);
1213   __ std(Ra, Interpreter::stackElementSize,     R15_esp);
1214   // stack: ..., b, a
1215 }
1216 
1217 void TemplateTable::iop2(Operation op) {
1218   transition(itos, itos);
1219 
1220   Register Rscratch = R11_scratch1;
1221 
1222   __ pop_i(Rscratch);
1223   // tos  = number of bits to shift
1224   // Rscratch = value to shift
1225   switch (op) {
1226     case  add:   __ add(R17_tos, Rscratch, R17_tos); break;
1227     case  sub:   __ sub(R17_tos, Rscratch, R17_tos); break;
1228     case  mul:   __ mullw(R17_tos, Rscratch, R17_tos); break;
1229     case  _and:  __ andr(R17_tos, Rscratch, R17_tos); break;
1230     case  _or:   __ orr(R17_tos, Rscratch, R17_tos); break;
1231     case  _xor:  __ xorr(R17_tos, Rscratch, R17_tos); break;
1232     case  shl:   __ rldicl(R17_tos, R17_tos, 0, 64-5); __ slw(R17_tos, Rscratch, R17_tos); break;
1233     case  shr:   __ rldicl(R17_tos, R17_tos, 0, 64-5); __ sraw(R17_tos, Rscratch, R17_tos); break;
1234     case  ushr:  __ rldicl(R17_tos, R17_tos, 0, 64-5); __ srw(R17_tos, Rscratch, R17_tos); break;
1235     default:     ShouldNotReachHere();
1236   }
1237 }
1238 
1239 void TemplateTable::lop2(Operation op) {
1240   transition(ltos, ltos);
1241 
1242   Register Rscratch = R11_scratch1;
1243   __ pop_l(Rscratch);
1244   switch (op) {
1245     case  add:   __ add(R17_tos, Rscratch, R17_tos); break;
1246     case  sub:   __ sub(R17_tos, Rscratch, R17_tos); break;
1247     case  _and:  __ andr(R17_tos, Rscratch, R17_tos); break;
1248     case  _or:   __ orr(R17_tos, Rscratch, R17_tos); break;
1249     case  _xor:  __ xorr(R17_tos, Rscratch, R17_tos); break;
1250     default:     ShouldNotReachHere();
1251   }
1252 }
1253 
1254 void TemplateTable::idiv() {
1255   transition(itos, itos);
1256 
1257   Label Lnormal, Lexception, Ldone;
1258   Register Rdividend = R11_scratch1; // Used by irem.
1259 
1260   __ addi(R0, R17_tos, 1);
1261   __ cmplwi(CR0, R0, 2);
1262   __ bgt(CR0, Lnormal); // divisor <-1 or >1
1263 
1264   __ cmpwi(CR1, R17_tos, 0);
1265   __ beq(CR1, Lexception); // divisor == 0
1266 
1267   __ pop_i(Rdividend);
1268   __ mullw(R17_tos, Rdividend, R17_tos); // div by +/-1
1269   __ b(Ldone);
1270 
1271   __ bind(Lexception);
1272   __ load_dispatch_table(R11_scratch1, (address*)Interpreter::_throw_ArithmeticException_entry);
1273   __ mtctr(R11_scratch1);
1274   __ bctr();
1275 
1276   __ align(32, 12);
1277   __ bind(Lnormal);
1278   __ pop_i(Rdividend);
1279   __ divw(R17_tos, Rdividend, R17_tos); // Can't divide minint/-1.
1280   __ bind(Ldone);
1281 }
1282 
1283 void TemplateTable::irem() {
1284   transition(itos, itos);
1285 
1286   __ mr(R12_scratch2, R17_tos);
1287   idiv();
1288   __ mullw(R17_tos, R17_tos, R12_scratch2);
1289   __ subf(R17_tos, R17_tos, R11_scratch1); // Dividend set by idiv.
1290 }
1291 
1292 void TemplateTable::lmul() {
1293   transition(ltos, ltos);
1294 
1295   __ pop_l(R11_scratch1);
1296   __ mulld(R17_tos, R11_scratch1, R17_tos);
1297 }
1298 
1299 void TemplateTable::ldiv() {
1300   transition(ltos, ltos);
1301 
1302   Label Lnormal, Lexception, Ldone;
1303   Register Rdividend = R11_scratch1; // Used by lrem.
1304 
1305   __ addi(R0, R17_tos, 1);
1306   __ cmpldi(CR0, R0, 2);
1307   __ bgt(CR0, Lnormal); // divisor <-1 or >1
1308 
1309   __ cmpdi(CR1, R17_tos, 0);
1310   __ beq(CR1, Lexception); // divisor == 0
1311 
1312   __ pop_l(Rdividend);
1313   __ mulld(R17_tos, Rdividend, R17_tos); // div by +/-1
1314   __ b(Ldone);
1315 
1316   __ bind(Lexception);
1317   __ load_dispatch_table(R11_scratch1, (address*)Interpreter::_throw_ArithmeticException_entry);
1318   __ mtctr(R11_scratch1);
1319   __ bctr();
1320 
1321   __ align(32, 12);
1322   __ bind(Lnormal);
1323   __ pop_l(Rdividend);
1324   __ divd(R17_tos, Rdividend, R17_tos); // Can't divide minint/-1.
1325   __ bind(Ldone);
1326 }
1327 
1328 void TemplateTable::lrem() {
1329   transition(ltos, ltos);
1330 
1331   __ mr(R12_scratch2, R17_tos);
1332   ldiv();
1333   __ mulld(R17_tos, R17_tos, R12_scratch2);
1334   __ subf(R17_tos, R17_tos, R11_scratch1); // Dividend set by ldiv.
1335 }
1336 
1337 void TemplateTable::lshl() {
1338   transition(itos, ltos);
1339 
1340   __ rldicl(R17_tos, R17_tos, 0, 64-6); // Extract least significant bits.
1341   __ pop_l(R11_scratch1);
1342   __ sld(R17_tos, R11_scratch1, R17_tos);
1343 }
1344 
1345 void TemplateTable::lshr() {
1346   transition(itos, ltos);
1347 
1348   __ rldicl(R17_tos, R17_tos, 0, 64-6); // Extract least significant bits.
1349   __ pop_l(R11_scratch1);
1350   __ srad(R17_tos, R11_scratch1, R17_tos);
1351 }
1352 
1353 void TemplateTable::lushr() {
1354   transition(itos, ltos);
1355 
1356   __ rldicl(R17_tos, R17_tos, 0, 64-6); // Extract least significant bits.
1357   __ pop_l(R11_scratch1);
1358   __ srd(R17_tos, R11_scratch1, R17_tos);
1359 }
1360 
1361 void TemplateTable::fop2(Operation op) {
1362   transition(ftos, ftos);
1363 
1364   switch (op) {
1365     case add: __ pop_f(F0_SCRATCH); __ fadds(F15_ftos, F0_SCRATCH, F15_ftos); break;
1366     case sub: __ pop_f(F0_SCRATCH); __ fsubs(F15_ftos, F0_SCRATCH, F15_ftos); break;
1367     case mul: __ pop_f(F0_SCRATCH); __ fmuls(F15_ftos, F0_SCRATCH, F15_ftos); break;
1368     case div: __ pop_f(F0_SCRATCH); __ fdivs(F15_ftos, F0_SCRATCH, F15_ftos); break;
1369     case rem:
1370       __ pop_f(F1_ARG1);
1371       __ fmr(F2_ARG2, F15_ftos);
1372       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem));
1373       __ fmr(F15_ftos, F1_RET);
1374       break;
1375 
1376     default: ShouldNotReachHere();
1377   }
1378 }
1379 
1380 void TemplateTable::dop2(Operation op) {
1381   transition(dtos, dtos);
1382 
1383   switch (op) {
1384     case add: __ pop_d(F0_SCRATCH); __ fadd(F15_ftos, F0_SCRATCH, F15_ftos); break;
1385     case sub: __ pop_d(F0_SCRATCH); __ fsub(F15_ftos, F0_SCRATCH, F15_ftos); break;
1386     case mul: __ pop_d(F0_SCRATCH); __ fmul(F15_ftos, F0_SCRATCH, F15_ftos); break;
1387     case div: __ pop_d(F0_SCRATCH); __ fdiv(F15_ftos, F0_SCRATCH, F15_ftos); break;
1388     case rem:
1389       __ pop_d(F1_ARG1);
1390       __ fmr(F2_ARG2, F15_ftos);
1391       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem));
1392       __ fmr(F15_ftos, F1_RET);
1393       break;
1394 
1395     default: ShouldNotReachHere();
1396   }
1397 }
1398 
1399 // Negate the value in the TOS cache.
1400 void TemplateTable::ineg() {
1401   transition(itos, itos);
1402 
1403   __ neg(R17_tos, R17_tos);
1404 }
1405 
1406 // Negate the value in the TOS cache.
1407 void TemplateTable::lneg() {
1408   transition(ltos, ltos);
1409 
1410   __ neg(R17_tos, R17_tos);
1411 }
1412 
1413 void TemplateTable::fneg() {
1414   transition(ftos, ftos);
1415 
1416   __ fneg(F15_ftos, F15_ftos);
1417 }
1418 
1419 void TemplateTable::dneg() {
1420   transition(dtos, dtos);
1421 
1422   __ fneg(F15_ftos, F15_ftos);
1423 }
1424 
1425 // Increments a local variable in place.
1426 void TemplateTable::iinc() {
1427   transition(vtos, vtos);
1428 
1429   const Register Rindex     = R11_scratch1,
1430                  Rincrement = R0,
1431                  Rvalue     = R12_scratch2;
1432 
1433   locals_index(Rindex);              // Load locals index from bytecode stream.
1434   __ lbz(Rincrement, 2, R14_bcp);    // Load increment from the bytecode stream.
1435   __ extsb(Rincrement, Rincrement);
1436 
1437   __ load_local_int(Rvalue, Rindex, Rindex); // Puts address of local into Rindex.
1438 
1439   __ add(Rvalue, Rincrement, Rvalue);
1440   __ stw(Rvalue, 0, Rindex);
1441 }
1442 
1443 void TemplateTable::wide_iinc() {
1444   transition(vtos, vtos);
1445 
1446   Register Rindex       = R11_scratch1,
1447            Rlocals_addr = Rindex,
1448            Rincr        = R12_scratch2;
1449   locals_index_wide(Rindex);
1450   __ get_2_byte_integer_at_bcp(4, Rincr, InterpreterMacroAssembler::Signed);
1451   __ load_local_int(R17_tos, Rlocals_addr, Rindex);
1452   __ add(R17_tos, Rincr, R17_tos);
1453   __ stw(R17_tos, 0, Rlocals_addr);
1454 }
1455 
// Emits the code for all primitive conversion bytecodes (i2l, i2f, ..., d2f).
// The bytecode being generated is obtained from bytecode(). Integral values
// are handled in R17_tos, floating point values in F15_ftos.
void TemplateTable::convert() {
  // %%%%% Factor this first part across platforms
#ifdef ASSERT
  // Debug builds only: derive the expected TOS states for this bytecode and
  // check them through transition().
  TosState tos_in  = ilgl;
  TosState tos_out = ilgl;
  switch (bytecode()) {
    case Bytecodes::_i2l: // fall through
    case Bytecodes::_i2f: // fall through
    case Bytecodes::_i2d: // fall through
    case Bytecodes::_i2b: // fall through
    case Bytecodes::_i2c: // fall through
    case Bytecodes::_i2s: tos_in = itos; break;
    case Bytecodes::_l2i: // fall through
    case Bytecodes::_l2f: // fall through
    case Bytecodes::_l2d: tos_in = ltos; break;
    case Bytecodes::_f2i: // fall through
    case Bytecodes::_f2l: // fall through
    case Bytecodes::_f2d: tos_in = ftos; break;
    case Bytecodes::_d2i: // fall through
    case Bytecodes::_d2l: // fall through
    case Bytecodes::_d2f: tos_in = dtos; break;
    default             : ShouldNotReachHere();
  }
  switch (bytecode()) {
    case Bytecodes::_l2i: // fall through
    case Bytecodes::_f2i: // fall through
    case Bytecodes::_d2i: // fall through
    case Bytecodes::_i2b: // fall through
    case Bytecodes::_i2c: // fall through
    case Bytecodes::_i2s: tos_out = itos; break;
    case Bytecodes::_i2l: // fall through
    case Bytecodes::_f2l: // fall through
    case Bytecodes::_d2l: tos_out = ltos; break;
    case Bytecodes::_i2f: // fall through
    case Bytecodes::_l2f: // fall through
    case Bytecodes::_d2f: tos_out = ftos; break;
    case Bytecodes::_i2d: // fall through
    case Bytecodes::_l2d: // fall through
    case Bytecodes::_f2d: tos_out = dtos; break;
    default             : ShouldNotReachHere();
  }
  transition(tos_in, tos_out);
#endif

  // Conversion
  // 'done' is the common exit; the float/double -> integral cases branch to
  // it directly with a zero result when the input is NaN.
  Label done;
  switch (bytecode()) {
    case Bytecodes::_i2l:
      __ extsw(R17_tos, R17_tos);
      break;

    case Bytecodes::_l2i:
      // Nothing to do, we'll continue to work with the lower bits.
      break;

    case Bytecodes::_i2b:
      __ extsb(R17_tos, R17_tos);
      break;

    case Bytecodes::_i2c:
      // Clear everything above the low 16 bits.
      __ rldicl(R17_tos, R17_tos, 0, 64-2*8);
      break;

    case Bytecodes::_i2s:
      __ extsh(R17_tos, R17_tos);
      break;

    case Bytecodes::_i2d:
      __ extsw(R17_tos, R17_tos);
      // Intentional fall through: continue with the long -> double code.
    case Bytecodes::_l2d:
      __ move_l_to_d();
      __ fcfid(F15_ftos, F15_ftos);
      break;

    case Bytecodes::_i2f:
      __ extsw(R17_tos, R17_tos);
      __ move_l_to_d();
      __ fcfids(F15_ftos, F15_ftos);
      break;

    case Bytecodes::_l2f:
      __ move_l_to_d();
      __ fcfids(F15_ftos, F15_ftos);
      break;

    case Bytecodes::_f2d:
      // empty
      break;

    case Bytecodes::_d2f:
      __ frsp(F15_ftos, F15_ftos);
      break;

    case Bytecodes::_d2i:
    case Bytecodes::_f2i:
      // Compare the value with itself to detect NaN.
      __ fcmpu(CR0, F15_ftos, F15_ftos);
      __ li(R17_tos, 0); // 0 in case of NAN
      __ bso(CR0, done);
      __ fctiwz(F15_ftos, F15_ftos);
      __ move_d_to_l();
      break;

    case Bytecodes::_d2l:
    case Bytecodes::_f2l:
      // Compare the value with itself to detect NaN.
      __ fcmpu(CR0, F15_ftos, F15_ftos);
      __ li(R17_tos, 0); // 0 in case of NAN
      __ bso(CR0, done);
      __ fctidz(F15_ftos, F15_ftos);
      __ move_d_to_l();
      break;

    default: ShouldNotReachHere();
  }
  __ bind(done);
}
1571 
1572 // Long compare
1573 void TemplateTable::lcmp() {
1574   transition(ltos, itos);
1575 
1576   const Register Rscratch = R11_scratch1;
1577   __ pop_l(Rscratch); // first operand, deeper in stack
1578 
1579   __ cmpd(CR0, Rscratch, R17_tos); // compare
1580   __ set_cmp3(R17_tos); // set result as follows: <: -1, =: 0, >: 1
1581 }
1582 
1583 // fcmpl/fcmpg and dcmpl/dcmpg bytecodes
1584 // unordered_result == -1 => fcmpl or dcmpl
1585 // unordered_result ==  1 => fcmpg or dcmpg
1586 void TemplateTable::float_cmp(bool is_float, int unordered_result) {
1587   const FloatRegister Rfirst  = F0_SCRATCH,
1588                       Rsecond = F15_ftos;
1589   const Register Rscratch = R11_scratch1;
1590 
1591   if (is_float) {
1592     __ pop_f(Rfirst);
1593   } else {
1594     __ pop_d(Rfirst);
1595   }
1596 
1597   __ fcmpu(CR0, Rfirst, Rsecond); // compare
1598   // if unordered_result is 1, treat unordered_result like 'greater than'
1599   assert(unordered_result == 1 || unordered_result == -1, "unordered_result can be either 1 or -1");
1600   __ set_cmpu3(R17_tos, unordered_result != 1);
1601 }
1602 
1603 // Branch_conditional which takes TemplateTable::Condition.
1604 void TemplateTable::branch_conditional(ConditionRegister crx, TemplateTable::Condition cc, Label& L, bool invert) {
1605   bool positive = false;
1606   Assembler::Condition cond = Assembler::equal;
1607   switch (cc) {
1608     case TemplateTable::equal:         positive = true ; cond = Assembler::equal  ; break;
1609     case TemplateTable::not_equal:     positive = false; cond = Assembler::equal  ; break;
1610     case TemplateTable::less:          positive = true ; cond = Assembler::less   ; break;
1611     case TemplateTable::less_equal:    positive = false; cond = Assembler::greater; break;
1612     case TemplateTable::greater:       positive = true ; cond = Assembler::greater; break;
1613     case TemplateTable::greater_equal: positive = false; cond = Assembler::less   ; break;
1614     default: ShouldNotReachHere();
1615   }
1616   int bo = (positive != invert) ? Assembler::bcondCRbiIs1 : Assembler::bcondCRbiIs0;
1617   int bi = Assembler::bi0(crx, cond);
1618   __ bc(bo, bi, L);
1619 }
1620 
// Emits the code for a taken branch: goto/goto_w, jsr/jsr_w and the taken path
// of the conditional branches (see if_cmp_common).
//  - is_jsr:  additionally push the return bci and dispatch right away.
//  - is_wide: the displacement in the bytecode stream is 4 bytes instead of 2.
// For non-forward branches (displacement <= 0) with UseCompiler && UseLoopCounter,
// a backedge counter is incremented and, with UseOnStackReplacement, the
// runtime is notified on overflow and may hand back an OSR nmethod to continue in.
void TemplateTable::branch(bool is_jsr, bool is_wide) {

  const Register Rscratch1    = R11_scratch1,
                 Rscratch2    = R12_scratch2,
                 Rscratch3    = R3_ARG1,
                 R4_counters  = R4_ARG2,
                 bumped_count = R31,
                 Rdisp        = R22_tmp2;

  __ profile_taken_branch(Rscratch1, bumped_count);

  // Get (wide) offset.
  if (is_wide) {
    __ get_4_byte_integer_at_bcp(1, Rdisp, InterpreterMacroAssembler::Signed);
  } else {
    __ get_2_byte_integer_at_bcp(1, Rdisp, InterpreterMacroAssembler::Signed);
  }

  // --------------------------------------------------------------------------
  // Handle all the JSR stuff here, then exit.
  // It's much shorter and cleaner than intermingling with the
  // non-JSR normal-branch stuff occurring below.
  if (is_jsr) {
    // Compute return address as bci in R17_tos:
    // (bcp + instruction length - codes_offset) - ConstMethod*.
    __ ld(Rscratch1, in_bytes(Method::const_offset()), R19_method);
    __ addi(Rscratch2, R14_bcp, -in_bytes(ConstMethod::codes_offset()) + (is_wide ? 5 : 3));
    __ subf(R17_tos, Rscratch1, Rscratch2);

    // Bump bcp to target of JSR.
    __ add(R14_bcp, Rdisp, R14_bcp);
    // Push returnAddress for "ret" on stack.
    __ push_ptr(R17_tos);
    // And away we go!
    __ dispatch_next(vtos, 0 ,true);
    return;
  }

  // --------------------------------------------------------------------------
  // Normal (non-jsr) branch handling

  // Bump bytecode pointer by displacement (take the branch).
  __ add(R14_bcp, Rdisp, R14_bcp); // Add to bc addr.

  const bool increment_invocation_counter_for_backward_branches = UseCompiler && UseLoopCounter;
  if (increment_invocation_counter_for_backward_branches) {
    Label Lforward;

    // Check branch direction. Only displacements <= 0 are counted.
    __ cmpdi(CR0, Rdisp, 0);
    __ bgt(CR0, Lforward);

    // NOTE(review): Lforward is presumably the bail-out target when no
    // MethodCounters are available - confirm against get_method_counters().
    __ get_method_counters(R19_method, R4_counters, Lforward);

    Label Lno_mdo, Loverflow;
    const int increment = InvocationCounter::count_increment;
    if (ProfileInterpreter) {
      Register Rmdo = Rscratch1;

      // If no method data exists, use the counter in the MethodCounters (Lno_mdo).
      __ ld(Rmdo, in_bytes(Method::method_data_offset()), R19_method);
      __ cmpdi(CR0, Rmdo, 0);
      __ beq(CR0, Lno_mdo);

      // Increment backedge counter in the MDO.
      const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
      __ lwz(Rscratch2, mdo_bc_offs, Rmdo);
      __ lwz(Rscratch3, in_bytes(MethodData::backedge_mask_offset()), Rmdo);
      __ addi(Rscratch2, Rscratch2, increment);
      __ stw(Rscratch2, mdo_bc_offs, Rmdo);
      if (UseOnStackReplacement) {
        // Overflow is signalled when (counter & mask) == 0.
        __ and_(Rscratch3, Rscratch2, Rscratch3);
        __ bne(CR0, Lforward);
        __ b(Loverflow);
      } else {
        __ b(Lforward);
      }
    }

    // If there's no MDO, increment counter in method.
    const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
    __ bind(Lno_mdo);
    __ lwz(Rscratch2, mo_bc_offs, R4_counters);
    __ lwz(Rscratch3, in_bytes(MethodCounters::backedge_mask_offset()), R4_counters);
    __ addi(Rscratch2, Rscratch2, increment);
    __ stw(Rscratch2, mo_bc_offs, R4_counters);
    if (UseOnStackReplacement) {
      // Overflow is signalled when (counter & mask) == 0; fall into Loverflow then.
      __ and_(Rscratch3, Rscratch2, Rscratch3);
      __ bne(CR0, Lforward);
    } else {
      __ b(Lforward);
    }
    __ bind(Loverflow);

    // Notify point for loop, pass branch bytecode.
    __ subf(R4_ARG2, Rdisp, R14_bcp); // Compute branch bytecode (previous bcp).
    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), R4_ARG2, true);

    // Was an OSR adapter generated?
    __ cmpdi(CR0, R3_RET, 0);
    __ beq(CR0, Lforward);

    // Has the nmethod been invalidated already?
    __ lbz(R0, in_bytes(nmethod::state_offset()), R3_RET);
    __ cmpwi(CR0, R0, nmethod::in_use);
    __ bne(CR0, Lforward);

    // Migrate the interpreter frame off of the stack.
    // We can use all registers because we will not return to interpreter from this point.

    // Save nmethod.
    const Register osr_nmethod = R31;
    __ mr(osr_nmethod, R3_RET);
    __ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R11_scratch1);
    JFR_ONLY(__ enter_jfr_critical_section();)
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin), R16_thread);
    __ reset_last_Java_frame();
    // OSR buffer is in ARG1.

    // Remove the interpreter frame.
    __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ R12_scratch2, R11_scratch1, R0);
    JFR_ONLY(__ leave_jfr_critical_section();)

    // Jump to the osr code.
    // LR gets the return pc produced by merge_frames, CTR the OSR entry point.
    __ ld(R11_scratch1, nmethod::osr_entry_point_offset(), osr_nmethod);
    __ mtlr(R12_scratch2);
    __ mtctr(R11_scratch1);
    __ bctr();

    __ bind(Lforward);
  }
  __ dispatch_next(vtos, 0, true);
}
1753 
1754 // Helper function for if_cmp* methods below.
1755 // Factored out common compare and branch code.
1756 void TemplateTable::if_cmp_common(Register Rfirst, Register Rsecond, Register Rscratch1, Register Rscratch2, Condition cc, bool is_jint, bool cmp0) {
1757   Label Lnot_taken;
1758   // Note: The condition code we get is the condition under which we
1759   // *fall through*! So we have to inverse the CC here.
1760 
1761   if (is_jint) {
1762     if (cmp0) {
1763       __ cmpwi(CR0, Rfirst, 0);
1764     } else {
1765       __ cmpw(CR0, Rfirst, Rsecond);
1766     }
1767   } else {
1768     if (cmp0) {
1769       __ cmpdi(CR0, Rfirst, 0);
1770     } else {
1771       __ cmpd(CR0, Rfirst, Rsecond);
1772     }
1773   }
1774   branch_conditional(CR0, cc, Lnot_taken, /*invert*/ true);
1775 
1776   // Conition is false => Jump!
1777   branch(false, false);
1778 
1779   // Condition is not true => Continue.
1780   __ align(32, 12);
1781   __ bind(Lnot_taken);
1782   __ profile_not_taken_branch(Rscratch1, Rscratch2);
1783 }
1784 
1785 // Compare integer values with zero and fall through if CC holds, branch away otherwise.
1786 void TemplateTable::if_0cmp(Condition cc) {
1787   transition(itos, vtos);
1788 
1789   if_cmp_common(R17_tos, noreg, R11_scratch1, R12_scratch2, cc, true, true);
1790 }
1791 
1792 // Compare integer values and fall through if CC holds, branch away otherwise.
1793 //
1794 // Interface:
1795 //  - Rfirst: First operand  (older stack value)
1796 //  - tos:    Second operand (younger stack value)
1797 void TemplateTable::if_icmp(Condition cc) {
1798   transition(itos, vtos);
1799 
1800   const Register Rfirst  = R0,
1801                  Rsecond = R17_tos;
1802 
1803   __ pop_i(Rfirst);
1804   if_cmp_common(Rfirst, Rsecond, R11_scratch1, R12_scratch2, cc, true, false);
1805 }
1806 
1807 void TemplateTable::if_nullcmp(Condition cc) {
1808   transition(atos, vtos);
1809 
1810   if_cmp_common(R17_tos, noreg, R11_scratch1, R12_scratch2, cc, false, true);
1811 }
1812 
1813 void TemplateTable::if_acmp(Condition cc) {
1814   transition(atos, vtos);
1815 
1816   const Register Rfirst  = R0,
1817                  Rsecond = R17_tos;
1818 
1819   __ pop_ptr(Rfirst);
1820   if_cmp_common(Rfirst, Rsecond, R11_scratch1, R12_scratch2, cc, false, false);
1821 }
1822 
1823 void TemplateTable::ret() {
1824   locals_index(R11_scratch1);
1825   __ load_local_ptr(R17_tos, R11_scratch1, R11_scratch1);
1826 
1827   __ profile_ret(vtos, R17_tos, R11_scratch1, R12_scratch2);
1828 
1829   __ ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method);
1830   __ add(R11_scratch1, R17_tos, R11_scratch1);
1831   __ addi(R14_bcp, R11_scratch1, in_bytes(ConstMethod::codes_offset()));
1832   __ dispatch_next(vtos, 0, true);
1833 }
1834 
1835 void TemplateTable::wide_ret() {
1836   transition(vtos, vtos);
1837 
1838   const Register Rindex = R3_ARG1,
1839                  Rscratch1 = R11_scratch1,
1840                  Rscratch2 = R12_scratch2;
1841 
1842   locals_index_wide(Rindex);
1843   __ load_local_ptr(R17_tos, R17_tos, Rindex);
1844   __ profile_ret(vtos, R17_tos, Rscratch1, R12_scratch2);
1845   // Tos now contains the bci, compute the bcp from that.
1846   __ ld(Rscratch1, in_bytes(Method::const_offset()), R19_method);
1847   __ addi(Rscratch2, R17_tos, in_bytes(ConstMethod::codes_offset()));
1848   __ add(R14_bcp, Rscratch1, Rscratch2);
1849   __ dispatch_next(vtos, 0, true);
1850 }
1851 
1852 void TemplateTable::tableswitch() {
1853   transition(itos, vtos);
1854 
1855   Label Ldispatch, Ldefault_case;
1856   Register Rlow_byte         = R3_ARG1,
1857            Rindex            = Rlow_byte,
1858            Rhigh_byte        = R4_ARG2,
1859            Rdef_offset_addr  = R5_ARG3, // is going to contain address of default offset
1860            Rscratch1         = R11_scratch1,
1861            Rscratch2         = R12_scratch2,
1862            Roffset           = R6_ARG4;
1863 
1864   // Align bcp.
1865   __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt);
1866   __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, LogBytesPerInt);
1867 
1868   // Load lo & hi.
1869   __ get_u4(Rlow_byte, Rdef_offset_addr, BytesPerInt, InterpreterMacroAssembler::Unsigned);
1870   __ get_u4(Rhigh_byte, Rdef_offset_addr, 2 *BytesPerInt, InterpreterMacroAssembler::Unsigned);
1871 
1872   // Check for default case (=index outside [low,high]).
1873   __ cmpw(CR0, R17_tos, Rlow_byte);
1874   __ cmpw(CR1, R17_tos, Rhigh_byte);
1875   __ blt(CR0, Ldefault_case);
1876   __ bgt(CR1, Ldefault_case);
1877 
1878   // Lookup dispatch offset.
1879   __ sub(Rindex, R17_tos, Rlow_byte);
1880   __ extsw(Rindex, Rindex);
1881   __ profile_switch_case(Rindex, Rhigh_byte /* scratch */, Rscratch1, Rscratch2);
1882   __ sldi(Rindex, Rindex, LogBytesPerInt);
1883   __ addi(Rindex, Rindex, 3 * BytesPerInt);
1884 #if defined(VM_LITTLE_ENDIAN)
1885   __ lwbrx(Roffset, Rdef_offset_addr, Rindex);
1886   __ extsw(Roffset, Roffset);
1887 #else
1888   __ lwax(Roffset, Rdef_offset_addr, Rindex);
1889 #endif
1890   __ b(Ldispatch);
1891 
1892   __ bind(Ldefault_case);
1893   __ profile_switch_default(Rhigh_byte, Rscratch1);
1894   __ get_u4(Roffset, Rdef_offset_addr, 0, InterpreterMacroAssembler::Signed);
1895 
1896   __ bind(Ldispatch);
1897 
1898   __ add(R14_bcp, Roffset, R14_bcp);
1899   __ dispatch_next(vtos, 0, true);
1900 }
1901 
1902 void TemplateTable::lookupswitch() {
1903   transition(itos, itos);
1904   __ stop("lookupswitch bytecode should have been rewritten");
1905 }
1906 
1907 // Table switch using linear search through cases.
1908 // Bytecode stream format:
1909 // Bytecode (1) | 4-byte padding | default offset (4) | count (4) | value/offset pair1 (8) | value/offset pair2 (8) | ...
1910 // Note: Everything is big-endian format here.
1911 void TemplateTable::fast_linearswitch() {
1912   transition(itos, vtos);
1913 
1914   Label Lloop_entry, Lsearch_loop, Lcontinue_execution, Ldefault_case;
1915   Register Rcount           = R3_ARG1,
1916            Rcurrent_pair    = R4_ARG2,
1917            Rdef_offset_addr = R5_ARG3, // Is going to contain address of default offset.
1918            Roffset          = R31,     // Might need to survive C call.
1919            Rvalue           = R12_scratch2,
1920            Rscratch         = R11_scratch1,
1921            Rcmp_value       = R17_tos;
1922 
1923   // Align bcp.
1924   __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt);
1925   __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, LogBytesPerInt);
1926 
1927   // Setup loop counter and limit.
1928   __ get_u4(Rcount, Rdef_offset_addr, BytesPerInt, InterpreterMacroAssembler::Unsigned);
1929   __ addi(Rcurrent_pair, Rdef_offset_addr, 2 * BytesPerInt); // Rcurrent_pair now points to first pair.
1930 
1931   __ mtctr(Rcount);
1932   __ cmpwi(CR0, Rcount, 0);
1933   __ bne(CR0, Lloop_entry);
1934 
1935   // Default case
1936   __ bind(Ldefault_case);
1937   __ get_u4(Roffset, Rdef_offset_addr, 0, InterpreterMacroAssembler::Signed);
1938   if (ProfileInterpreter) {
1939     __ profile_switch_default(Rdef_offset_addr, Rcount/* scratch */);
1940   }
1941   __ b(Lcontinue_execution);
1942 
1943   // Next iteration
1944   __ bind(Lsearch_loop);
1945   __ bdz(Ldefault_case);
1946   __ addi(Rcurrent_pair, Rcurrent_pair, 2 * BytesPerInt);
1947   __ bind(Lloop_entry);
1948   __ get_u4(Rvalue, Rcurrent_pair, 0, InterpreterMacroAssembler::Unsigned);
1949   __ cmpw(CR0, Rvalue, Rcmp_value);
1950   __ bne(CR0, Lsearch_loop);
1951 
1952   // Found, load offset.
1953   __ get_u4(Roffset, Rcurrent_pair, BytesPerInt, InterpreterMacroAssembler::Signed);
1954   // Calculate case index and profile
1955   __ mfctr(Rcurrent_pair);
1956   if (ProfileInterpreter) {
1957     __ sub(Rcurrent_pair, Rcount, Rcurrent_pair);
1958     __ profile_switch_case(Rcurrent_pair, Rcount /*scratch*/, Rdef_offset_addr/*scratch*/, Rscratch);
1959   }
1960 
1961   __ bind(Lcontinue_execution);
1962   __ add(R14_bcp, Roffset, R14_bcp);
1963   __ dispatch_next(vtos, 0, true);
1964 }
1965 
1966 // Table switch using binary search (value/offset pairs are ordered).
1967 // Bytecode stream format:
1968 // Bytecode (1) | 4-byte padding | default offset (4) | count (4) | value/offset pair1 (8) | value/offset pair2 (8) | ...
1969 // Note: Everything is big-endian format here. So on little endian machines, we have to revers offset and count and cmp value.
1970 void TemplateTable::fast_binaryswitch() {
1971 
1972   transition(itos, vtos);
1973   // Implementation using the following core algorithm: (copied from Intel)
1974   //
1975   // int binary_search(int key, LookupswitchPair* array, int n) {
1976   //   // Binary search according to "Methodik des Programmierens" by
1977   //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
1978   //   int i = 0;
1979   //   int j = n;
1980   //   while (i+1 < j) {
1981   //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
1982   //     // with      Q: for all i: 0 <= i < n: key < a[i]
1983   //     // where a stands for the array and assuming that the (inexisting)
1984   //     // element a[n] is infinitely big.
1985   //     int h = (i + j) >> 1;
1986   //     // i < h < j
1987   //     if (key < array[h].fast_match()) {
1988   //       j = h;
1989   //     } else {
1990   //       i = h;
1991   //     }
1992   //   }
1993   //   // R: a[i] <= key < a[i+1] or Q
1994   //   // (i.e., if key is within array, i is the correct index)
1995   //   return i;
1996   // }
1997 
1998   // register allocation
1999   const Register Rkey     = R17_tos;          // already set (tosca)
2000   const Register Rarray   = R3_ARG1;
2001   const Register Ri       = R4_ARG2;
2002   const Register Rj       = R5_ARG3;
2003   const Register Rh       = R6_ARG4;
2004   const Register Rscratch = R11_scratch1;
2005 
2006   const int log_entry_size = 3;
2007   const int entry_size = 1 << log_entry_size;
2008 
2009   Label found;
2010 
2011   // Find Array start,
2012   __ addi(Rarray, R14_bcp, 3 * BytesPerInt);
2013   __ clrrdi(Rarray, Rarray, LogBytesPerInt);
2014 
2015   // initialize i & j
2016   __ li(Ri,0);
2017   __ get_u4(Rj, Rarray, -BytesPerInt, InterpreterMacroAssembler::Unsigned);
2018 
2019   // and start.
2020   Label entry;
2021   __ b(entry);
2022 
2023   // binary search loop
2024   { Label loop;
2025     __ bind(loop);
2026     // int h = (i + j) >> 1;
2027     __ srdi(Rh, Rh, 1);
2028     // if (key < array[h].fast_match()) {
2029     //   j = h;
2030     // } else {
2031     //   i = h;
2032     // }
2033     __ sldi(Rscratch, Rh, log_entry_size);
2034 #if defined(VM_LITTLE_ENDIAN)
2035     __ lwbrx(Rscratch, Rscratch, Rarray);
2036 #else
2037     __ lwzx(Rscratch, Rscratch, Rarray);
2038 #endif
2039 
2040     // if (key < current value)
2041     //   Rh = Rj
2042     // else
2043     //   Rh = Ri
2044     Label Lgreater;
2045     __ cmpw(CR0, Rkey, Rscratch);
2046     __ bge(CR0, Lgreater);
2047     __ mr(Rj, Rh);
2048     __ b(entry);
2049     __ bind(Lgreater);
2050     __ mr(Ri, Rh);
2051 
2052     // while (i+1 < j)
2053     __ bind(entry);
2054     __ addi(Rscratch, Ri, 1);
2055     __ cmpw(CR0, Rscratch, Rj);
2056     __ add(Rh, Ri, Rj); // start h = i + j >> 1;
2057 
2058     __ blt(CR0, loop);
2059   }
2060 
2061   // End of binary search, result index is i (must check again!).
2062   Label default_case;
2063   Label continue_execution;
2064   if (ProfileInterpreter) {
2065     __ mr(Rh, Ri);              // Save index in i for profiling.
2066   }
2067   // Ri = value offset
2068   __ sldi(Ri, Ri, log_entry_size);
2069   __ add(Ri, Ri, Rarray);
2070   __ get_u4(Rscratch, Ri, 0, InterpreterMacroAssembler::Unsigned);
2071 
2072   Label not_found;
2073   // Ri = offset offset
2074   __ cmpw(CR0, Rkey, Rscratch);
2075   __ beq(CR0, not_found);
2076   // entry not found -> j = default offset
2077   __ get_u4(Rj, Rarray, -2 * BytesPerInt, InterpreterMacroAssembler::Unsigned);
2078   __ b(default_case);
2079 
2080   __ bind(not_found);
2081   // entry found -> j = offset
2082   __ profile_switch_case(Rh, Rj, Rscratch, Rkey);
2083   __ get_u4(Rj, Ri, BytesPerInt, InterpreterMacroAssembler::Unsigned);
2084 
2085   if (ProfileInterpreter) {
2086     __ b(continue_execution);
2087   }
2088 
2089   __ bind(default_case); // fall through (if not profiling)
2090   __ profile_switch_default(Ri, Rscratch);
2091 
2092   __ bind(continue_execution);
2093 
2094   __ extsw(Rj, Rj);
2095   __ add(R14_bcp, Rj, R14_bcp);
2096   __ dispatch_next(vtos, 0 , true);
2097 }
2098 
2099 void TemplateTable::_return(TosState state) {
2100   transition(state, state);
2101   assert(_desc->calls_vm(),
2102          "inconsistent calls_vm information"); // call in remove_activation
2103 
2104   if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
2105 
2106     Register Rscratch     = R11_scratch1,
2107              Rklass       = R12_scratch2,
2108              Rklass_flags = Rklass;
2109     Label Lskip_register_finalizer;
2110 
2111     // Check if the method has the FINALIZER flag set and call into the VM to finalize in this case.
2112     assert(state == vtos, "only valid state");
2113     __ ld(R17_tos, 0, R18_locals);
2114 
2115     // Load klass of this obj.
2116     __ load_klass(Rklass, R17_tos);
2117     __ lbz(Rklass_flags, in_bytes(Klass::misc_flags_offset()), Rklass);
2118     __ testbitdi(CR0, R0, Rklass_flags, exact_log2(KlassFlags::_misc_has_finalizer));
2119     __ bfalse(CR0, Lskip_register_finalizer);
2120 
2121     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), R17_tos /* obj */);
2122 
2123     __ align(32, 12);
2124     __ bind(Lskip_register_finalizer);
2125   }
2126 
2127   if (_desc->bytecode() != Bytecodes::_return_register_finalizer) {
2128     Label no_safepoint;
2129     __ ld(R11_scratch1, in_bytes(JavaThread::polling_word_offset()), R16_thread);
2130     __ andi_(R11_scratch1, R11_scratch1, SafepointMechanism::poll_bit());
2131     __ beq(CR0, no_safepoint);
2132     __ push(state);
2133     __ push_cont_fastpath();
2134     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint));
2135     __ pop_cont_fastpath();
2136     __ pop(state);
2137     __ bind(no_safepoint);
2138   }
2139 
2140   // Move the result value into the correct register and remove memory stack frame.
2141   __ remove_activation(state, /* throw_monitor_exception */ true);
2142   // Restoration of lr done by remove_activation.
2143   switch (state) {
2144     // Narrow result if state is itos but result type is smaller.
2145     // Need to narrow in the return bytecode rather than in generate_return_entry
2146     // since compiled code callers expect the result to already be narrowed.
2147     case itos: __ narrow(R17_tos); /* fall through */
2148     case ltos:
2149     case atos: __ mr(R3_RET, R17_tos); break;
2150     case ftos:
2151     case dtos: __ fmr(F1_RET, F15_ftos); break;
2152     case vtos: // This might be a constructor. Final fields (and volatile fields on PPC64) need
2153                // to get visible before the reference to the object gets stored anywhere.
2154                __ membar(Assembler::StoreStore); break;
2155     default  : ShouldNotReachHere();
2156   }
2157   __ blr();
2158 }
2159 
2160 // ============================================================================
2161 // Constant pool cache access
2162 //
// Memory ordering:
//
// As in the C++ interpreter, we load the fields
//   - _indices
//   - _f12_oop
// with acquire semantics, because they are read to check whether the cache
// entry is already resolved. Loads that follow this check must not float
// above it.
// See also comments in ConstantPoolCacheEntry::bytecode_1(),
// ConstantPoolCacheEntry::bytecode_2() and ConstantPoolCacheEntry::f1();
2172 
2173 // Call into the VM if call site is not yet resolved
2174 //
2175 // Input regs:
2176 //   - None, all passed regs are outputs.
2177 //
2178 // Returns:
2179 //   - Rcache:  The const pool cache entry that contains the resolved result.
2180 //
2181 // Kills:
2182 //   - Rscratch
2183 void TemplateTable::resolve_cache_and_index_for_method(int byte_no, Register Rcache, Register Rscratch) {
2184   assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
2185 
2186   Label L_clinit_barrier_slow, L_done;
2187   Register Rindex = Rscratch;
2188 
2189   Bytecodes::Code code = bytecode();
2190   const int bytecode_offset = (byte_no == f1_byte) ? in_bytes(ResolvedMethodEntry::bytecode1_offset())
2191                                                    : in_bytes(ResolvedMethodEntry::bytecode2_offset());
2192   __ load_method_entry(Rcache, Rindex);
2193   // Load-acquire the bytecode to match store-release in InterpreterRuntime
2194   __ lbz(Rscratch, bytecode_offset, Rcache);
2195   // Acquire by cmp-br-isync (see below).
2196   __ cmpdi(CR0, Rscratch, (int)code);
2197   __ bne(CR0, L_clinit_barrier_slow);
2198 
2199   __ isync(); // Order load wrt. succeeding loads.
2200 
2201   // Class initialization barrier for static methods
2202   if (bytecode() == Bytecodes::_invokestatic) {
2203     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
2204     const Register method = Rscratch;
2205     const Register klass  = Rscratch;
2206 
2207     __ ld(method, in_bytes(ResolvedMethodEntry::method_offset()), Rcache);
2208     __ load_method_holder(klass, method);
2209     __ clinit_barrier(klass, R16_thread, &L_done, /*L_slow_path*/ nullptr);
2210   } else {
2211     __ b(L_done);
2212   }
2213 
2214   // Class initialization barrier slow path lands here as well.
2215   __ bind(L_clinit_barrier_slow);
2216   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
2217   __ li(R4_ARG2, code);
2218   __ call_VM_preemptable(noreg, entry, R4_ARG2);
2219 
2220   // Update registers with resolved info.
2221   __ load_method_entry(Rcache, Rindex);
2222   __ bind(L_done);
2223 }
2224 
2225 void TemplateTable::resolve_cache_and_index_for_field(int byte_no, Register Rcache, Register index) {
2226   assert_different_registers(Rcache, index);
2227 
2228   Label L_clinit_barrier_slow, L_done;
2229 
2230   Bytecodes::Code code = bytecode();
2231   switch (code) {
2232   case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
2233   case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
2234   default: break;
2235   }
2236 
2237   assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
2238   __ load_field_entry(Rcache, index);
2239   int code_offset = (byte_no == f1_byte) ? in_bytes(ResolvedFieldEntry::get_code_offset())
2240                                          : in_bytes(ResolvedFieldEntry::put_code_offset());
2241   __ lbz(R0, code_offset, Rcache);
2242   __ cmpwi(CR0, R0, (int)code); // have we resolved this bytecode?
2243   __ bne(CR0, L_clinit_barrier_slow);
2244 
2245   __ isync(); // Order load wrt. succeeding loads.
2246 
2247   // Class initialization barrier for static fields
2248   if (bytecode() == Bytecodes::_getstatic || bytecode() == Bytecodes::_putstatic) {
2249     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
2250     const Register field_holder = R4_ARG2;
2251 
2252     // InterpreterRuntime::resolve_get_put sets field_holder and finally release-stores put_code.
2253     // We have seen the released put_code above and will read the corresponding field_holder and init_state
2254     // (ordered by compare-branch-isync).
2255     __ ld(field_holder, ResolvedFieldEntry::field_holder_offset(), Rcache);
2256     __ clinit_barrier(field_holder, R16_thread, &L_done, /*L_slow_path*/ nullptr);
2257   } else {
2258     __ b(L_done);
2259   }
2260 
2261   // resolve first time through
2262   // Class initialization barrier slow path lands here as well.
2263   __ bind(L_clinit_barrier_slow);
2264   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
2265   __ li(R4_ARG2, code);
2266   __ call_VM_preemptable(noreg, entry, R4_ARG2);
2267 
2268   // Update registers with resolved info
2269   __ load_field_entry(Rcache, index);
2270   __ bind(L_done);
2271 }
2272 
2273 void TemplateTable::load_resolved_field_entry(Register obj,
2274                                               Register cache,
2275                                               Register tos_state,
2276                                               Register offset,
2277                                               Register flags,
2278                                               bool is_static = false) {
2279   assert_different_registers(cache, tos_state, flags, offset);
2280 
2281   // Field offset
2282   __ load_sized_value(offset, in_bytes(ResolvedFieldEntry::field_offset_offset()), cache, sizeof(int), true /*is_signed*/);
2283 
2284   // Flags
2285   __ lbz(flags, in_bytes(ResolvedFieldEntry::flags_offset()), cache);
2286 
2287   if (tos_state != noreg) {
2288     __ lbz(tos_state, in_bytes(ResolvedFieldEntry::type_offset()), cache);
2289   }
2290 
2291   // Klass overwrite register
2292   if (is_static) {
2293     __ ld(obj, in_bytes(ResolvedFieldEntry::field_holder_offset()), cache);
2294     const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2295     __ ld(obj, mirror_offset, obj);
2296     __ resolve_oop_handle(obj, R11_scratch1, R12_scratch2, MacroAssembler::PRESERVATION_NONE);
2297   }
2298 }
2299 
2300 void TemplateTable::load_resolved_method_entry_special_or_static(Register cache,
2301                                                                  Register method,
2302                                                                  Register flags) {
2303   assert_different_registers(cache, method, flags);
2304 
2305   // determine constant pool cache field offsets
2306   resolve_cache_and_index_for_method(f1_byte, cache, method /* tmp */);
2307   if (flags != noreg) {
2308     __ lbz(flags, in_bytes(ResolvedMethodEntry::flags_offset()), cache);
2309   }
2310   __ ld(method, in_bytes(ResolvedMethodEntry::method_offset()), cache);
2311 }
2312 
2313 void TemplateTable::load_resolved_method_entry_handle(Register cache,
2314                                                       Register method,
2315                                                       Register ref_index,
2316                                                       Register flags) {
2317   // setup registers
2318   assert_different_registers(cache, method, ref_index, flags);
2319 
2320   // determine constant pool cache field offsets
2321   resolve_cache_and_index_for_method(f1_byte, cache, method /* tmp */);
2322   __ lbz(flags, in_bytes(ResolvedMethodEntry::flags_offset()), cache);
2323 
2324   // maybe push appendix to arguments (just before return address)
2325   Label L_no_push;
2326   __ testbitdi(CR0, R0, flags, ResolvedMethodEntry::has_appendix_shift);
2327   __ bfalse(CR0, L_no_push);
2328   // invokehandle uses an index into the resolved references array
2329   __ lhz(ref_index, in_bytes(ResolvedMethodEntry::resolved_references_index_offset()), cache);
2330   // Push the appendix as a trailing parameter.
2331   // This must be done before we get the receiver,
2332   // since the parameter_size includes it.
2333   Register appendix = method;
2334   assert(cache->is_nonvolatile(), "C-call in resolve_oop_handle");
2335   __ load_resolved_reference_at_index(appendix, ref_index, R11_scratch1, R12_scratch2);
2336   __ verify_oop(appendix);
2337   __ push_ptr(appendix); // push appendix (MethodType, CallSite, etc.)
2338   __ bind(L_no_push);
2339 
2340   __ ld(method, in_bytes(ResolvedMethodEntry::method_offset()), cache);
2341 }
2342 
2343 void TemplateTable::load_resolved_method_entry_interface(Register cache,
2344                                                          Register klass,
2345                                                          Register method_or_table_index,
2346                                                          Register flags) {
2347   // setup registers
2348   assert_different_registers(method_or_table_index, cache, flags);
2349   assert(klass == noreg, "to be determined by caller");
2350   assert(method_or_table_index == noreg, "to be determined by caller");
2351 
2352   // determine constant pool cache field offsets
2353   resolve_cache_and_index_for_method(f1_byte, cache, flags /* tmp */);
2354   __ lbz(flags, in_bytes(ResolvedMethodEntry::flags_offset()), cache);
2355 }
2356 
2357 void TemplateTable::load_resolved_method_entry_virtual(Register cache,
2358                                                        Register method_or_table_index,
2359                                                        Register flags) {
2360   // setup registers
2361   assert_different_registers(cache, flags);
2362   assert(method_or_table_index == noreg, "to be determined by caller");
2363 
2364   // determine constant pool cache field offsets
2365   resolve_cache_and_index_for_method(f2_byte, cache, flags /* tmp */);
2366   __ lbz(flags, in_bytes(ResolvedMethodEntry::flags_offset()), cache);
2367 }
2368 
2369 // Sets registers:
2370 //   `method`   Target method for invokedynamic
2371 //   R3_RET     Return address for invoke
2372 //
2373 // Kills: R11, R21, R30, R31
2374 void TemplateTable::load_invokedynamic_entry(Register method) {
2375   // setup registers
2376   const Register ret_addr = R3_RET;
2377   const Register appendix = R30;
2378   const Register cache    = R31;
2379   const Register index    = R21_tmp1;
2380   const Register tmp      = R11_scratch1;
2381   assert_different_registers(method, appendix, cache, index, tmp);
2382 
2383   Label resolved;
2384 
2385   __ load_resolved_indy_entry(cache, index);
2386   __ ld_ptr(method, in_bytes(ResolvedIndyEntry::method_offset()), cache);
2387 
2388   // The invokedynamic is unresolved iff method is null
2389   __ cmpdi(CR0, method, 0);
2390   __ bne(CR0, resolved);
2391 
2392   Bytecodes::Code code = bytecode();
2393 
2394   // Call to the interpreter runtime to resolve invokedynamic
2395   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
2396   __ li(R4_ARG2, code);
2397   __ call_VM(noreg, entry, R4_ARG2, true);
2398   // Update registers with resolved info
2399   __ load_resolved_indy_entry(cache, index);
2400   __ ld_ptr(method, in_bytes(ResolvedIndyEntry::method_offset()), cache);
2401 
2402   DEBUG_ONLY(__ cmpdi(CR0, method, 0));
2403   __ asm_assert_ne("Should be resolved by now");
2404   __ bind(resolved);
2405   __ isync(); // Order load wrt. succeeding loads.
2406 
2407   Label L_no_push;
2408   // Check if there is an appendix
2409   __ lbz(index, in_bytes(ResolvedIndyEntry::flags_offset()), cache);
2410   __ rldicl_(R0, index, 64-ResolvedIndyEntry::has_appendix_shift, 63);
2411   __ beq(CR0, L_no_push);
2412 
2413   // Get appendix
2414   __ lhz(index, in_bytes(ResolvedIndyEntry::resolved_references_index_offset()), cache);
2415   // Push the appendix as a trailing parameter
2416   assert(cache->is_nonvolatile(), "C-call in resolve_oop_handle");
2417   __ load_resolved_reference_at_index(appendix, index, /* temp */ ret_addr, tmp);
2418   __ verify_oop(appendix);
2419   __ push_ptr(appendix);   // push appendix (MethodType, CallSite, etc.)
2420   __ bind(L_no_push);
2421 
2422   // load return address
2423   {
2424     Register Rtable_addr = tmp;
2425     address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
2426 
2427     // compute return type
2428     __ lbz(index, in_bytes(ResolvedIndyEntry::result_type_offset()), cache);
2429     __ load_dispatch_table(Rtable_addr, (address*)table_addr);
2430     __ sldi(index, index, LogBytesPerWord);
2431     // Get return address.
2432     __ ldx(ret_addr, Rtable_addr, index);
2433   }
2434 }
2435 
2436 // ============================================================================
2437 // Field access
2438 
// Volatile variables demand their effects be made known to all CPUs
// in order. Store buffers on most chips allow reads & writes to
// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
// without some kind of memory barrier (i.e., it's not sufficient that
// the interpreter does not reorder volatile references, the hardware
// also must not reorder them).
2445 //
// According to the new Java Memory Model (JMM):
// (1) All volatiles are serialized with respect to each other. ALSO reads &
//     writes act as acquire & release, so:
// (2) A read cannot let unrelated NON-volatile memory refs that
//     happen after the read float up to before the read. It's OK for
//     non-volatile memory refs that happen before the volatile read to
//     float down below it.
// (3) Similarly, a volatile write cannot let unrelated NON-volatile
//     memory refs that happen BEFORE the write float down to after the
//     write. It's OK for non-volatile memory refs that happen after the
//     volatile write to float up before it.
2457 //
2458 // We only put in barriers around volatile refs (they are expensive),
2459 // not _between_ memory refs (that would require us to track the
2460 // flavor of the previous memory refs). Requirements (2) and (3)
2461 // require some barriers before volatile stores and after volatile
2462 // loads. These nearly cover requirement (1) but miss the
2463 // volatile-store-volatile-load case.  This final case is placed after
2464 // volatile-stores although it could just as well go before
2465 // volatile-loads.
2466 
2467 // The registers cache and index expected to be set before call.
2468 // Correct values of the cache and index registers are preserved.
2469 // Kills:
2470 //   Rcache (if has_tos)
2471 //   Rscratch
2472 void TemplateTable::jvmti_post_field_access(Register Rcache, Register Rscratch, bool is_static, bool has_tos) {
2473 
2474   assert_different_registers(Rcache, Rscratch);
2475 
2476   if (JvmtiExport::can_post_field_access()) {
2477     Label Lno_field_access_post;
2478 
2479     // Check if post field access in enabled.
2480     int offs = __ load_const_optimized(Rscratch, JvmtiExport::get_field_access_count_addr(), R0, true);
2481     __ lwz(Rscratch, offs, Rscratch);
2482 
2483     __ cmpwi(CR0, Rscratch, 0);
2484     __ beq(CR0, Lno_field_access_post);
2485 
2486     // Post access enabled - do it!
2487     if (is_static) {
2488       __ li(R17_tos, 0);
2489     } else {
2490       if (has_tos) {
2491         // The fast bytecode versions have obj ptr in register.
2492         // Thus, save object pointer before call_VM() clobbers it
2493         // put object on tos where GC wants it.
2494         __ push_ptr(R17_tos);
2495       } else {
2496         // Load top of stack (do not pop the value off the stack).
2497         __ ld(R17_tos, Interpreter::expr_offset_in_bytes(0), R15_esp);
2498       }
2499       __ verify_oop(R17_tos);
2500     }
2501     // tos:   object pointer or null if static
2502     // cache: cache entry pointer
2503     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), R17_tos, Rcache);
2504     if (!is_static && has_tos) {
2505       // Restore object pointer.
2506       __ pop_ptr(R17_tos);
2507       __ verify_oop(R17_tos);
2508     } else {
2509       // Cache is still needed to get class or obj.
2510       __ load_field_entry(Rcache, Rscratch);
2511     }
2512 
2513     __ align(32, 12);
2514     __ bind(Lno_field_access_post);
2515   }
2516 }
2517 
2518 // kills R11_scratch1
2519 void TemplateTable::pop_and_check_object(Register Roop) {
2520   Register Rtmp = R11_scratch1;
2521 
2522   assert_different_registers(Rtmp, Roop);
2523   __ pop_ptr(Roop);
2524   // For field access must check obj.
2525   __ null_check_throw(Roop, -1, Rtmp);
2526   __ verify_oop(Roop);
2527 }
2528 
// PPC64: implement volatile loads as fence-load-acquire.
//
// Shared template body for getfield, nofast_getfield and getstatic.
//
// The generated code resolves the field entry, then jumps through a branch
// table indexed by the field's tos state. Each table slot holds the address
// of the type-specific load (the "non-volatile entry point"). The instruction
// directly in front of it is a fence; when
// support_IRIW_for_not_multiple_copy_atomic_cpu is set, volatile fields enter
// one instruction earlier so that the fence is executed before the load.
// After the load, volatile fields (CR2 == eq) branch to the common acquire
// sequence at the end (Lacquire / Lisync).
//
// byte_no   - forwarded to resolve_cache_and_index_for_field.
// is_static - true for getstatic; no receiver is popped in that case.
// rc        - may_rewrite lets the non-static variant patch the bytecode to
//             its _fast_?getfield form.
void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
  transition(vtos, vtos);

  Label Lacquire, Lisync;

  const Register Rcache        = R3_ARG1,
                 Rclass_or_obj = R22_tmp2, // Needs to survive C call.
                 Roffset       = R23_tmp3, // Needs to survive C call.
                 Rtos_state    = R30,      // Needs to survive C call.
                 Rflags        = R31,      // Needs to survive C call.
                 Rbtable       = R5_ARG3,
                 Rbc           = R30,      // Shares R30 with Rtos_state; only used for patch_bytecode after the dispatch.
                 Rscratch      = R11_scratch1; // used by load_field_cp_cache_entry
                 // R12_scratch2 used by load_field_cp_cache_entry

  // The tables are static: they are filled in while the template is generated
  // and read by the generated code at runtime.
  static address field_branch_table[number_of_states],
                 static_branch_table[number_of_states];

  // The non-rewriting instance variant shares the table of the static variant
  // (see the early return below).
  address* branch_table = (is_static || rc == may_not_rewrite) ? static_branch_table : field_branch_table;

  // Get field offset.
  resolve_cache_and_index_for_field(byte_no, Rcache, Rscratch);

  // JVMTI support
  jvmti_post_field_access(Rcache, Rscratch, is_static, false);

  // Load after possible GC.
  load_resolved_field_entry(Rclass_or_obj, Rcache, Rtos_state, Roffset, Rflags, is_static); // Uses R11, R12

  // Load pointer to branch table.
  __ load_const_optimized(Rbtable, (address)branch_table, Rscratch);

  // Get volatile flag.
  __ rldicl(Rscratch, Rflags, 64-ResolvedFieldEntry::is_volatile_shift, 63); // Extract volatile bit.
  // Note: sync is needed before volatile load on PPC64.

#ifdef ASSERT
  Label LFlagInvalid;
  __ cmpldi(CR0, Rtos_state, number_of_states);
  __ bge(CR0, LFlagInvalid);
#endif

  // Load from branch table and dispatch (volatile case: one instruction ahead).
  __ sldi(Rtos_state, Rtos_state, LogBytesPerWord);
  __ cmpwi(CR2, Rscratch, 1); // Volatile?
  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ sldi(Rscratch, Rscratch, exact_log2(BytesPerInstWord)); // Volatile ? size of 1 instruction : 0.
  }
  __ ldx(Rbtable, Rbtable, Rtos_state);

  // Get the obj from stack.
  if (!is_static) {
    pop_and_check_object(Rclass_or_obj); // Kills R11_scratch1.
  } else {
    __ verify_oop(Rclass_or_obj);
  }

  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ subf(Rbtable, Rscratch, Rbtable); // Point to volatile/non-volatile entry point.
  }
  __ mtctr(Rbtable);
  __ bctr();

#ifdef ASSERT
  __ bind(LFlagInvalid);
  __ stop("got invalid flag");
#endif

  if (!is_static && rc == may_not_rewrite) {
    // We reuse the code from is_static.  It's jumped to via the table above.
    return;
  }

  // From here on: one code section per tos state. Each section records its
  // load instruction's address in branch_table and is preceded by a
  // single-instruction fence (the volatile entry point).

#ifdef ASSERT
  // The vtos slot is only populated in ASSERT builds; it leads to a stop.
  // __ bind(Lvtos);
  address pc_before_fence = __ pc();
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(__ pc() - pc_before_fence == (ptrdiff_t)BytesPerInstWord, "must be single instruction");
  assert(branch_table[vtos] == nullptr, "can't compute twice");
  branch_table[vtos] = __ pc(); // non-volatile_entry point
  __ stop("vtos unexpected");
#endif

  __ align(32, 28, 28); // Align load.
  // __ bind(Ldtos);
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[dtos] == nullptr, "can't compute twice");
  branch_table[dtos] = __ pc(); // non-volatile_entry point
  __ lfdx(F15_ftos, Rclass_or_obj, Roffset);
  __ push(dtos);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_dgetfield, Rbc, Rscratch);
  }
  {
    // Floating point values use a compare on the FP register to acquire and
    // then join the common isync at Lisync.
    Label acquire_double;
    __ beq(CR2, acquire_double); // Volatile?
    __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

    __ bind(acquire_double);
    __ fcmpu(CR0, F15_ftos, F15_ftos); // Acquire by cmp-br-isync.
    __ beq_predict_taken(CR0, Lisync);
    __ b(Lisync); // In case of NAN.
  }

  __ align(32, 28, 28); // Align load.
  // __ bind(Lftos);
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[ftos] == nullptr, "can't compute twice");
  branch_table[ftos] = __ pc(); // non-volatile_entry point
  __ lfsx(F15_ftos, Rclass_or_obj, Roffset);
  __ push(ftos);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_fgetfield, Rbc, Rscratch);
  }
  {
    Label acquire_float;
    __ beq(CR2, acquire_float); // Volatile?
    __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

    __ bind(acquire_float);
    __ fcmpu(CR0, F15_ftos, F15_ftos); // Acquire by cmp-br-isync.
    __ beq_predict_taken(CR0, Lisync);
    __ b(Lisync); // In case of NAN.
  }

  __ align(32, 28, 28); // Align load.
  // __ bind(Litos);
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[itos] == nullptr, "can't compute twice");
  branch_table[itos] = __ pc(); // non-volatile_entry point
  __ lwax(R17_tos, Rclass_or_obj, Roffset);
  __ push(itos);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_igetfield, Rbc, Rscratch);
  }
  __ beq(CR2, Lacquire); // Volatile?
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  __ align(32, 28, 28); // Align load.
  // __ bind(Lltos);
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[ltos] == nullptr, "can't compute twice");
  branch_table[ltos] = __ pc(); // non-volatile_entry point
  __ ldx(R17_tos, Rclass_or_obj, Roffset);
  __ push(ltos);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_lgetfield, Rbc, Rscratch);
  }
  __ beq(CR2, Lacquire); // Volatile?
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  __ align(32, 28, 28); // Align load.
  // __ bind(Lbtos);
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[btos] == nullptr, "can't compute twice");
  branch_table[btos] = __ pc(); // non-volatile_entry point
  __ lbzx(R17_tos, Rclass_or_obj, Roffset);
  __ extsb(R17_tos, R17_tos);
  __ push(btos);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_bgetfield, Rbc, Rscratch);
  }
  __ beq(CR2, Lacquire); // Volatile?
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  __ align(32, 28, 28); // Align load.
  // __ bind(Lztos); (same code as btos, but without the sign extension)
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[ztos] == nullptr, "can't compute twice");
  branch_table[ztos] = __ pc(); // non-volatile_entry point
  __ lbzx(R17_tos, Rclass_or_obj, Roffset);
  __ push(ztos);
  if (!is_static && rc == may_rewrite) {
    // use btos rewriting, no truncating to t/f bit is needed for getfield.
    patch_bytecode(Bytecodes::_fast_bgetfield, Rbc, Rscratch);
  }
  __ beq(CR2, Lacquire); // Volatile?
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  __ align(32, 28, 28); // Align load.
  // __ bind(Lctos);
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[ctos] == nullptr, "can't compute twice");
  branch_table[ctos] = __ pc(); // non-volatile_entry point
  __ lhzx(R17_tos, Rclass_or_obj, Roffset);
  __ push(ctos);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_cgetfield, Rbc, Rscratch);
  }
  __ beq(CR2, Lacquire); // Volatile?
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  __ align(32, 28, 28); // Align load.
  // __ bind(Lstos);
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[stos] == nullptr, "can't compute twice");
  branch_table[stos] = __ pc(); // non-volatile_entry point
  __ lhax(R17_tos, Rclass_or_obj, Roffset);
  __ push(stos);
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_sgetfield, Rbc, Rscratch);
  }
  __ beq(CR2, Lacquire); // Volatile?
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  __ align(32, 28, 28); // Align load.
  // __ bind(Latos);
  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
  assert(branch_table[atos] == nullptr, "can't compute twice");
  branch_table[atos] = __ pc(); // non-volatile_entry point
  do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
  __ verify_oop(R17_tos);
  __ push(atos);
  //__ dcbt(R17_tos); // prefetch
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_agetfield, Rbc, Rscratch);
  }
  __ beq(CR2, Lacquire); // Volatile?
  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));

  // Common tail for volatile loads. Values loaded into R17_tos enter at
  // Lacquire; the floating point sections enter at Lisync after their fcmpu.
  __ align(32, 12);
  __ bind(Lacquire);
  __ twi_0(R17_tos);
  __ bind(Lisync);
  __ isync(); // acquire

#ifdef ASSERT
  // Every tos state must have received an entry point.
  for (int i = 0; i<number_of_states; ++i) {
    assert(branch_table[i], "get initialization");
    //tty->print_cr("get: %s_branch_table[%d] = 0x%llx (opcode 0x%llx)",
    //              is_static ? "static" : "field", i, branch_table[i], *((unsigned int*)branch_table[i]));
  }
#endif
}
2764 
2765 void TemplateTable::getfield(int byte_no) {
2766   getfield_or_static(byte_no, false);
2767 }
2768 
2769 void TemplateTable::nofast_getfield(int byte_no) {
2770   getfield_or_static(byte_no, false, may_not_rewrite);
2771 }
2772 
2773 void TemplateTable::getstatic(int byte_no) {
2774   getfield_or_static(byte_no, true);
2775 }
2776 
2777 // The registers cache and index expected to be set before call.
2778 // The function may destroy various registers, just not the cache and index registers.
2779 void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rscratch, bool is_static) {
2780 
2781   assert_different_registers(Rcache, Rscratch, R6_ARG4);
2782 
2783   if (JvmtiExport::can_post_field_modification()) {
2784     Label Lno_field_mod_post;
2785 
2786     // Check if post field access in enabled.
2787     int offs = __ load_const_optimized(Rscratch, JvmtiExport::get_field_modification_count_addr(), R0, true);
2788     __ lwz(Rscratch, offs, Rscratch);
2789 
2790     __ cmpwi(CR0, Rscratch, 0);
2791     __ beq(CR0, Lno_field_mod_post);
2792 
2793     // Do the post
2794     const Register Robj = Rscratch;
2795 
2796     if (is_static) {
2797       // Life is simple. Null out the object pointer.
2798       __ li(Robj, 0);
2799     } else {
2800       // In case of the fast versions, value lives in registers => put it back on tos.
2801       int offs = Interpreter::expr_offset_in_bytes(0);
2802       Register base = R15_esp;
2803       switch(bytecode()) {
2804         case Bytecodes::_fast_aputfield: __ push_ptr(); offs+= Interpreter::stackElementSize; break;
2805         case Bytecodes::_fast_iputfield: // Fall through
2806         case Bytecodes::_fast_bputfield: // Fall through
2807         case Bytecodes::_fast_zputfield: // Fall through
2808         case Bytecodes::_fast_cputfield: // Fall through
2809         case Bytecodes::_fast_sputfield: __ push_i(); offs+=  Interpreter::stackElementSize; break;
2810         case Bytecodes::_fast_lputfield: __ push_l(); offs+=2*Interpreter::stackElementSize; break;
2811         case Bytecodes::_fast_fputfield: __ push_f(); offs+=  Interpreter::stackElementSize; break;
2812         case Bytecodes::_fast_dputfield: __ push_d(); offs+=2*Interpreter::stackElementSize; break;
2813         default: {
2814           offs = 0;
2815           base = Robj;
2816           const Register Rtos_state = Robj;
2817           Label is_one_slot;
2818           // Life is harder. The stack holds the value on top, followed by the
2819           // object. We don't know the size of the value, though; it could be
2820           // one or two words depending on its type. As a result, we must find
2821           // the type to determine where the object is.
2822           __ lbz(Rtos_state, in_bytes(ResolvedFieldEntry::type_offset()), Rcache);
2823 
2824           __ cmpwi(CR0, Rtos_state, ltos);
2825           __ cmpwi(CR1, Rtos_state, dtos);
2826           __ addi(base, R15_esp, Interpreter::expr_offset_in_bytes(1));
2827           __ crnor(CR0, Assembler::equal, CR1, Assembler::equal);
2828           __ beq(CR0, is_one_slot);
2829           __ addi(base, R15_esp, Interpreter::expr_offset_in_bytes(2));
2830           __ bind(is_one_slot);
2831           break;
2832         }
2833       }
2834       __ ld(Robj, offs, base);
2835       __ verify_oop(Robj);
2836     }
2837 
2838     __ addi(R6_ARG4, R15_esp, Interpreter::expr_offset_in_bytes(0));
2839     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), Robj, Rcache, R6_ARG4);
2840     __ load_field_entry(Rcache, Rscratch);
2841 
2842     // In case of the fast versions, value lives in registers => put it back on tos.
2843     switch(bytecode()) {
2844       case Bytecodes::_fast_aputfield: __ pop_ptr(); break;
2845       case Bytecodes::_fast_iputfield: // Fall through
2846       case Bytecodes::_fast_bputfield: // Fall through
2847       case Bytecodes::_fast_zputfield: // Fall through
2848       case Bytecodes::_fast_cputfield: // Fall through
2849       case Bytecodes::_fast_sputfield: __ pop_i(); break;
2850       case Bytecodes::_fast_lputfield: __ pop_l(); break;
2851       case Bytecodes::_fast_fputfield: __ pop_f(); break;
2852       case Bytecodes::_fast_dputfield: __ pop_d(); break;
2853       default: break; // Nothin' to do.
2854     }
2855 
2856     __ align(32, 12);
2857     __ bind(Lno_field_mod_post);
2858   }
2859 }
2860 
2861 // PPC64: implement volatile stores as release-store (return bytecode contains an additional release).
2862 void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
2863   Label Lvolatile;
2864 
2865   const Register Rcache        = R5_ARG3,  // Do not use ARG1/2 (causes trouble in jvmti_post_field_mod).
2866                  Rclass_or_obj = R31,      // Needs to survive C call.
2867                  Roffset       = R22_tmp2, // Needs to survive C call.
2868                  Rtos_state    = R23_tmp3, // Needs to survive C call.
2869                  Rflags        = R30,      // Needs to survive C call.
2870                  Rbtable       = R4_ARG2,
2871                  Rscratch      = R11_scratch1, // used by load_field_cp_cache_entry
2872                  Rscratch2     = R12_scratch2, // used by load_field_cp_cache_entry
2873                  Rscratch3     = R6_ARG4,
2874                  Rbc           = Rscratch3;
2875   const ConditionRegister CR_is_vol = CR2; // Non-volatile condition register (survives runtime call in do_oop_store).
2876 
2877   static address field_rw_branch_table[number_of_states],
2878                  field_norw_branch_table[number_of_states],
2879                  static_branch_table[number_of_states];
2880 
2881   address* branch_table = is_static ? static_branch_table :
2882     (rc == may_rewrite ? field_rw_branch_table : field_norw_branch_table);
2883 
2884   // Stack (grows up):
2885   //  value
2886   //  obj
2887 
2888   // Load the field offset.
2889   resolve_cache_and_index_for_field(byte_no, Rcache, Rscratch);
2890   jvmti_post_field_mod(Rcache, Rscratch, is_static);
2891   load_resolved_field_entry(Rclass_or_obj, Rcache, Rtos_state, Roffset, Rflags, is_static); // Uses R11, R12
2892 
2893   // Load pointer to branch table.
2894   __ load_const_optimized(Rbtable, (address)branch_table, Rscratch);
2895 
2896   // Get volatile flag.
2897   __ rldicl(Rscratch, Rflags, 64-ResolvedFieldEntry::is_volatile_shift, 63); // Extract volatile bit.
2898 
2899 #ifdef ASSERT
2900   Label LFlagInvalid;
2901   __ cmpldi(CR0, Rtos_state, number_of_states);
2902   __ bge(CR0, LFlagInvalid);
2903 #endif
2904 
2905   // Load from branch table and dispatch (volatile case: one instruction ahead).
2906   __ sldi(Rtos_state, Rtos_state, LogBytesPerWord);
2907   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
2908     __ cmpwi(CR_is_vol, Rscratch, 1);  // Volatile?
2909   }
2910   __ sldi(Rscratch, Rscratch, exact_log2(BytesPerInstWord)); // Volatile? size of instruction 1 : 0.
2911   __ ldx(Rbtable, Rbtable, Rtos_state);
2912 
2913   __ subf(Rbtable, Rscratch, Rbtable); // Point to volatile/non-volatile entry point.
2914   __ mtctr(Rbtable);
2915   __ bctr();
2916 
2917 #ifdef ASSERT
2918   __ bind(LFlagInvalid);
2919   __ stop("got invalid flag");
2920 
2921   // __ bind(Lvtos);
2922   address pc_before_release = __ pc();
2923   __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
2924   assert(__ pc() - pc_before_release == (ptrdiff_t)BytesPerInstWord, "must be single instruction");
2925   assert(branch_table[vtos] == nullptr, "can't compute twice");
2926   branch_table[vtos] = __ pc(); // non-volatile_entry point
2927   __ stop("vtos unexpected");
2928 #endif
2929 
2930   __ align(32, 28, 28); // Align pop.
2931   // __ bind(Ldtos);
2932   __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
2933   assert(branch_table[dtos] == nullptr, "can't compute twice");
2934   branch_table[dtos] = __ pc(); // non-volatile_entry point
2935   __ pop(dtos);
2936   if (!is_static) {
2937     pop_and_check_object(Rclass_or_obj);  // Kills R11_scratch1.
2938   }
2939   __ stfdx(F15_ftos, Rclass_or_obj, Roffset);
2940   if (!is_static && rc == may_rewrite) {
2941     patch_bytecode(Bytecodes::_fast_dputfield, Rbc, Rscratch, true, byte_no);
2942   }
2943   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
2944     __ beq(CR_is_vol, Lvolatile); // Volatile?
2945   }
2946   __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
2947 
2948   __ align(32, 28, 28); // Align pop.
2949   // __ bind(Lftos);
2950   __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
2951   assert(branch_table[ftos] == nullptr, "can't compute twice");
2952   branch_table[ftos] = __ pc(); // non-volatile_entry point
2953   __ pop(ftos);
2954   if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
2955   __ stfsx(F15_ftos, Rclass_or_obj, Roffset);
2956   if (!is_static && rc == may_rewrite) {
2957     patch_bytecode(Bytecodes::_fast_fputfield, Rbc, Rscratch, true, byte_no);
2958   }
2959   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
2960     __ beq(CR_is_vol, Lvolatile); // Volatile?
2961   }
2962   __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
2963 
2964   __ align(32, 28, 28); // Align pop.
2965   // __ bind(Litos);
2966   __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
2967   assert(branch_table[itos] == nullptr, "can't compute twice");
2968   branch_table[itos] = __ pc(); // non-volatile_entry point
2969   __ pop(itos);
2970   if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
2971   __ stwx(R17_tos, Rclass_or_obj, Roffset);
2972   if (!is_static && rc == may_rewrite) {
2973     patch_bytecode(Bytecodes::_fast_iputfield, Rbc, Rscratch, true, byte_no);
2974   }
2975   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
2976     __ beq(CR_is_vol, Lvolatile); // Volatile?
2977   }
2978   __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
2979 
2980   __ align(32, 28, 28); // Align pop.
2981   // __ bind(Lltos);
2982   __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
2983   assert(branch_table[ltos] == nullptr, "can't compute twice");
2984   branch_table[ltos] = __ pc(); // non-volatile_entry point
2985   __ pop(ltos);
2986   if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
2987   __ stdx(R17_tos, Rclass_or_obj, Roffset);
2988   if (!is_static && rc == may_rewrite) {
2989     patch_bytecode(Bytecodes::_fast_lputfield, Rbc, Rscratch, true, byte_no);
2990   }
2991   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
2992     __ beq(CR_is_vol, Lvolatile); // Volatile?
2993   }
2994   __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
2995 
2996   __ align(32, 28, 28); // Align pop.
2997   // __ bind(Lbtos);
2998   __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
2999   assert(branch_table[btos] == nullptr, "can't compute twice");
3000   branch_table[btos] = __ pc(); // non-volatile_entry point
3001   __ pop(btos);
3002   if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
3003   __ stbx(R17_tos, Rclass_or_obj, Roffset);
3004   if (!is_static && rc == may_rewrite) {
3005     patch_bytecode(Bytecodes::_fast_bputfield, Rbc, Rscratch, true, byte_no);
3006   }
3007   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
3008     __ beq(CR_is_vol, Lvolatile); // Volatile?
3009   }
3010   __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
3011 
3012   __ align(32, 28, 28); // Align pop.
3013   // __ bind(Lztos);
3014   __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
3015   assert(branch_table[ztos] == nullptr, "can't compute twice");
3016   branch_table[ztos] = __ pc(); // non-volatile_entry point
3017   __ pop(ztos);
3018   if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
3019   __ andi(R17_tos, R17_tos, 0x1);
3020   __ stbx(R17_tos, Rclass_or_obj, Roffset);
3021   if (!is_static && rc == may_rewrite) {
3022     patch_bytecode(Bytecodes::_fast_zputfield, Rbc, Rscratch, true, byte_no);
3023   }
3024   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
3025     __ beq(CR_is_vol, Lvolatile); // Volatile?
3026   }
3027   __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
3028 
3029   __ align(32, 28, 28); // Align pop.
3030   // __ bind(Lctos);
3031   __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
3032   assert(branch_table[ctos] == nullptr, "can't compute twice");
3033   branch_table[ctos] = __ pc(); // non-volatile_entry point
3034   __ pop(ctos);
3035   if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1..
3036   __ sthx(R17_tos, Rclass_or_obj, Roffset);
3037   if (!is_static && rc == may_rewrite) {
3038     patch_bytecode(Bytecodes::_fast_cputfield, Rbc, Rscratch, true, byte_no);
3039   }
3040   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
3041     __ beq(CR_is_vol, Lvolatile); // Volatile?
3042   }
3043   __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
3044 
3045   __ align(32, 28, 28); // Align pop.
3046   // __ bind(Lstos);
3047   __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
3048   assert(branch_table[stos] == nullptr, "can't compute twice");
3049   branch_table[stos] = __ pc(); // non-volatile_entry point
3050   __ pop(stos);
3051   if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
3052   __ sthx(R17_tos, Rclass_or_obj, Roffset);
3053   if (!is_static && rc == may_rewrite) {
3054     patch_bytecode(Bytecodes::_fast_sputfield, Rbc, Rscratch, true, byte_no);
3055   }
3056   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
3057     __ beq(CR_is_vol, Lvolatile); // Volatile?
3058   }
3059   __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
3060 
3061   __ align(32, 28, 28); // Align pop.
3062   // __ bind(Latos);
3063   __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
3064   assert(branch_table[atos] == nullptr, "can't compute twice");
3065   branch_table[atos] = __ pc(); // non-volatile_entry point
3066   __ pop(atos);
3067   if (!is_static) { pop_and_check_object(Rclass_or_obj); } // kills R11_scratch1
3068   do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP);
3069   if (!is_static && rc == may_rewrite) {
3070     patch_bytecode(Bytecodes::_fast_aputfield, Rbc, Rscratch, true, byte_no);
3071   }
3072   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
3073     __ beq(CR_is_vol, Lvolatile); // Volatile?
3074     __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
3075 
3076     __ align(32, 12);
3077     __ bind(Lvolatile);
3078     __ fence();
3079   }
3080   // fallthru: __ b(Lexit);
3081 
3082 #ifdef ASSERT
3083   for (int i = 0; i<number_of_states; ++i) {
3084     assert(branch_table[i], "put initialization");
3085     //tty->print_cr("put: %s_branch_table[%d] = 0x%llx (opcode 0x%llx)",
3086     //              is_static ? "static" : "field", i, branch_table[i], *((unsigned int*)branch_table[i]));
3087   }
3088 #endif
3089 }
3090 
3091 void TemplateTable::putfield(int byte_no) {
3092   putfield_or_static(byte_no, false);
3093 }
3094 
3095 void TemplateTable::nofast_putfield(int byte_no) {
3096   putfield_or_static(byte_no, false, may_not_rewrite);
3097 }
3098 
3099 void TemplateTable::putstatic(int byte_no) {
3100   putfield_or_static(byte_no, true);
3101 }
3102 
// On PPC64, we have a different jvmti_post_field_mod which does the job:
// fast_storefield() calls jvmti_post_field_mod() directly. This function is
// therefore not expected to be used; it only emits a should_not_reach_here().
void TemplateTable::jvmti_post_fast_field_mod() {
  __ should_not_reach_here();
}
3107 
3108 void TemplateTable::fast_storefield(TosState state) {
3109   transition(state, vtos);
3110 
3111   const Register Rcache        = R5_ARG3,  // Do not use ARG1/2 (causes trouble in jvmti_post_field_mod).
3112                  Rclass_or_obj = R31,      // Needs to survive C call.
3113                  Roffset       = R22_tmp2, // Needs to survive C call.
3114                  Rflags        = R3_ARG1,
3115                  Rscratch      = R11_scratch1, // used by load_field_cp_cache_entry
3116                  Rscratch2     = R12_scratch2, // used by load_field_cp_cache_entry
3117                  Rscratch3     = R4_ARG2;
3118   const ConditionRegister CR_is_vol = CR2; // Non-volatile condition register (survives runtime call in do_oop_store).
3119 
3120   // Constant pool already resolved => Load flags and offset of field.
3121   __ load_field_entry(Rcache, Rscratch, 1, /* for_fast_bytecode */ true);
3122   jvmti_post_field_mod(Rcache, Rscratch, false /* not static */);
3123   load_resolved_field_entry(noreg, Rcache, noreg, Roffset, Rflags, false); // Uses R11, R12
3124 
3125   // Get the obj and the final store addr.
3126   pop_and_check_object(Rclass_or_obj); // Kills R11_scratch1.
3127 
3128   // Get volatile flag.
3129   __ rldicl_(Rscratch, Rflags, 64-ResolvedFieldEntry::is_volatile_shift, 63); // Extract volatile bit.
3130   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { __ cmpdi(CR_is_vol, Rscratch, 1); }
3131   {
3132     Label LnotVolatile;
3133     __ beq(CR0, LnotVolatile);
3134     __ release();
3135     __ align(32, 12);
3136     __ bind(LnotVolatile);
3137   }
3138 
3139   // Do the store and fencing.
3140   switch(bytecode()) {
3141     case Bytecodes::_fast_aputfield:
3142       // Store into the field.
3143       do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP);
3144       break;
3145 
3146     case Bytecodes::_fast_iputfield:
3147       __ stwx(R17_tos, Rclass_or_obj, Roffset);
3148       break;
3149 
3150     case Bytecodes::_fast_lputfield:
3151       __ stdx(R17_tos, Rclass_or_obj, Roffset);
3152       break;
3153 
3154     case Bytecodes::_fast_zputfield:
3155       __ andi(R17_tos, R17_tos, 0x1);  // boolean is true if LSB is 1
3156       // fall through to bputfield
3157     case Bytecodes::_fast_bputfield:
3158       __ stbx(R17_tos, Rclass_or_obj, Roffset);
3159       break;
3160 
3161     case Bytecodes::_fast_cputfield:
3162     case Bytecodes::_fast_sputfield:
3163       __ sthx(R17_tos, Rclass_or_obj, Roffset);
3164       break;
3165 
3166     case Bytecodes::_fast_fputfield:
3167       __ stfsx(F15_ftos, Rclass_or_obj, Roffset);
3168       break;
3169 
3170     case Bytecodes::_fast_dputfield:
3171       __ stfdx(F15_ftos, Rclass_or_obj, Roffset);
3172       break;
3173 
3174     default: ShouldNotReachHere();
3175   }
3176 
3177   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
3178     Label LVolatile;
3179     __ beq(CR_is_vol, LVolatile);
3180     __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
3181 
3182     __ align(32, 12);
3183     __ bind(LVolatile);
3184     __ fence();
3185   }
3186 }
3187 
3188 void TemplateTable::fast_accessfield(TosState state) {
3189   transition(atos, state);
3190 
3191   Label LisVolatile;
3192   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
3193 
3194   const Register Rcache        = R3_ARG1,
3195                  Rclass_or_obj = R17_tos,
3196                  Roffset       = R22_tmp2,
3197                  Rflags        = R23_tmp3,
3198                  Rscratch      = R11_scratch1; // used by load_field_cp_cache_entry
3199                  // R12_scratch2 used by load_field_cp_cache_entry
3200 
3201   // Constant pool already resolved. Get the field offset.
3202   __ load_field_entry(Rcache, Rscratch, 1, /* for_fast_bytecode */ true);
3203   load_resolved_field_entry(noreg, Rcache, noreg, Roffset, Rflags, false); // Uses R11, R12
3204 
3205   // JVMTI support
3206   jvmti_post_field_access(Rcache, Rscratch, false, true);
3207 
3208   // Get the load address.
3209   __ null_check_throw(Rclass_or_obj, -1, Rscratch);
3210 
3211   // Get volatile flag.
3212   __ rldicl_(Rscratch, Rflags, 64-ResolvedFieldEntry::is_volatile_shift, 63); // Extract volatile bit.
3213   __ bne(CR0, LisVolatile);
3214 
3215   switch(bytecode()) {
3216     case Bytecodes::_fast_agetfield:
3217     {
3218       do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
3219       __ verify_oop(R17_tos);
3220       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3221 
3222       __ bind(LisVolatile);
3223       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3224       do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
3225       __ verify_oop(R17_tos);
3226       __ twi_0(R17_tos);
3227       __ isync();
3228       break;
3229     }
3230     case Bytecodes::_fast_igetfield:
3231     {
3232       __ lwax(R17_tos, Rclass_or_obj, Roffset);
3233       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3234 
3235       __ bind(LisVolatile);
3236       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3237       __ lwax(R17_tos, Rclass_or_obj, Roffset);
3238       __ twi_0(R17_tos);
3239       __ isync();
3240       break;
3241     }
3242     case Bytecodes::_fast_lgetfield:
3243     {
3244       __ ldx(R17_tos, Rclass_or_obj, Roffset);
3245       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3246 
3247       __ bind(LisVolatile);
3248       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3249       __ ldx(R17_tos, Rclass_or_obj, Roffset);
3250       __ twi_0(R17_tos);
3251       __ isync();
3252       break;
3253     }
3254     case Bytecodes::_fast_bgetfield:
3255     {
3256       __ lbzx(R17_tos, Rclass_or_obj, Roffset);
3257       __ extsb(R17_tos, R17_tos);
3258       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3259 
3260       __ bind(LisVolatile);
3261       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3262       __ lbzx(R17_tos, Rclass_or_obj, Roffset);
3263       __ twi_0(R17_tos);
3264       __ extsb(R17_tos, R17_tos);
3265       __ isync();
3266       break;
3267     }
3268     case Bytecodes::_fast_cgetfield:
3269     {
3270       __ lhzx(R17_tos, Rclass_or_obj, Roffset);
3271       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3272 
3273       __ bind(LisVolatile);
3274       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3275       __ lhzx(R17_tos, Rclass_or_obj, Roffset);
3276       __ twi_0(R17_tos);
3277       __ isync();
3278       break;
3279     }
3280     case Bytecodes::_fast_sgetfield:
3281     {
3282       __ lhax(R17_tos, Rclass_or_obj, Roffset);
3283       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3284 
3285       __ bind(LisVolatile);
3286       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3287       __ lhax(R17_tos, Rclass_or_obj, Roffset);
3288       __ twi_0(R17_tos);
3289       __ isync();
3290       break;
3291     }
3292     case Bytecodes::_fast_fgetfield:
3293     {
3294       __ lfsx(F15_ftos, Rclass_or_obj, Roffset);
3295       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3296 
3297       __ bind(LisVolatile);
3298       Label Ldummy;
3299       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3300       __ lfsx(F15_ftos, Rclass_or_obj, Roffset);
3301       __ fcmpu(CR0, F15_ftos, F15_ftos); // Acquire by cmp-br-isync.
3302       __ bne_predict_not_taken(CR0, Ldummy);
3303       __ bind(Ldummy);
3304       __ isync();
3305       break;
3306     }
3307     case Bytecodes::_fast_dgetfield:
3308     {
3309       __ lfdx(F15_ftos, Rclass_or_obj, Roffset);
3310       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
3311 
3312       __ bind(LisVolatile);
3313       Label Ldummy;
3314       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3315       __ lfdx(F15_ftos, Rclass_or_obj, Roffset);
3316       __ fcmpu(CR0, F15_ftos, F15_ftos); // Acquire by cmp-br-isync.
3317       __ bne_predict_not_taken(CR0, Ldummy);
3318       __ bind(Ldummy);
3319       __ isync();
3320       break;
3321     }
3322     default: ShouldNotReachHere();
3323   }
3324 }
3325 
3326 void TemplateTable::fast_xaccess(TosState state) {
3327   transition(vtos, state);
3328 
3329   Label LisVolatile;
3330   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
3331   const Register Rcache        = R3_ARG1,
3332                  Rclass_or_obj = R17_tos,
3333                  Roffset       = R22_tmp2,
3334                  Rflags        = R23_tmp3,
3335                  Rscratch      = R11_scratch1;
3336                  // R12_scratch2 used by load_field_cp_cache_entry
3337 
3338   __ ld(Rclass_or_obj, 0, R18_locals);
3339 
3340   // Constant pool already resolved. Get the field offset.
3341   __ load_field_entry(Rcache, Rscratch, 2, /* for_fast_bytecode */ true);
3342   load_resolved_field_entry(noreg, Rcache, noreg, Roffset, Rflags, false); // Uses R11, R12
3343 
3344   // JVMTI support not needed, since we switch back to single bytecode as soon as debugger attaches.
3345 
3346   // Needed to report exception at the correct bcp.
3347   __ addi(R14_bcp, R14_bcp, 1);
3348 
3349   // Get the load address.
3350   __ null_check_throw(Rclass_or_obj, -1, Rscratch);
3351 
3352   // Get volatile flag.
3353   __ rldicl_(Rscratch, Rflags, 64-ResolvedFieldEntry::is_volatile_shift, 63); // Extract volatile bit.
3354   __ bne(CR0, LisVolatile);
3355 
3356   switch(state) {
3357   case atos:
3358     {
3359       do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
3360       __ verify_oop(R17_tos);
3361       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()) - 1); // Undo bcp increment.
3362 
3363       __ bind(LisVolatile);
3364       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3365       do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
3366       __ verify_oop(R17_tos);
3367       __ twi_0(R17_tos);
3368       __ isync();
3369       break;
3370     }
3371   case itos:
3372     {
3373       __ lwax(R17_tos, Rclass_or_obj, Roffset);
3374       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()) - 1); // Undo bcp increment.
3375 
3376       __ bind(LisVolatile);
3377       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3378       __ lwax(R17_tos, Rclass_or_obj, Roffset);
3379       __ twi_0(R17_tos);
3380       __ isync();
3381       break;
3382     }
3383   case ftos:
3384     {
3385       __ lfsx(F15_ftos, Rclass_or_obj, Roffset);
3386       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()) - 1); // Undo bcp increment.
3387 
3388       __ bind(LisVolatile);
3389       Label Ldummy;
3390       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
3391       __ lfsx(F15_ftos, Rclass_or_obj, Roffset);
3392       __ fcmpu(CR0, F15_ftos, F15_ftos); // Acquire by cmp-br-isync.
3393       __ bne_predict_not_taken(CR0, Ldummy);
3394       __ bind(Ldummy);
3395       __ isync();
3396       break;
3397     }
3398   default: ShouldNotReachHere();
3399   }
3400   __ addi(R14_bcp, R14_bcp, -1);
3401 }
3402 
3403 // ============================================================================
3404 // Calls
3405 
3406 void TemplateTable::prepare_invoke(Register Rcache,
3407                                    Register Rret_addr,// return address
3408                                    Register Rrecv,    // If caller wants to see it.
3409                                    Register Rscratch
3410                                    ) {
3411   // Determine flags.
3412   const Bytecodes::Code code = bytecode();
3413   const bool load_receiver = (Rrecv != noreg);
3414   assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
3415 
3416   // Load receiver if needed (after appendix is pushed so parameter size is correct).
3417   if (load_receiver) {
3418     Register Rparam_count = Rscratch;
3419     __ lhz(Rparam_count, in_bytes(ResolvedMethodEntry::num_parameters_offset()), Rcache);
3420     __ load_receiver(Rparam_count, Rrecv);
3421     __ verify_oop(Rrecv);
3422   }
3423 
3424   // Get return address.
3425   {
3426     Register Rtable_addr = Rscratch;
3427     Register Rret_type = Rret_addr;
3428     address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
3429 
3430     __ lbz(Rret_type, in_bytes(ResolvedMethodEntry::type_offset()), Rcache);
3431     __ load_dispatch_table(Rtable_addr, (address*)table_addr);
3432     __ sldi(Rret_type, Rret_type, LogBytesPerWord);
3433     // Get return address.
3434     __ ldx(Rret_addr, Rtable_addr, Rret_type);
3435   }
3436 }
3437 
3438 // Helper for virtual calls. Load target out of vtable and jump off!
3439 // Kills all passed registers.
3440 void TemplateTable::generate_vtable_call(Register Rrecv_klass, Register Rindex, Register Rret, Register Rtemp) {
3441 
3442   assert_different_registers(Rrecv_klass, Rtemp, Rret);
3443   const Register Rtarget_method = Rindex;
3444 
3445   // Get target method & entry point.
3446   const ByteSize base = Klass::vtable_start_offset();
3447   // Calc vtable addr scale the vtable index by 8.
3448   __ sldi(Rindex, Rindex, exact_log2(vtableEntry::size_in_bytes()));
3449   // Load target.
3450   __ addi(Rrecv_klass, Rrecv_klass, in_bytes(base + vtableEntry::method_offset()));
3451   __ ldx(Rtarget_method, Rindex, Rrecv_klass);
3452   // Argument and return type profiling.
3453   __ profile_arguments_type(Rtarget_method, Rrecv_klass /* scratch1 */, Rtemp /* scratch2 */, true);
3454   __ call_from_interpreter(Rtarget_method, Rret, Rrecv_klass /* scratch1 */, Rtemp /* scratch2 */);
3455 }
3456 
3457 // Virtual or final call. Final calls are rewritten on the fly to run through "fast_finalcall" next time.
3458 void TemplateTable::invokevirtual(int byte_no) {
3459   transition(vtos, vtos);
3460 
3461   Register Rret_addr = R5_ARG3,
3462            Rflags = R22_tmp2, // Should survive C call.
3463            Rrecv = R3_ARG1,
3464            Rrecv_klass = Rrecv,
3465            Rvtableindex_or_method = R31, // Should survive C call.
3466            Rnew_bc = R6_ARG4,
3467            Rcache = R7_ARG5;
3468 
3469   Label LnotFinal;
3470 
3471   load_resolved_method_entry_virtual(Rcache, noreg, Rflags);
3472 
3473   // Handle final method separately.
3474   __ testbitdi(CR0, R0, Rflags, ResolvedMethodEntry::is_vfinal_shift);
3475   __ bfalse(CR0, LnotFinal);
3476 
3477   if (RewriteBytecodes && !CDSConfig::is_using_archive() && !CDSConfig::is_dumping_static_archive()) {
3478     patch_bytecode(Bytecodes::_fast_invokevfinal, Rnew_bc, R12_scratch2);
3479   }
3480   invokevfinal_helper(Rcache, R11_scratch1, R12_scratch2, Rflags /* tmp */, Rrecv /* tmp */);
3481 
3482   __ align(32, 12);
3483   __ bind(LnotFinal);
3484   prepare_invoke(Rcache, Rret_addr, Rrecv, R11_scratch1);
3485 
3486   // Get vtable index.
3487   __ lhz(Rvtableindex_or_method, in_bytes(ResolvedMethodEntry::table_index_offset()), Rcache);
3488 
3489   // Get receiver klass.
3490   __ load_klass_check_null_throw(Rrecv_klass, Rrecv, R11_scratch1);
3491   __ verify_klass_ptr(Rrecv_klass);
3492   __ profile_virtual_call(Rrecv_klass, R11_scratch1, R12_scratch2);
3493 
3494   generate_vtable_call(Rrecv_klass, Rvtableindex_or_method, Rret_addr, R11_scratch1);
3495 }
3496 
3497 void TemplateTable::fast_invokevfinal(int byte_no) {
3498   transition(vtos, vtos);
3499 
3500   assert(byte_no == f2_byte, "use this argument");
3501   Register Rcache  = R31;
3502   __ load_method_entry(Rcache, R11_scratch1, 1, /* for_fast_bytecode */ true);
3503   invokevfinal_helper(Rcache, R11_scratch1, R12_scratch2, R22_tmp2, R23_tmp3);
3504 }
3505 
3506 void TemplateTable::invokevfinal_helper(Register Rcache,
3507                                         Register Rscratch1, Register Rscratch2, Register Rscratch3, Register Rscratch4) {
3508 
3509   assert_different_registers(Rcache, Rscratch1, Rscratch2, Rscratch3, Rscratch4);
3510 
3511   Register Rrecv     = Rscratch2,
3512            Rmethod   = Rscratch3,
3513            Rret_addr = Rscratch4;
3514   prepare_invoke(Rcache, Rret_addr, Rrecv, Rscratch1);
3515 
3516   // Receiver null check.
3517   __ null_check_throw(Rrecv, -1, Rscratch1);
3518 
3519   __ ld(Rmethod, in_bytes(ResolvedMethodEntry::method_offset()), Rcache);
3520 
3521   __ profile_final_call(Rrecv, Rscratch1);
3522   // Argument and return type profiling.
3523   __ profile_arguments_type(Rmethod, Rscratch1, Rscratch2, true);
3524 
3525   // Do the call.
3526   __ call_from_interpreter(Rmethod, Rret_addr, Rscratch1, Rscratch2);
3527 }
3528 
3529 void TemplateTable::invokespecial(int byte_no) {
3530   assert(byte_no == f1_byte, "use this argument");
3531   transition(vtos, vtos);
3532 
3533   Register Rcache      = R3_ARG1,
3534            Rret_addr   = R4_ARG2,
3535            Rreceiver   = R5_ARG3,
3536            Rmethod     = R31;
3537 
3538   load_resolved_method_entry_special_or_static(Rcache,  // ResolvedMethodEntry*
3539                                                Rmethod, // Method*
3540                                                noreg);  // flags
3541   prepare_invoke(Rcache, Rret_addr, Rreceiver, R11_scratch1);
3542 
3543   // Receiver null check.
3544   __ null_check_throw(Rreceiver, -1, R11_scratch1);
3545 
3546   __ profile_call(R11_scratch1, R12_scratch2);
3547   // Argument and return type profiling.
3548   __ profile_arguments_type(Rmethod, R11_scratch1, R12_scratch2, false);
3549   __ call_from_interpreter(Rmethod, Rret_addr, R11_scratch1, R12_scratch2);
3550 }
3551 
3552 void TemplateTable::invokestatic(int byte_no) {
3553   assert(byte_no == f1_byte, "use this argument");
3554   transition(vtos, vtos);
3555 
3556   Register Rcache    = R3_ARG1,
3557            Rret_addr = R4_ARG2;
3558 
3559   load_resolved_method_entry_special_or_static(Rcache,     // ResolvedMethodEntry*
3560                                                R19_method, // Method*
3561                                                noreg);     // flags
3562   prepare_invoke(Rcache, Rret_addr, noreg, R11_scratch1);
3563 
3564   __ profile_call(R11_scratch1, R12_scratch2);
3565   // Argument and return type profiling.
3566   __ profile_arguments_type(R19_method, R11_scratch1, R12_scratch2, false);
3567   __ call_from_interpreter(R19_method, Rret_addr, R11_scratch1, R12_scratch2);
3568 }
3569 
3570 void TemplateTable::invokeinterface_object_method(Register Rrecv_klass,
3571                                                   Register Rret,
3572                                                   Register Rflags,
3573                                                   Register Rcache,
3574                                                   Register Rtemp1,
3575                                                   Register Rtemp2) {
3576 
3577   assert_different_registers(Rcache, Rret, Rrecv_klass, Rflags, Rtemp1, Rtemp2);
3578   Label LnotFinal;
3579 
3580   // Check for vfinal.
3581   __ testbitdi(CR0, R0, Rflags, ResolvedMethodEntry::is_vfinal_shift);
3582   __ bfalse(CR0, LnotFinal);
3583 
3584   Register Rscratch = Rflags, // Rflags is dead now.
3585            Rmethod  = Rtemp2,
3586            Rindex   = Rtemp2;
3587 
3588   // Final call case.
3589   __ profile_final_call(Rtemp1, Rscratch);
3590   // Argument and return type profiling.
3591   __ ld(Rmethod, in_bytes(ResolvedMethodEntry::method_offset()), Rcache);
3592   __ profile_arguments_type(Rmethod, Rscratch, Rrecv_klass /* scratch */, true);
3593   // Do the final call - the index (f2) contains the method.
3594   __ call_from_interpreter(Rmethod, Rret, Rscratch, Rrecv_klass /* scratch */);
3595 
3596   // Non-final callc case.
3597   __ bind(LnotFinal);
3598   __ lhz(Rindex, in_bytes(ResolvedMethodEntry::table_index_offset()), Rcache);
3599   __ profile_virtual_call(Rrecv_klass, Rtemp1, Rscratch);
3600   generate_vtable_call(Rrecv_klass, Rindex, Rret, Rscratch);
3601 }
3602 
3603 void TemplateTable::invokeinterface(int byte_no) {
3604   assert(byte_no == f1_byte, "use this argument");
3605   transition(vtos, vtos);
3606 
3607   const Register Rscratch1        = R11_scratch1,
3608                  Rscratch2        = R12_scratch2,
3609                  Rreceiver        = R3_ARG1,
3610                  Rrecv_klass      = R4_ARG2,
3611                  Rinterface_klass = R5_ARG3,
3612                  Rmethod          = R6_ARG4,
3613                  Rmethod2         = R7_ARG5,
3614                  Rret_addr        = R8_ARG6,
3615                  Rindex           = R9_ARG7,
3616                  Rflags           = R10_ARG8,
3617                  Rcache           = R31;
3618 
3619   load_resolved_method_entry_interface(Rcache, noreg, noreg, Rflags);
3620   prepare_invoke(Rcache, Rret_addr, Rreceiver, Rscratch1);
3621 
3622   // First check for Object case, then private interface method,
3623   // then regular interface method.
3624 
3625   // Get receiver klass - this is also a null check
3626   __ load_klass_check_null_throw(Rrecv_klass, Rreceiver, Rscratch2);
3627 
3628   // Check corner case object method.
3629   // Special case of invokeinterface called for virtual method of
3630   // java.lang.Object. See ResolvedMethodEntry for details:
3631   // The invokeinterface was rewritten to a invokevirtual, hence we have
3632   // to handle this corner case.
3633 
3634   Label LnotObjectMethod, Lthrow_ame;
3635   __ testbitdi(CR0, R0, Rflags, ResolvedMethodEntry::is_forced_virtual_shift);
3636   __ bfalse(CR0, LnotObjectMethod);
3637   invokeinterface_object_method(Rrecv_klass, Rret_addr, Rflags, Rcache, Rscratch1, Rscratch2);
3638   __ bind(LnotObjectMethod);
3639 
3640   __ ld(Rinterface_klass, in_bytes(ResolvedMethodEntry::klass_offset()), Rcache);
3641   __ ld(Rmethod, in_bytes(ResolvedMethodEntry::method_offset()), Rcache);
3642 
3643   // Check for private method invocation - indicated by vfinal
3644   Label LnotVFinal, L_no_such_interface, L_subtype;
3645 
3646   __ testbitdi(CR0, R0, Rflags, ResolvedMethodEntry::is_vfinal_shift);
3647   __ bfalse(CR0, LnotVFinal);
3648 
3649   __ check_klass_subtype(Rrecv_klass, Rinterface_klass, Rscratch1, Rscratch2, L_subtype);
3650   // If we get here the typecheck failed
3651   __ b(L_no_such_interface);
3652   __ bind(L_subtype);
3653 
3654   // do the call
3655 
3656   Register Rscratch = Rflags; // Rflags is dead now.
3657 
3658   __ profile_final_call(Rscratch1, Rscratch);
3659   __ profile_arguments_type(Rmethod, Rscratch, Rrecv_klass /* scratch */, true);
3660 
3661   __ call_from_interpreter(Rmethod, Rret_addr, Rscratch, Rrecv_klass /* scratch */);
3662 
3663   __ bind(LnotVFinal);
3664   __ lookup_interface_method(Rrecv_klass, Rinterface_klass, noreg, noreg, Rscratch1, Rscratch2,
3665                              L_no_such_interface, /*return_method=*/false);
3666 
3667   __ profile_virtual_call(Rrecv_klass, Rscratch1, Rscratch2);
3668 
3669   // Find entry point to call.
3670 
3671   // Get declaring interface class from method
3672   __ load_method_holder(Rinterface_klass, Rmethod);
3673 
3674   // Get itable index from method
3675   __ lwa(Rindex, in_bytes(Method::itable_index_offset()), Rmethod);
3676   __ subfic(Rindex, Rindex, Method::itable_index_max);
3677 
3678   __ lookup_interface_method(Rrecv_klass, Rinterface_klass, Rindex, Rmethod2, Rscratch1, Rscratch2,
3679                              L_no_such_interface);
3680 
3681   __ cmpdi(CR0, Rmethod2, 0);
3682   __ beq(CR0, Lthrow_ame);
3683   // Found entry. Jump off!
3684   // Argument and return type profiling.
3685   __ profile_arguments_type(Rmethod2, Rscratch1, Rscratch2, true);
3686   __ call_from_interpreter(Rmethod2, Rret_addr, Rscratch1, Rscratch2);
3687 
3688   // Vtable entry was null => Throw abstract method error.
3689   __ bind(Lthrow_ame);
3690   // Pass arguments for generating a verbose error message.
3691   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose),
3692           Rrecv_klass, Rmethod);
3693 
3694   // Interface was not found => Throw incompatible class change error.
3695   __ bind(L_no_such_interface);
3696   // Pass arguments for generating a verbose error message.
3697   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose),
3698           Rrecv_klass, Rinterface_klass);
3699   DEBUG_ONLY( __ should_not_reach_here(); )
3700 }
3701 
3702 void TemplateTable::invokedynamic(int byte_no) {
3703   transition(vtos, vtos);
3704 
3705   const Register Rret_addr = R3_RET;
3706   const Register Rmethod   = R22_tmp2;
3707   const Register Rscratch1 = R30;
3708   const Register Rscratch2 = R11_scratch1;
3709 
3710   // Returns target method in Rmethod and return address in R3_RET. Kills all argument registers.
3711   load_invokedynamic_entry(Rmethod);
3712 
3713   // Profile this call.
3714   __ profile_call(Rscratch1, Rscratch2);
3715 
3716   // Off we go. With the new method handles, we don't jump to a method handle
3717   // entry any more. Instead, we pushed an "appendix" in prepare invoke, which happens
3718   // to be the callsite object the bootstrap method returned. This is passed to a
3719   // "link" method which does the dispatch (Most likely just grabs the MH stored
3720   // inside the callsite and does an invokehandle).
3721   // Argument and return type profiling.
3722   __ profile_arguments_type(Rmethod, Rscratch1, Rscratch2, false);
3723   __ call_from_interpreter(Rmethod, Rret_addr, Rscratch1 /* scratch1 */, Rscratch2 /* scratch2 */);
3724 }
3725 
3726 void TemplateTable::invokehandle(int byte_no) {
3727   transition(vtos, vtos);
3728 
3729   const Register Rret_addr = R3_ARG1,
3730                  Rflags    = R12_scratch2,
3731                  Rrecv     = R5_ARG3,
3732                  Rmethod   = R22_tmp2,
3733                  Rscratch1 = R30,
3734                  Rscratch2 = R11_scratch1,
3735                  Rcache    = R31;
3736 
3737   load_resolved_method_entry_handle(Rcache,  // ResolvedMethodEntry*
3738                                     Rmethod, // Method*
3739                                     Rscratch1,
3740                                     Rflags);
3741   prepare_invoke(Rcache, Rret_addr, Rrecv, Rscratch1);
3742   __ verify_method_ptr(Rmethod);
3743   __ null_check_throw(Rrecv, -1, Rscratch2);
3744 
3745   __ profile_final_call(Rrecv, Rscratch1);
3746 
3747   // Still no call from handle => We call the method handle interpreter here.
3748   // Argument and return type profiling.
3749   __ profile_arguments_type(Rmethod, Rscratch1, Rscratch2, true);
3750   __ call_from_interpreter(Rmethod, Rret_addr, Rscratch1 /* scratch1 */, Rscratch2 /* scratch2 */);
3751 }
3752 
3753 // =============================================================================
3754 // Allocation
3755 
3756 // Puts allocated obj ref onto the expression stack.
3757 void TemplateTable::_new() {
3758   transition(vtos, atos);
3759 
3760   Label Lslow_case,
3761         Ldone;
3762 
3763   const Register RallocatedObject = R17_tos,
3764                  RinstanceKlass   = R9_ARG7,
3765                  Rscratch         = R11_scratch1,
3766                  Roffset          = R8_ARG6,
3767                  Rinstance_size   = Roffset,
3768                  Rcpool           = R4_ARG2,
3769                  Rtags            = R3_ARG1,
3770                  Rindex           = R5_ARG3;
3771 
3772   // --------------------------------------------------------------------------
3773   // Check if fast case is possible.
3774 
3775   // Load pointers to const pool and const pool's tags array.
3776   __ get_cpool_and_tags(Rcpool, Rtags);
3777   // Load index of constant pool entry.
3778   __ get_2_byte_integer_at_bcp(1, Rindex, InterpreterMacroAssembler::Unsigned);
3779 
3780   // Note: compared to other architectures, PPC's implementation always goes
3781   // to the slow path if TLAB is used and fails.
3782   if (UseTLAB) {
3783     // Make sure the class we're about to instantiate has been resolved
3784     // This is done before loading instanceKlass to be consistent with the order
3785     // how Constant Pool is updated (see ConstantPoolCache::klass_at_put).
3786     __ addi(Rtags, Rtags, Array<u1>::base_offset_in_bytes());
3787     __ lbzx(Rtags, Rindex, Rtags);
3788 
3789     __ cmpdi(CR0, Rtags, JVM_CONSTANT_Class);
3790     __ bne(CR0, Lslow_case);
3791 
3792     // Get instanceKlass
3793     __ sldi(Roffset, Rindex, LogBytesPerWord);
3794     __ load_resolved_klass_at_offset(Rcpool, Roffset, RinstanceKlass);
3795 
3796     // Make sure klass is initialized.
3797     assert(VM_Version::supports_fast_class_init_checks(), "Optimization requires support for fast class initialization checks");
3798     __ clinit_barrier(RinstanceKlass, R16_thread, nullptr /*L_fast_path*/, &Lslow_case);
3799 
3800     __ lwz(Rinstance_size, in_bytes(Klass::layout_helper_offset()), RinstanceKlass);
3801 
3802     // Make sure klass is not abstract, or interface or java/lang/Class.
3803     __ andi_(R0, Rinstance_size, Klass::_lh_instance_slow_path_bit); // slow path bit equals 0?
3804     __ bne(CR0, Lslow_case);
3805 
3806     // --------------------------------------------------------------------------
3807     // Fast case:
3808     // Allocate the instance.
3809     // 1) Try to allocate in the TLAB.
3810     // 2) If the above fails (or is not applicable), go to a slow case (creates a new TLAB, etc.).
3811 
3812     Register RoldTopValue = RallocatedObject; // Object will be allocated here if it fits.
3813     Register RnewTopValue = R6_ARG4;
3814     Register RendValue    = R7_ARG5;
3815 
3816     // Check if we can allocate in the TLAB.
3817     __ ld(RoldTopValue, in_bytes(JavaThread::tlab_top_offset()), R16_thread);
3818     __ ld(RendValue,    in_bytes(JavaThread::tlab_end_offset()), R16_thread);
3819 
3820     __ add(RnewTopValue, Rinstance_size, RoldTopValue);
3821 
3822     // If there is enough space, we do not CAS and do not clear.
3823     __ cmpld(CR0, RnewTopValue, RendValue);
3824     __ bgt(CR0, Lslow_case);
3825 
3826     __ std(RnewTopValue, in_bytes(JavaThread::tlab_top_offset()), R16_thread);
3827 
3828     if (!ZeroTLAB) {
3829       // --------------------------------------------------------------------------
3830       // Init1: Zero out newly allocated memory.
3831       // Initialize remaining object fields.
3832       Register Rbase = Rtags;
3833       int header_size = oopDesc::header_size() * HeapWordSize;
3834       __ addi(Rinstance_size, Rinstance_size, 7 - header_size);
3835       __ addi(Rbase, RallocatedObject, header_size);
3836       __ srdi(Rinstance_size, Rinstance_size, 3);
3837 
3838       // Clear out object skipping header. Takes also care of the zero length case.
3839       __ clear_memory_doubleword(Rbase, Rinstance_size);
3840     }
3841 
3842     // --------------------------------------------------------------------------
3843     // Init2: Initialize the header: mark, klass
3844     // Init mark.
3845     if (UseCompactObjectHeaders) {
3846       __ ld(Rscratch, in_bytes(Klass::prototype_header_offset()), RinstanceKlass);
3847       __ std(Rscratch, oopDesc::mark_offset_in_bytes(), RallocatedObject);
3848     } else {
3849       __ load_const_optimized(Rscratch, markWord::prototype().value(), R0);
3850       __ std(Rscratch, oopDesc::mark_offset_in_bytes(), RallocatedObject);
3851       __ store_klass_gap(RallocatedObject);
3852       __ store_klass(RallocatedObject, RinstanceKlass, Rscratch);
3853     }
3854 
3855     __ b(Ldone);
3856   }
3857 
3858   // --------------------------------------------------------------------------
3859   // slow case
3860   __ bind(Lslow_case);
3861   __ call_VM_preemptable(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), Rcpool, Rindex);
3862 
3863   // continue
3864   __ bind(Ldone);
3865 
3866   // Must prevent reordering of stores for object initialization with stores that publish the new object.
3867   __ membar(Assembler::StoreStore);
3868 }
3869 
3870 void TemplateTable::newarray() {
3871   transition(itos, atos);
3872 
3873   __ lbz(R4, 1, R14_bcp);
3874   __ extsw(R5, R17_tos);
3875   call_VM(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), R4, R5 /* size */);
3876 
3877   // Must prevent reordering of stores for object initialization with stores that publish the new object.
3878   __ membar(Assembler::StoreStore);
3879 }
3880 
3881 void TemplateTable::anewarray() {
3882   transition(itos, atos);
3883 
3884   __ get_constant_pool(R4);
3885   __ get_2_byte_integer_at_bcp(1, R5, InterpreterMacroAssembler::Unsigned);
3886   __ extsw(R6, R17_tos); // size
3887   call_VM(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), R4 /* pool */, R5 /* index */, R6 /* size */);
3888 
3889   // Must prevent reordering of stores for object initialization with stores that publish the new object.
3890   __ membar(Assembler::StoreStore);
3891 }
3892 
3893 // Allocate a multi dimensional array
3894 void TemplateTable::multianewarray() {
3895   transition(vtos, atos);
3896 
3897   Register Rptr = R31; // Needs to survive C call.
3898 
3899   // Put ndims * wordSize into frame temp slot
3900   __ lbz(Rptr, 3, R14_bcp);
3901   __ sldi(Rptr, Rptr, Interpreter::logStackElementSize);
3902   // Esp points past last_dim, so set to R4 to first_dim address.
3903   __ add(R4, Rptr, R15_esp);
3904   call_VM(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), R4 /* first_size_address */);
3905   // Pop all dimensions off the stack.
3906   __ add(R15_esp, Rptr, R15_esp);
3907 
3908   // Must prevent reordering of stores for object initialization with stores that publish the new object.
3909   __ membar(Assembler::StoreStore);
3910 }
3911 
3912 void TemplateTable::arraylength() {
3913   transition(atos, itos);
3914 
3915   __ verify_oop(R17_tos);
3916   __ null_check_throw(R17_tos, arrayOopDesc::length_offset_in_bytes(), R11_scratch1);
3917   __ lwa(R17_tos, arrayOopDesc::length_offset_in_bytes(), R17_tos);
3918 }
3919 
3920 // ============================================================================
3921 // Typechecks
3922 
3923 void TemplateTable::checkcast() {
3924   transition(atos, atos);
3925 
3926   Label Ldone, Lis_null, Lquicked, Lresolved;
3927   Register Roffset         = R6_ARG4,
3928            RobjKlass       = R4_ARG2,
3929            RspecifiedKlass = R5_ARG3, // Generate_ClassCastException_verbose_handler will read value from this register.
3930            Rcpool          = R11_scratch1,
3931            Rtags           = R12_scratch2;
3932 
3933   // Null does not pass.
3934   __ cmpdi(CR0, R17_tos, 0);
3935   __ beq(CR0, Lis_null);
3936 
3937   // Get constant pool tag to find out if the bytecode has already been "quickened".
3938   __ get_cpool_and_tags(Rcpool, Rtags);
3939 
3940   __ get_2_byte_integer_at_bcp(1, Roffset, InterpreterMacroAssembler::Unsigned);
3941 
3942   __ addi(Rtags, Rtags, Array<u1>::base_offset_in_bytes());
3943   __ lbzx(Rtags, Rtags, Roffset);
3944 
3945   __ cmpdi(CR0, Rtags, JVM_CONSTANT_Class);
3946   __ beq(CR0, Lquicked);
3947 
3948   // Call into the VM to "quicken" instanceof.
3949   __ push_ptr();  // for GC
3950   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
3951   __ get_vm_result_metadata(RspecifiedKlass);
3952   __ pop_ptr();   // Restore receiver.
3953   __ b(Lresolved);
3954 
3955   // Extract target class from constant pool.
3956   __ bind(Lquicked);
3957   __ sldi(Roffset, Roffset, LogBytesPerWord);
3958   __ load_resolved_klass_at_offset(Rcpool, Roffset, RspecifiedKlass);
3959 
3960   // Do the checkcast.
3961   __ bind(Lresolved);
3962   // Get value klass in RobjKlass.
3963   __ load_klass(RobjKlass, R17_tos);
3964   // Generate a fast subtype check. Branch to cast_ok if no failure. Return 0 if failure.
3965   __ gen_subtype_check(RobjKlass, RspecifiedKlass, /*3 temp regs*/ Roffset, Rcpool, Rtags, /*target if subtype*/ Ldone);
3966 
3967   // Not a subtype; so must throw exception
3968   // Target class oop is in register R6_ARG4 == RspecifiedKlass by convention.
3969   __ load_dispatch_table(R11_scratch1, (address*)Interpreter::_throw_ClassCastException_entry);
3970   __ mtctr(R11_scratch1);
3971   __ bctr();
3972 
3973   // Profile the null case.
3974   __ align(32, 12);
3975   __ bind(Lis_null);
3976   __ profile_null_seen(R11_scratch1, Rtags); // Rtags used as scratch.
3977 
3978   __ align(32, 12);
3979   __ bind(Ldone);
3980 }
3981 
3982 // Output:
3983 //   - tos == 0: Obj was null or not an instance of class.
3984 //   - tos == 1: Obj was an instance of class.
3985 void TemplateTable::instanceof() {
3986   transition(atos, itos);
3987 
3988   Label Ldone, Lis_null, Lquicked, Lresolved;
3989   Register Roffset         = R6_ARG4,
3990            RobjKlass       = R4_ARG2,
3991            RspecifiedKlass = R5_ARG3,
3992            Rcpool          = R11_scratch1,
3993            Rtags           = R12_scratch2;
3994 
3995   // Null does not pass.
3996   __ cmpdi(CR0, R17_tos, 0);
3997   __ beq(CR0, Lis_null);
3998 
3999   // Get constant pool tag to find out if the bytecode has already been "quickened".
4000   __ get_cpool_and_tags(Rcpool, Rtags);
4001 
4002   __ get_2_byte_integer_at_bcp(1, Roffset, InterpreterMacroAssembler::Unsigned);
4003 
4004   __ addi(Rtags, Rtags, Array<u1>::base_offset_in_bytes());
4005   __ lbzx(Rtags, Rtags, Roffset);
4006 
4007   __ cmpdi(CR0, Rtags, JVM_CONSTANT_Class);
4008   __ beq(CR0, Lquicked);
4009 
4010   // Call into the VM to "quicken" instanceof.
4011   __ push_ptr();  // for GC
4012   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
4013   __ get_vm_result_metadata(RspecifiedKlass);
4014   __ pop_ptr();   // Restore receiver.
4015   __ b(Lresolved);
4016 
4017   // Extract target class from constant pool.
4018   __ bind(Lquicked);
4019   __ sldi(Roffset, Roffset, LogBytesPerWord);
4020   __ load_resolved_klass_at_offset(Rcpool, Roffset, RspecifiedKlass);
4021 
4022   // Do the checkcast.
4023   __ bind(Lresolved);
4024   // Get value klass in RobjKlass.
4025   __ load_klass(RobjKlass, R17_tos);
4026   // Generate a fast subtype check. Branch to cast_ok if no failure. Return 0 if failure.
4027   __ li(R17_tos, 1);
4028   __ gen_subtype_check(RobjKlass, RspecifiedKlass, /*3 temp regs*/ Roffset, Rcpool, Rtags, /*target if subtype*/ Ldone);
4029   __ li(R17_tos, 0);
4030 
4031   if (ProfileInterpreter) {
4032     __ b(Ldone);
4033   }
4034 
4035   // Profile the null case.
4036   __ align(32, 12);
4037   __ bind(Lis_null);
4038   __ profile_null_seen(Rcpool, Rtags); // Rcpool and Rtags used as scratch.
4039 
4040   __ align(32, 12);
4041   __ bind(Ldone);
4042 }
4043 
4044 // =============================================================================
4045 // Breakpoints
4046 
4047 void TemplateTable::_breakpoint() {
4048   transition(vtos, vtos);
4049 
4050   // Get the unpatched byte code.
4051   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at), R19_method, R14_bcp);
4052   __ mr(R31, R3_RET);
4053 
4054   // Post the breakpoint event.
4055   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), R19_method, R14_bcp);
4056 
4057   // Complete the execution of original bytecode.
4058   __ dispatch_Lbyte_code(vtos, R31, Interpreter::normal_table(vtos));
4059 }
4060 
4061 // =============================================================================
4062 // Exceptions
4063 
4064 void TemplateTable::athrow() {
4065   transition(atos, vtos);
4066 
4067   // Exception oop is in tos
4068   __ verify_oop(R17_tos);
4069 
4070   __ null_check_throw(R17_tos, -1, R11_scratch1);
4071 
4072   // Throw exception interpreter entry expects exception oop to be in R3.
4073   __ mr(R3_RET, R17_tos);
4074   __ load_dispatch_table(R11_scratch1, (address*)Interpreter::throw_exception_entry());
4075   __ mtctr(R11_scratch1);
4076   __ bctr();
4077 }
4078 
4079 // =============================================================================
4080 // Synchronization
4081 // Searches the basic object lock list on the stack for a free slot
4082 // and uses it to lock the object in tos.
4083 //
4084 // Recursive locking is enabled by exiting the search if the same
// object is already found in the list. Thus, the new BasicObjectLock
// is placed "higher up" in the stack and is therefore found first
// at the next monitor exit.
4088 void TemplateTable::monitorenter() {
4089   transition(atos, vtos);
4090   __ verify_oop(R17_tos);
4091 
4092   Register Rcurrent_monitor  = R3_ARG1,
4093            Rcurrent_obj      = R4_ARG2,
4094            Robj_to_lock      = R17_tos,
4095            Rscratch1         = R11_scratch1,
4096            Rscratch2         = R12_scratch2,
4097            Rbot              = R5_ARG3,
4098            Rfree_slot        = R6_ARG4;
4099 
4100   Label Lfound, Lallocate_new;
4101 
4102   __ ld(Rscratch1, _abi0(callers_sp), R1_SP); // load FP
4103   __ li(Rfree_slot, 0); // Points to free slot or null.
4104 
4105   // Set up search loop - start with topmost monitor.
4106   __ mr(Rcurrent_monitor, R26_monitor);
4107   __ addi(Rbot, Rscratch1, -frame::ijava_state_size);
4108 
4109   // ------------------------------------------------------------------------------
4110   // Null pointer exception.
4111   __ null_check_throw(Robj_to_lock, -1, Rscratch1);
4112 
4113   // Check if any slot is present => short cut to allocation if not.
4114   __ cmpld(CR0, Rcurrent_monitor, Rbot);
4115   __ beq(CR0, Lallocate_new);
4116 
4117   // ------------------------------------------------------------------------------
4118   // Find a free slot in the monitor block.
4119   // Note: The order of the monitors is important for C2 OSR which derives the
4120   //       unlock order from it (see comments for interpreter_frame_monitor_*).
4121   {
4122     Label Lloop, LnotFree, Lexit;
4123 
4124     __ bind(Lloop);
4125     __ ld(Rcurrent_obj, in_bytes(BasicObjectLock::obj_offset()), Rcurrent_monitor);
4126     // Exit if current entry is for same object; this guarantees, that new monitor
4127     // used for recursive lock is above the older one.
4128     __ cmpd(CR0, Rcurrent_obj, Robj_to_lock);
4129     __ beq(CR0, Lexit); // recursive locking
4130 
4131     __ cmpdi(CR0, Rcurrent_obj, 0);
4132     __ bne(CR0, LnotFree);
4133     __ mr(Rfree_slot, Rcurrent_monitor); // remember free slot closest to the bottom
4134     __ bind(LnotFree);
4135 
4136     __ addi(Rcurrent_monitor, Rcurrent_monitor, frame::interpreter_frame_monitor_size_in_bytes());
4137     __ cmpld(CR0, Rcurrent_monitor, Rbot);
4138     __ bne(CR0, Lloop);
4139     __ bind(Lexit);
4140   }
4141 
4142   // ------------------------------------------------------------------------------
4143   // Check if we found a free slot.
4144   __ cmpdi(CR0, Rfree_slot, 0);
4145   __ bne(CR0, Lfound);
4146 
4147   // We didn't find a free BasicObjLock => allocate one.
4148   __ bind(Lallocate_new);
4149   __ add_monitor_to_stack(false, Rscratch1, Rscratch2);
4150   __ mr(Rfree_slot, R26_monitor);
4151 
4152   // ------------------------------------------------------------------------------
4153   // We now have a slot to lock.
4154   __ bind(Lfound);
4155 
4156   // Increment bcp to point to the next bytecode, so exception handling for async. exceptions work correctly.
4157   // The object has already been popped from the stack, so the expression stack looks correct.
4158   __ addi(R14_bcp, R14_bcp, 1);
4159 
4160   __ std(Robj_to_lock, in_bytes(BasicObjectLock::obj_offset()), Rfree_slot);
4161   __ lock_object(Rfree_slot, Robj_to_lock);
4162 
4163   // Check if there's enough space on the stack for the monitors after locking.
4164   // This emits a single store.
4165   __ generate_stack_overflow_check(0);
4166 
4167   // The bcp has already been incremented. Just need to dispatch to next instruction.
4168   __ dispatch_next(vtos);
4169 }
4170 
4171 void TemplateTable::monitorexit() {
4172   transition(atos, vtos);
4173   __ verify_oop(R17_tos);
4174 
4175   Register Rcurrent_monitor  = R3_ARG1,
4176            Rcurrent_obj      = R4_ARG2,
4177            Robj_to_lock      = R17_tos,
4178            Rscratch          = R11_scratch1,
4179            Rbot              = R12_scratch2;
4180 
4181   Label Lfound, Lillegal_monitor_state;
4182 
4183   __ ld(Rscratch, _abi0(callers_sp), R1_SP); // load FP
4184 
4185   // Set up search loop - start with topmost monitor.
4186   __ mr(Rcurrent_monitor, R26_monitor);
4187   __ addi(Rbot, Rscratch, -frame::ijava_state_size);
4188 
4189   // Null pointer check.
4190   __ null_check_throw(Robj_to_lock, -1, Rscratch);
4191 
4192   // Check corner case: unbalanced monitorEnter / Exit.
4193   __ cmpld(CR0, Rcurrent_monitor, Rbot);
4194   __ beq(CR0, Lillegal_monitor_state);
4195 
4196   // Find the corresponding slot in the monitors stack section.
4197   {
4198     Label Lloop;
4199 
4200     __ bind(Lloop);
4201     __ ld(Rcurrent_obj, in_bytes(BasicObjectLock::obj_offset()), Rcurrent_monitor);
4202     // Is this entry for same obj?
4203     __ cmpd(CR0, Rcurrent_obj, Robj_to_lock);
4204     __ beq(CR0, Lfound);
4205 
4206     __ addi(Rcurrent_monitor, Rcurrent_monitor, frame::interpreter_frame_monitor_size_in_bytes());
4207     __ cmpld(CR0, Rcurrent_monitor, Rbot);
4208     __ bne(CR0, Lloop);
4209   }
4210 
4211   // Fell through without finding the basic obj lock => throw up!
4212   __ bind(Lillegal_monitor_state);
4213   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
4214   __ should_not_reach_here();
4215 
4216   __ align(32, 12);
4217   __ bind(Lfound);
4218   __ unlock_object(Rcurrent_monitor);
4219 }
4220 
4221 // ============================================================================
4222 // Wide bytecodes
4223 
4224 // Wide instructions. Simply redirects to the wide entry point for that instruction.
4225 void TemplateTable::wide() {
4226   transition(vtos, vtos);
4227 
4228   const Register Rtable = R11_scratch1,
4229                  Rindex = R12_scratch2,
4230                  Rtmp   = R0;
4231 
4232   __ lbz(Rindex, 1, R14_bcp);
4233 
4234   __ load_dispatch_table(Rtable, Interpreter::_wentry_point);
4235 
4236   __ slwi(Rindex, Rindex, LogBytesPerWord);
4237   __ ldx(Rtmp, Rtable, Rindex);
4238   __ mtctr(Rtmp);
4239   __ bctr();
4240   // Note: the bcp increment step is part of the individual wide bytecode implementations.
4241 }