/*
 * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
 * Copyright (c) 2015, Linaro Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_aarch32.hpp"
#include "oops/instanceOop.hpp"
#include "oops/method.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"
#include "utilities/top.hpp"
#include "vm_version_aarch32.hpp"
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif


// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp

#undef __
#define __ _masm->
#define TIMES_OOP lsl(exact_log2(4))
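// TIMES_OOP scales an element index by the oop size: on aarch32 a heap oop
// is 4 bytes, so this is a left shift by exact_log2(4) == 2.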

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Stub Code definitions

class StubGenerator: public StubCodeGenerator {
 private:

#ifdef PRODUCT
#define inc_counter_np(counter) ((void)0)
#else
  void inc_counter_np_(int& counter) {
    __ lea(rscratch2, ExternalAddress((address)&counter));
    __ ldr(rscratch1, Address(rscratch2));
    __ add(rscratch1, rscratch1, 1);
    __ str(rscratch1, Address(rscratch2));
  }
#define inc_counter_np(counter) \
  BLOCK_COMMENT("inc_counter " #counter); \
  inc_counter_np_(counter);
#endif

  // Call stubs are used to call Java from C
  //
  // There are only four registers available to house arguments and we're
  // expecting eight, so the remaining arguments are passed on the stack;
  // the layout is as follows:

  // c_rarg0 = call wrapper address
  // c_rarg1 = result
  // c_rarg2 = result type
  // c_rarg3 = method
  // sp -> [ entry_point
  //         parameters -> java params
  //         parameter size (in words)
  //         thread] (address increasing)
  //
  // We don't
  // NEW!! layout for aarch32 so that save and restore can be collapsed into a single
  // load/store
  // layout of saved registers now is
  // 0   [ saved lr      ] <- rfp
  // -1  [ saved fp      ]
  // -2  [ r12/rthread   ] Thread passed in args
  // -3  [ r10/rmethod   ] NOTE omitted rfp as restored automatically
  // -4  [ r9/rscratch1  ] Platform register?
  // -5  [ r8/thread     ]
  // -6  [ r7/rcpool     ]
  // -7  [ r6/rlocals    ]
  // -8  [ r5/rbcp       ]
  // -9  [ r4/rdispatch  ]
  // -10 [ r2/res type   ]
  // -11 [ r1/result     ]
  // -12 [r0/call wrapper]<- sp (when restored from fp value)
  // -13 maybe alignment
  // -YY [ java arg0     ]
  //   ...
  // -xx [ java argn     ] <- sp on branch into java
  //
  // XXX Note we do not save floating point registers. Only s16-s31
  // (d8-d15) would need to be saved, and these are never touched by
  // template interpreter code; on a sequence such as C -> Java -> C,
  // the C functions will save them if used.
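  //
  // For reference, the C side enters this stub through the CallStub
  // function pointer declared in stubRoutines.hpp; a sketch of that
  // signature (shown here for orientation only - see stubRoutines.hpp
  // for the authoritative declaration):
  //
  //   typedef void (*CallStub)(address   link,           // call wrapper
  //                            intptr_t* result,
  //                            BasicType result_type,
  //                            Method*   method,
  //                            address   entry_point,
  //                            intptr_t* parameters,
  //                            int       size_of_parameters,
  //                            TRAPS);                   // thread
  //
  // The first four arguments arrive in c_rarg0..c_rarg3, the remainder
  // on the stack as described above.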

  static const int thread_off = -2 * wordSize; // The offset of the saved thread

  address generate_call_stub(address& return_address) {
    /*assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 &&
           (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
           "adjust this code");*/

    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();
    __ reg_printf("entering call stub with { sp : %p, rfp : %p, lr : %p}\n", sp, rfp, lr);
    __ enter(); //save rfp & lr !!NOTE PUSHES TWO REGISTERS TO STACK

    const int entry_point_arg_off = 1 * wordSize,
              params_arg_off      = 2 * wordSize,
              param_sz_arg_off    = 3 * wordSize,
              thread_arg_off      = 4 * wordSize;
    // r12 is a scratch register so we can clobber it to save thread,
    // which is needed at the end
    __ ldr(r12, Address(rfp, thread_arg_off));
    // r0, r1, r2, r4 - r10, r12
    // we save r0 as the call_wrapper_address is needed elsewhere
    // we save r1, r2 as they hold the result and its type,
    // which are needed on return
    // r12 holds the thread ptr
    unsigned c_save_regset = 0b0001011111110111;
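    // Bit i of the regset selects register ri, so
    // 0b0001011111110111 = { r0, r1, r2, r4..r10, r12 }: eleven registers,
    // stored by the single stmdb below in ascending register order at
    // descending addresses (the lowest-numbered register ends up at the
    // lowest address, just below the fp/lr pair pushed by enter()).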
    int nsaved = __ count_bits(c_save_regset);
    __ stmdb(sp, c_save_regset);

    // Offset from rfp to end of stack.
    const int rfp_tos_offset_bytes = frame::offset_from_rfp_bytes + nsaved * wordSize;

    // install Java thread in global register now we have saved
    // whatever value it held
    __ mov(rthread, r12);
    // And method
    __ mov(rmethod, c_rarg3);

#ifdef ASSERT
    // make sure we have no pending exceptions
    {
      Label L;
      __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
      __ cmp(rscratch1, (unsigned)NULL_WORD);
      __ b(L, Assembler::EQ);
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ BIND(L);
    }
#endif
    __ ldr(rscratch2, Address(rfp, param_sz_arg_off));
    // align sp at the time we call java
    __ sub(sp, sp, rscratch2, lsl(LogBytesPerWord));
    __ align_stack();
    __ add(sp, sp, rscratch2, lsl(LogBytesPerWord));
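    // The sub/align/add trick above biases sp down by the total parameter
    // size, aligns it, then un-biases it: sp itself may end up unaligned
    // here, but it is positioned so that once the parameters are pushed
    // in the loop below, sp is stack-aligned at the moment we enter Java.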

    __ ldr(rscratch1, Address(rfp, params_arg_off));

    BLOCK_COMMENT("pass parameters if any");
    Label parameters_done;

    __ reg_printf("call_stub param_off = %p, param_sz = %d\n", rscratch1, rscratch2);
    __ cmp(rscratch2, 0);
    __ b(parameters_done, Assembler::EQ);

    // r14 (lr) makes an OK temp here since it has already been saved
    address loop = __ pc();
    __ ldr(r14, Address(__ post(rscratch1, wordSize)));
    __ subs(rscratch2, rscratch2, 1);

    // TODO remove
    __ reg_printf("\tARG SP[%d] : 0x%08x\n", rscratch2, r14);
    __ cmp(rscratch2, 0);
    // END TODO
    __ push(r14);
    __ b(loop, Assembler::GT);

    __ BIND(parameters_done);

#ifdef ASSERT
    __ verify_stack_alignment();
#endif

    BLOCK_COMMENT("call Java function");
    __ ldr(rscratch1, Address(rfp, entry_point_arg_off));

    __ reg_printf("Calling Java function with rfp = %p, sp = %p\n", rfp, sp);
    __ mov(r4, sp);                 // set sender sp
    __ bl(rscratch1);
    // save current address for use by exception handling code
    return_address = __ pc();

    __ reg_printf("Returned to call_stub with rfp = %p, sp = %p\n", rfp, sp);

    // At this point rfp should be restored to the value it was set to
    // before the call; use it to set the top of stack.
    __ sub(sp, rfp, rfp_tos_offset_bytes);

#ifdef ASSERT
    // verify that threads correspond
    __ ldr(r12, Address(rfp, thread_off));
    // rfp points to the register stored at the highest memory location -
    // first on the stack is the saved lr; the saved thread sits just
    // below the fp/lr pair and has just been reloaded into r12
    {
      Label L, S;
      __ cmp(rthread, r12);
      __ b(S, Assembler::NE);
      __ get_thread(r12);
      __ cmp(rthread, r12);
      __ b(L, Assembler::EQ);
      __ BIND(S);
      __ stop("StubRoutines::call_stub: threads must correspond");
      __ BIND(L);
    }
#endif

    if (MacroAssembler::enable_debugging_static) {
      // FIXME Remove this hacky debugging code
      Label L;
      __ ldr(rscratch2, Address(rthread, Thread::pending_exception_offset()));
      __ cbnz(rscratch2, L);
      // If we're returning via an exception then we shouldn't report exit,
      // the exception handler will have already reported the exit and reporting
      // via our progress through the call stub will result in an extra method
      // being reported as exited.
      __ print_method_exit();
      __ bind(L);
    }

    // NOTE Horrible tricks here
    // We need to preserve current r0 and r1 values as they contain the return value.
    // First we discard r0 saved to stack, no longer needed.
    // We have saved result and type as c_rarg1 and c_rarg2, so now we alter
    // the regset to load as follows:
    // c_rarg2 = result
    // c_rarg3 = result_type

    assert((c_save_regset & 0xf) == 0b0111, "change me");
    __ add(sp, sp, wordSize);
    const int altered_saved_regset = (~0xf & c_save_regset) | 0xc;
    __ ldmia(sp, altered_saved_regset);
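    // Worked example of the trick: the stmdb above left, from sp upwards,
    // [r0][r1][r2][r4]...; the add skips the saved r0, and the altered
    // regset ((~0xf & set) | 0xc) replaces { r0, r1, r2 } with { r2, r3 },
    // so ldmia reloads the saved r1 (result ptr) into c_rarg2 and the saved
    // r2 (result type) into c_rarg3 while r0/r1 keep the Java return value.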

    // store result depending on type (everything that is not
    // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
    // n.b. this assumes Java returns an integral result in r0
    // and a floating result in j_farg0

    Label is_object, is_long, is_float, is_double, exit;
    __ cmp(c_rarg3, T_OBJECT);
    __ b(is_object, Assembler::EQ);
    __ cmp(c_rarg3, T_LONG);
    __ b(is_long, Assembler::EQ);
    if (hasFPU()) {
        // without an FPU, T_FLOAT falls through to the T_INT case
        __ cmp(c_rarg3, T_FLOAT);
        __ b(is_float, Assembler::EQ);
    }
    __ cmp(c_rarg3, T_DOUBLE);
    if (hasFPU()) {
        __ b(is_double, Assembler::EQ);
    } else {
        // without an FPU a double is returned in r0:r1, same as T_LONG
        __ b(is_long, Assembler::EQ);
    }

    // handle T_INT case
    __ str(r0, Address(c_rarg2));

    __ BIND(exit);
    __ leave(); //Restore rfp, sp, lr
    __ reg_printf("leaving call stub with { sp : %p, rfp : %p, lr : %p}\n", sp, rfp, lr);
    // Pop arguments from stack.
    //__ add(sp, sp, 4 * wordSize);

    __ b(lr);

    // handle return types different from T_INT
    __ BIND(is_object);
    __ mov(r1, 0);

    __ BIND(is_long);
    __ strd(r0, r1, Address(c_rarg2, 0));
    __ b(exit, Assembler::AL);

    if (hasFPU()) {
        __ BIND(is_float);
        __ vstr_f32(f0, Address(c_rarg2, 0));
        __ b(exit, Assembler::AL);

        __ BIND(is_double);
        __ vstr_f64(d0, Address(c_rarg2, 0));
        __ b(exit, Assembler::AL);
    }
    return start;
  }

  // Return point for a Java call if there's an exception thrown in
  // Java code.  The exception is caught and transformed into a
  // pending exception stored in JavaThread that can be tested from
  // within the VM.
  //
  // Note: Usually the parameters are removed by the callee. In case
  // of an exception crossing an activation frame boundary, that is
  // not the case if the callee is compiled code => need to setup the
  // sp.
  //
  // r0: exception oop

  // NOTE: the following is inherited from the simulated aarch64 port,
  // where this stub was a signal handler target and so needed an x86
  // prolog to re-enter the simulator (install rax in a sim register,
  // adjust the sim's restart pc, then return from native to simulated
  // execution); none of that applies on aarch32 hardware.

  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");
    address start = __ pc();

    // same as in generate_call_stub():
    const Address thread(rfp, thread_off);

#ifdef ASSERT
    // verify that threads correspond
    {
      Label L, S;
      __ ldr(rscratch1, thread);
      __ cmp(rthread, rscratch1);
      __ b(S, Assembler::NE);
      __ get_thread(rscratch1);
      __ cmp(rthread, rscratch1);
      __ b(L, Assembler::EQ);
      __ bind(S);
      __ stop("StubRoutines::catch_exception: threads must correspond");
      __ bind(L);
    }
#endif

    // set pending exception
    __ verify_oop(r0);

    __ str(r0, Address(rthread, Thread::pending_exception_offset()));
    __ mov(rscratch1, (address)__FILE__);
    __ str(rscratch1, Address(rthread, Thread::exception_file_offset()));
    __ mov(rscratch1, (int)__LINE__);
    __ str(rscratch1, Address(rthread, Thread::exception_line_offset()));

    // complete return to VM
    assert(StubRoutines::_call_stub_return_address != NULL,
           "_call_stub_return_address must have been generated before");
    __ b(StubRoutines::_call_stub_return_address);

    return start;
  }

  // Continuation point for runtime calls returning with a pending
  // exception.  The pending exception check happened in the runtime
  // or native call stub.  The pending exception in Thread is
  // converted into a Java-level exception.
  //
  // Contract with Java-level exception handlers:
  // r0: exception
  // r3: throwing pc
  //
  // NOTE: At entry of this stub, exception-pc must be in LR !!

  // NOTE: this is always used as a jump target within generated code
  // so it just needs to be generated code with no x86 prolog

  address generate_forward_exception() {
    // FIXME NOTE ON ALTERATION TO ARM32: it was assumed that rmethod
    // won't be used anymore and is set on entry to the handler - is this true?

    Register spare = rmethod;

    StubCodeMark mark(this, "StubRoutines", "forward exception");
    address start = __ pc();

    // Upon entry, LR points to the return address returning into
    // Java (interpreted or compiled) code; i.e., the return address
    // becomes the throwing pc.
    //
    // Arguments pushed before the runtime call are still on the stack
    // but the exception handler will reset the stack pointer ->
    // ignore them.  A potential result in registers can be ignored as
    // well.

#ifdef ASSERT
    // make sure this code is only executed if there is a pending exception
    {
      Label L;
      __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
      __ cbnz(rscratch1, L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into spare

    // call the VM to find the handler address associated with the
    // caller address. pass thread in r0 and caller pc (ret address)
    // in r1. n.b. the caller pc is in lr, unlike x86 where it is on
    // the stack.
    __ mov(c_rarg1, lr);
    // lr will be trashed by the VM call so we move it to spare
    // (callee-saved) because we also need to pass it to the handler
    // returned by this call.
    __ mov(spare, lr); // note spare (rmethod) is a callee-saved register
    BLOCK_COMMENT("call exception_handler_for_return_address");
    __ call_VM_leaf(CAST_FROM_FN_PTR(address,
                         SharedRuntime::exception_handler_for_return_address),
                    rthread, c_rarg1);
    // We should not really care that lr is no longer the callee
    // address. We saved the value the handler needs in spare so we can
    // just copy it to r3. However, the C2 handler will push its own
    // frame and then call into the VM, and the VM code asserts that
    // the PC for the frame above the handler belongs to a compiled
    // Java method. So, we restore lr here to satisfy that assert.
    __ mov(lr, spare);
    // setup r0 & r3 & clear pending exception
    __ mov(r3, spare);
    __ mov(spare, r0);
    __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
    __ mov(rscratch1, 0);
    __ str(rscratch1, Address(rthread, Thread::pending_exception_offset()));

#ifdef ASSERT
    // make sure exception is set
    {
      Label L;
      __ cbnz(r0, L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif
    // continue at exception handler
    // r0: exception
    // r3: throwing pc
    // spare: exception handler

    __ verify_oop(r0);
    __ b(spare);

    return start;
  }

  // Non-destructive plausibility checks for oops
  //
  // Arguments:
  //    r0: oop to verify
  //    rscratch1: error message
  //
  // Stack after saving c_rarg3:
  //    [tos + 0]: saved c_rarg3
  //    [tos + 1]: saved c_rarg2
  //    [tos + 2]: saved lr
  //    [tos + 3]: saved rscratch2
  //    [tos + 4]: saved r1
  //    [tos + 5]: saved r0
  //    [tos + 6]: saved rscratch1
  address generate_verify_oop() {
    StubCodeMark mark(this, "StubRoutines", "verify_oop");
    address start = __ pc();

    Label exit, error;

    // save c_rarg2 and c_rarg3
    __ stmdb(sp, RegSet::of(c_rarg2, c_rarg3).bits());

    __ lea(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
    __ ldr(c_rarg3, Address(c_rarg2));
    __ add(c_rarg3, c_rarg3, 1);
    __ str(c_rarg3, Address(c_rarg2));

    // object is in r0
    // make sure object is 'reasonable'
    __ cbz(r0, exit); // if obj is NULL it is OK

    // Check if the oop is in the right area of memory
    __ mov(c_rarg3, (intptr_t) Universe::verify_oop_mask());
    __ andr(c_rarg2, r0, c_rarg3);
    __ mov(c_rarg3, (intptr_t) Universe::verify_oop_bits());

    // Compare c_rarg2 and c_rarg3.  We don't use a compare
    // instruction here because the flags register is live.
    __ eor(c_rarg2, c_rarg2, c_rarg3);
    __ cbnz(c_rarg2, error);

    // make sure klass is 'reasonable', i.e. non-zero
    __ load_klass(r0, r0);  // get klass
    __ cbz(r0, error);      // if klass is NULL it is broken

    // return if everything seems ok
    __ bind(exit);

    __ ldmia(sp, RegSet::of(c_rarg2, c_rarg3).bits());
    __ b(lr);

    // handle errors
    __ bind(error);
    __ ldmia(sp, RegSet::of(c_rarg2, c_rarg3).bits());

    __ pusha();
    // Save old sp
    __ add(c_rarg2, sp, 14 * wordSize);
    __ str(c_rarg2, Address(__ pre(sp, -wordSize)));
    __ mov(c_rarg0, rscratch1);      // pass address of error message
    __ mov(c_rarg1, lr);             // pass return address
    __ mov(c_rarg2, sp);             // pass address of regs on stack
#ifndef PRODUCT
    assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
#endif
    BLOCK_COMMENT("call MacroAssembler::debug");
    __ mov(rscratch1, CAST_FROM_FN_PTR(address, MacroAssembler::debug32));
    __ bl(rscratch1);
    __ hlt(0);

    return start;
  }

  // NOTE: very strange - this was changed, but it is not clear why the
  // Address (signed extend word) parameter was here in the first place
  //void array_overlap_test(Label& L_no_overlap, Address sf) { __ b(L_no_overlap); }
  void array_overlap_test(Label& L_no_overlap) { __ b(L_no_overlap); }
  // no overlap test is actually performed?

  // Generate code for an array write pre barrier
  //
  //     addr    -  starting address
  //     count   -  element count
  //     tmp     - scratch register
  //
  //     Destroys no registers!
  //
  void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
    BarrierSet* bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
    case BarrierSet::G1SATBCTLogging:
      // With G1, don't generate the call if we statically know that the target is uninitialized
      if (!dest_uninitialized) {
        __ push(RegSet::range(r0, r12), sp);         // integer registers except lr & sp
        if (count == c_rarg0) {
          if (addr == c_rarg1) {
            // exactly backwards!! store the pair, then reload it swapped
            __ strd(c_rarg0, c_rarg1, __ pre(sp, -2 * wordSize));
            __ ldrd(c_rarg1, c_rarg0, __ post(sp, 2 * wordSize));
          } else {
            __ mov(c_rarg1, count);
            __ mov(c_rarg0, addr);
          }
        } else {
          __ mov(c_rarg0, addr);
          __ mov(c_rarg1, count);
        }
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
        __ pop(RegSet::range(r0, r12), sp);          // integer registers except lr & sp
      }
      break;
    case BarrierSet::CardTableModRef:
    case BarrierSet::CardTableExtension:
    case BarrierSet::ModRef:
      break;
    default:
      ShouldNotReachHere();
    }
  }

  //
  // Generate code for an array write post barrier
  //
  //  Input:
  //     start    - register containing starting address of destination array
  //     end      - register containing ending address of destination array
  //     scratch  - scratch register
  //
  //  The input registers are overwritten.
  //  The ending address is inclusive.
  void gen_write_ref_array_post_barrier(Register start, Register end, Register scratch) {
    assert_different_registers(start, end, scratch);
    BarrierSet* bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1SATBCTLogging:
        {
          __ push(RegSet::range(r0, r12), sp);         // integer registers except lr & sp
          // must compute element count unless barrier set interface is changed (other platforms supply count)
          assert_different_registers(start, end, scratch);
          __ lea(scratch, Address(end, BytesPerHeapOop));
          __ sub(scratch, scratch, start);               // subtract start to get #bytes
          __ lsr(scratch, scratch, LogBytesPerHeapOop);  // convert to element count
          __ mov(c_rarg0, start);
          __ mov(c_rarg1, scratch);
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2);
          __ pop(RegSet::range(r0, r12), sp);          // integer registers except lr & sp
        }
        break;
      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
        {
          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

          Label L_loop;

          __ lsr(start, start, CardTableModRefBS::card_shift);
          __ lsr(end, end, CardTableModRefBS::card_shift);
          __ sub(end, end, start); // number of card table bytes to dirty

          const Register count = end; // 'end' register contains the card count now
          __ mov(scratch, (address)ct->byte_map_base);
          __ add(start, start, scratch);
          __ BIND(L_loop);
          __ mov(scratch, 0);
          __ strb(scratch, Address(start, count));
          __ subs(count, count, 1);
          __ b(L_loop, Assembler::HS); // inclusive: loop until count wraps below zero
        }
        break;
      default:
        ShouldNotReachHere();
    }
  }

  //
  // Small copy: less than 4 bytes.
  //
  // NB: Ignores all of the bits of count which represent more than 3
  // bytes, so a caller doesn't have to mask them.

  void copy_memory_small(Register s, Register d, Register count, Register tmp, bool is_aligned, int step) {
    const int granularity = uabs(step);
    const bool gen_always = !is_aligned || (-4 < step && step < 0);
    Label halfword, done;

    if ((granularity <= 1) || gen_always) {
      __ tst(count, 1);
      __ b(halfword, Assembler::EQ);
      __ ldrb(tmp, step < 0 ? __ pre(s, -1) : __ post(s, 1));
      __ strb(tmp, step < 0 ? __ pre(d, -1) : __ post(d, 1));
    }

    if ((granularity <= 2) || gen_always) {
      __ bind(halfword);
      __ tst(count, 2);
      __ b(done, Assembler::EQ);
      __ ldrh(tmp, step < 0 ? __ pre(s, -2) : __ post(s, 2));
      __ strh(tmp, step < 0 ? __ pre(d, -2) : __ post(d, 2));
    }

    __ bind(done);
  }

  void copy_memory_simd(Register s, Register d,
                   Register count, Register tmp, int step,
                   DoubleFloatRegSet tmp_set, size_t tmp_set_size) {
    assert(UseSIMDForMemoryOps, "should be available");
    Label simd_loop, simd_small;

    __ cmp(count, tmp_set_size);
    __ b(simd_small, Assembler::LT);

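    // Split count (in bytes) into tmp = number of whole tmp_set_size
    // blocks and count = the remaining bytes, i.e.
    //   tmp   = count / tmp_set_size
    //   count = count % tmp_set_size   (tmp_set_size is a power of two)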
    __ mov(tmp, count, __ lsr(exact_log2(tmp_set_size)));
    __ sub(count, count, tmp, __ lsl(exact_log2(tmp_set_size)));

    __ bind(simd_loop);

    __ pld(s, step < 0 ? -2 * tmp_set_size : tmp_set_size);

    if (step < 0) {
      __ vldmdb_f64(s, tmp_set.bits());
      __ vstmdb_f64(d, tmp_set.bits());
    } else {
      __ vldmia_f64(s, tmp_set.bits());
      __ vstmia_f64(d, tmp_set.bits());
    }

    __ subs(tmp, tmp, 1);
    __ b(simd_loop, Assembler::NE);

    __ bind(simd_small);
  }

  // All-singing all-dancing memory copy.
  //
  // Copy count units of memory from s to d.  The size of a unit is
  // step, which can be positive or negative depending on the direction
  // of copy.  If is_aligned is false, we align the source address.
  //

  void copy_memory(bool is_aligned, Register s, Register d,
                   Register count, Register tmp, int step) {
    const int small_copy_size = 32; // 1 copy by ldm pays off alignment efforts and push/pop of temp set
    const int granularity = uabs(step);
    const Register tmp2 = rscratch2;
    const Register t0 = r3;
    Label small;

    assert_different_registers(s, d, count, tmp, tmp2, t0);

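    // count arrives as an element count; scale it to a byte count
    // (granularity is the element size in bytes, a power of two).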
    __ mov(count, count, __ lsl(exact_log2(granularity)));

    if (step < 0) {
      __ add(s, s, count);
      __ add(d, d, count);
    }

    __ cmp(count, small_copy_size);
    __ b(small, Assembler::LT);

    // aligning
    if (!is_aligned || (-4 < step && step < 0)) {
      assert(3 <= small_copy_size, "may copy number of bytes required for alignment");
      if (step < 0) {
        __ andr(tmp2, s, 3);
      } else {
        __ rsb(tmp2, s, 0);
        __ andr(tmp2, tmp2, 3);
      }
      __ sub(count, count, tmp2);
      copy_memory_small(s, d, tmp2, tmp, is_aligned, step);
    }

#ifdef ASSERT
    Label src_aligned;
    __ tst(s, 3);
    __ b(src_aligned, Assembler::EQ);
    __ stop("src is not aligned");
    __ bind(src_aligned);
#endif

    // if destination is unaligned, copying by words is the only option
    __ tst(d, 3);
    __ b(small, Assembler::NE);
    if (UseSIMDForMemoryOps && (VM_Version::features() & FT_AdvSIMD)) {
      copy_memory_simd(s, d, count, tmp2, step, DoubleFloatRegSet::range(d0, d7), 64);
      copy_memory_simd(s, d, count, tmp2, step, DoubleFloatRegSet::range(d0, d1), 16);
    } else {
      const RegSet tmp_set = RegSet::range(r4, r7);
      const int tmp_set_size = 16;
      Label ldm_loop;

      assert_different_registers(s, d, count, tmp2, r4, r5, r6, r7);

      __ cmp(count, tmp_set_size);
      __ b(small, Assembler::LT);

      __ push(tmp_set, sp);

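      // As in copy_memory_simd: tmp2 = count / tmp_set_size whole ldm/stm
      // blocks, count = count % tmp_set_size bytes left over.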
      __ mov(tmp2, count, __ lsr(exact_log2(tmp_set_size)));
      __ sub(count, count, tmp2, __ lsl(exact_log2(tmp_set_size)));

      __ bind(ldm_loop);

      __ pld(s, step < 0 ? -2 * tmp_set_size : tmp_set_size);

      if (step < 0) {
        __ ldmdb(s, tmp_set.bits());
        __ stmdb(d, tmp_set.bits());
      } else {
        __ ldmia(s, tmp_set.bits());
        __ stmia(d, tmp_set.bits());
      }

      __ subs(tmp2, tmp2, 1);
      __ b(ldm_loop, Assembler::NE);

      __ pop(tmp_set, sp);
    }

    __ bind(small);

    Label words_loop, words_done;
    __ cmp(count, BytesPerWord);
    __ b(words_done, Assembler::LT);

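    // tmp2 = count / BytesPerWord whole words to copy in the loop below,
    // count = count % BytesPerWord trailing bytes for copy_memory_small.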
    __ mov(tmp2, count, __ lsr(exact_log2(BytesPerWord)));
    __ sub(count, count, tmp2, __ lsl(exact_log2(BytesPerWord)));

    __ bind(words_loop);

    Address src = step < 0 ? __ pre(s, -BytesPerWord) : __ post(s, BytesPerWord);
    Address dst = step < 0 ? __ pre(d, -BytesPerWord) : __ post(d, BytesPerWord);

    __ pld(s, step < 0 ? -2 * BytesPerWord : BytesPerWord);
    __ ldr(t0, src);
    __ str(t0, dst);
    __ subs(tmp2, tmp2, 1);

    __ b(words_loop, Assembler::NE);

    __ bind(words_done);
    copy_memory_small(s, d, count, tmp, is_aligned, step);
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 4-byte boundary
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  //
  // Side Effects:
  //   disjoint_int_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_int_oop_copy().
  //
  address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address *entry,
                                  const char *name, bool dest_uninitialized = false) {
    Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();
    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }
    __ enter();
    if (is_oop) {
      __ push(RegSet::of(d, count), sp);
      // no registers are destroyed by this call
      gen_write_ref_array_pre_barrier(d, count, dest_uninitialized);
    }
    copy_memory(aligned, s, d, count, rscratch1, size);
    if (is_oop) {
      __ pop(RegSet::of(d, count), sp);
      __ sub(count, count, 1); // make an inclusive end pointer
      __ lea(count, Address(d, count, lsl(exact_log2(size))));
      gen_write_ref_array_post_barrier(d, count, rscratch1);
    }
    __ leave();
    __ b(lr);
    return start;
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 4-byte boundary
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  //
  address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target,
                                 address *entry, const char *name,
                                 bool dest_uninitialized = false) {
    Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    __ cmp(d, s);
    __ b(nooverlap_target, Assembler::LS);

    __ enter();
    if (is_oop) {
      __ push(RegSet::of(d, count), sp);
      // no registers are destroyed by this call
      gen_write_ref_array_pre_barrier(d, count, dest_uninitialized);
    }
    copy_memory(aligned, s, d, count, rscratch1, -size);
    if (is_oop) {
      __ pop(RegSet::of(d, count), sp);
      __ sub(count, count, 1); // make an inclusive end pointer
      __ lea(count, Address(d, count, lsl(exact_log2(size))));
      gen_write_ref_array_post_barrier(d, count, rscratch1);
    }
    __ leave();
    __ b(lr);
    return start;
  }

  // Helper for generating a dynamic type check.
  // Smashes rscratch1.
  void generate_type_check(Register sub_klass,
                           Register super_check_offset,
                           Register super_klass,
                           Label& L_success) {
    assert_different_registers(sub_klass, super_check_offset, super_klass);

    BLOCK_COMMENT("type_check:");

    Label L_miss;

    __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg,        &L_success, &L_miss, NULL,
                                     super_check_offset);
    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);

    // Fall through on failure!
    __ BIND(L_miss);
  }

  //
  //  Generate checkcasting array copy stub
  //
  //  Input:
  //    c_rarg0   - source array address
  //    c_rarg1   - destination array address
  //    c_rarg2   - oop ckval (super_klass)
  //    c_rarg3   - size_t ckoff (super_check_offset)
  //    r4        - element count, treated as ssize_t, can be zero
  //
  //  Output:
  //    r0 ==  0  -  success
  //    r0 == -1^K - failure, where K is partial transfer count
  //
  address generate_checkcast_copy(const char *name, address *entry,
                                  bool dest_uninitialized = false) {
    Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop;

    // Input registers (after setup_arg_regs)
    const Register from        = c_rarg0;   // source array address
    const Register to          = c_rarg1;   // destination array address
    const Register count       = r4;        // elements count
    const Register ckoff       = c_rarg3;   // super_check_offset
    const Register ckval       = c_rarg2;   // super_klass

    // Registers used as temps
    const Register count_save  = r5;       // orig elements count
    const Register copied_oop  = r6;       // actual oop copied
    const Register oop_klass   = r7;       // oop._klass

    //---------------------------------------------------------------
    // Assembler stub will be used for this call to arraycopy
    // if the two arrays are subtypes of Object[] but the
    // destination array type is not equal to or a supertype
    // of the source type.  Each element must be separately
    // checked.

    assert_different_registers(from, to, count, ckoff, ckval,
                               copied_oop, oop_klass, count_save);

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    __ enter(); // required for proper stackwalking of RuntimeStub frame

#ifdef ASSERT
    // caller guarantees that the arrays really are different
    // otherwise, we would have to make conjoint checks
    { Label L;
      array_overlap_test(L);//, TIMES_OOP);
      __ stop("checkcast_copy within a single array");
      __ bind(L);
    }
#endif //ASSERT

    // Caller of this entry point must set up the argument registers.
    if (entry != NULL) {
      *entry = __ pc();
      BLOCK_COMMENT("Entry:");
    }

    // Empty array:  Nothing to do.
    __ cbz(count, L_done);

    __ push(RegSet::of(count_save, copied_oop, oop_klass), sp);

#ifdef ASSERT
    BLOCK_COMMENT("assert consistent ckoff/ckval");
    // The ckoff and ckval must be mutually consistent,
    // even though caller generates both.
    { Label L;
      int sco_offset = in_bytes(Klass::super_check_offset_offset());
      __ ldr(rscratch1, Address(ckval, sco_offset));
      __ cmp(ckoff, rscratch1);
      __ b(L, Assembler::EQ);
      __ stop("super_check_offset inconsistent");
      __ bind(L);
    }
#endif //ASSERT

    // save the original count
    __ mov(count_save, count);

    // save destination array start address
    __ push(to);

    // Copy from low to high addresses
    __ b(L_load_element);

    // ======== begin loop ========
    // (Loop is rotated; its entry is L_load_element.)
    // Loop control:
    //   for (; count != 0; count--) {
    //     copied_oop = load_heap_oop(from++);
    //     ... generate_type_check ...;
    //     store_heap_oop(to++, copied_oop);
    //   }
    __ align(OptoLoopAlignment);

    __ BIND(L_store_element);
    __ store_heap_oop(__ post(to, 4), copied_oop);  // store the oop
    __ sub(count, count, 1);
    __ cbz(count, L_do_card_marks);

    // ======== loop entry is here ========
    __ BIND(L_load_element);
    __ load_heap_oop(copied_oop, __ post(from, 4)); // load the oop
    __ cbz(copied_oop, L_store_element);

    __ load_klass(oop_klass, copied_oop);  // query the object klass
    generate_type_check(oop_klass, ckoff, ckval, L_store_element);
    // ======== end loop ========

    // It was a real error; we must depend on the caller to finish the job.
    // Register count = remaining oops, count_orig = total oops.
    // Emit GC store barriers for the oops we have copied and report
    // their number to the caller.

    __ subs(count, count_save, count);    // K = partially copied oop count
    __ inv(count, count);                 // report (-1^K) to caller
    __ b(L_done_pop, Assembler::EQ);

    __ BIND(L_do_card_marks);
    __ add(to, to, -heapOopSize);         // make an inclusive end pointer
    __ pop(rscratch2);                    // restore original to address
    gen_write_ref_array_post_barrier(rscratch2, to, rscratch1);

    __ bind(L_done_pop);
    __ pop(RegSet::of(count_save, copied_oop, oop_klass), sp);
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);

    __ bind(L_done);
    __ mov(r0, count);
    __ leave();
    __ b(lr);
    return start;
  }

  void generate_arraycopy_stubs() {
    address entry;

    // jbyte
    StubRoutines::_arrayof_jbyte_disjoint_arraycopy =      generate_disjoint_copy(sizeof(jbyte),  true,  false,        &entry, "arrayof_jbyte_disjoint_arraycopy");
    StubRoutines::_arrayof_jbyte_arraycopy =               generate_conjoint_copy(sizeof(jbyte),  true,  false, entry, NULL,   "arrayof_jbyte_arraycopy");
    StubRoutines::_jbyte_disjoint_arraycopy =              generate_disjoint_copy(sizeof(jbyte),  false, false,        &entry, "jbyte_disjoint_arraycopy");
    StubRoutines::_jbyte_arraycopy =                       generate_conjoint_copy(sizeof(jbyte),  false, false, entry, NULL,   "jbyte_arraycopy");
    // jshort
    StubRoutines::_arrayof_jshort_disjoint_arraycopy =     generate_disjoint_copy(sizeof(jshort), true,  false,        &entry, "arrayof_jshort_disjoint_arraycopy");
    StubRoutines::_arrayof_jshort_arraycopy =              generate_conjoint_copy(sizeof(jshort), true,  false, entry, NULL,   "arrayof_jshort_arraycopy");
    StubRoutines::_jshort_disjoint_arraycopy =             generate_disjoint_copy(sizeof(jshort), false, false,        &entry, "jshort_disjoint_arraycopy");
    StubRoutines::_jshort_arraycopy =                      generate_conjoint_copy(sizeof(jshort), false, false, entry, NULL,   "jshort_arraycopy");
    // jint (always aligned)
    StubRoutines::_arrayof_jint_disjoint_arraycopy =       generate_disjoint_copy(sizeof(jint),   true,  false,        &entry, "arrayof_jint_disjoint_arraycopy");
    StubRoutines::_arrayof_jint_arraycopy =                generate_conjoint_copy(sizeof(jint),   true,  false, entry, NULL,   "arrayof_jint_arraycopy");
    StubRoutines::_jint_disjoint_arraycopy =               StubRoutines::_arrayof_jint_disjoint_arraycopy;
    StubRoutines::_jint_arraycopy =                        StubRoutines::_arrayof_jint_arraycopy;
    // jlong (always aligned)
    StubRoutines::_arrayof_jlong_disjoint_arraycopy =      generate_disjoint_copy(sizeof(jlong),  true,  false,        &entry, "arrayof_jlong_disjoint_arraycopy");
    StubRoutines::_arrayof_jlong_arraycopy =               generate_conjoint_copy(sizeof(jlong),  true,  false, entry, NULL,   "arrayof_jlong_arraycopy");
    StubRoutines::_jlong_disjoint_arraycopy =              StubRoutines::_arrayof_jlong_disjoint_arraycopy;
    StubRoutines::_jlong_arraycopy =                       StubRoutines::_arrayof_jlong_arraycopy;
    // OOP (always aligned)
    StubRoutines::_arrayof_oop_disjoint_arraycopy =        generate_disjoint_copy(sizeof(jint),   true,  true,         &entry, "arrayof_oop_disjoint_arraycopy");
    StubRoutines::_arrayof_oop_arraycopy =                 generate_conjoint_copy(sizeof(jint),   true,  true,  entry, NULL,   "arrayof_oop_arraycopy");
    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_copy(sizeof(jint),   true,  true,         &entry, "arrayof_oop_disjoint_arraycopy_uninit", true);
    StubRoutines::_arrayof_oop_arraycopy_uninit =          generate_conjoint_copy(sizeof(jint),   true,  true,  entry, NULL,   "arrayof_oop_arraycopy_uninit",          true);
    StubRoutines::_oop_disjoint_arraycopy =                StubRoutines::_arrayof_oop_disjoint_arraycopy;
    StubRoutines::_oop_arraycopy =                         StubRoutines::_arrayof_oop_arraycopy;
    StubRoutines::_oop_disjoint_arraycopy_uninit =         StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
    StubRoutines::_oop_arraycopy_uninit =                  StubRoutines::_arrayof_oop_arraycopy_uninit;

    StubRoutines::_checkcast_arraycopy =        generate_checkcast_copy("checkcast_arraycopy",        NULL);
    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, true);
  }

  void generate_math_stubs() { Unimplemented(); }

  // Safefetch stubs.
  void generate_safefetch(const char* name, int size, address* entry,
                          address* fault_pc, address* continuation_pc) {
    // safefetch signatures:
    //   int      SafeFetch32(int*      adr, int      errValue);
    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
    //
    // arguments:
    //   c_rarg0 = adr
    //   c_rarg1 = errValue
    //
    // result:
    //   r0 = *adr or errValue
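    //
    // How this works (sketch): if the load at *fault_pc faults, the VM's
    // signal handler recognizes the faulting pc as a safefetch fault and
    // resumes execution at *continuation_pc, which returns errValue
    // (see StubRoutines::is_safefetch_fault and the platform signal handlers).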

    StubCodeMark mark(this, "StubRoutines", name);

    // Entry point, pc or function descriptor.
    *entry = __ pc();

    // Load *adr into c_rarg0, may fault.
    __ mov(c_rarg2, c_rarg0);
    *fault_pc = __ pc();
    switch (size) {
      case 4:
        // int32_t (and intptr_t: both are 4 bytes on aarch32)
        __ ldr(c_rarg0, Address(c_rarg2, 0));
        break;
      default:
        ShouldNotReachHere();
    }
    __ b(lr);
    // return errValue or *adr
    *continuation_pc = __ pc();
    __ mov(r0, c_rarg1);
    __ b(lr);
  }

  /**
   *  Arguments:
   *
   * Inputs:
   *   c_rarg0   - int crc
   *   c_rarg1   - byte* buf
   *   c_rarg2   - int length
   *
   * Output:
   *       r0   - int crc result
   *
   * Preserves:
   *       r13
   *
   */
  address generate_updateBytesCRC32() {
    assert(UseCRC32Intrinsics, "what are we doing here?");

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");

    address start = __ pc();

    const Register crc   = c_rarg0;  // crc
    const Register buf   = c_rarg1;  // source java byte array address
    const Register len   = c_rarg2;  // length
    const Register table0 = c_rarg3; // crc_table address
    const Register table1 = r4;
    const Register table2 = r5;
    const Register table3 = lr;

    BLOCK_COMMENT("Entry:");
    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(RegSet::of(table1, table2, r6, r7), sp);

    __ kernel_crc32(crc, buf, len,
              table0, table1, table2, table3, rscratch1, rscratch2, r6);

    __ pop(RegSet::of(table1, table2, r6, r7), sp);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(lr);

    return start;
  }

  // Continuation point for throwing of implicit exceptions that are
  // not handled in the current activation. Fabricates an exception
  // oop and initiates normal exception dispatching in this
  // frame. Since we need to preserve callee-saved values (currently
  // only for C2, but done for C1 as well) we need a callee-saved oop
  // map and therefore have to make these stubs into RuntimeStubs
  // rather than BufferBlobs.  If the compiler needs all registers to
  // be preserved between the fault point and the exception handler
  // then it must assume responsibility for that in
  // AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other
  // implicit exceptions (e.g., NullPointerException or
  // AbstractMethodError on entry) are either at call sites or
  // otherwise assume that stack unwinding will be initiated, so
  // caller saved registers were assumed volatile in the compiler.

#undef __
#define __ masm->

  address generate_throw_exception(const char* name,
                                   address runtime_entry,
                                   Register arg1 = noreg,
                                   Register arg2 = noreg) {
    // Information about frame layout at time of blocking runtime call.
    // Note that we only have to preserve callee-saved registers since
    // the compilers are responsible for supplying a continuation point
    // if they expect all registers to be preserved.
    // n.b. aarch32 asserts that frame::arg_reg_save_area_bytes == 0
    enum layout {
      rfp_off = 0,
      return_off,
      framesize // inclusive of return address
    };

    int insts_size = 512;
    int locs_size  = 64;

    CodeBuffer code(name, insts_size, locs_size);
    OopMapSet* oop_maps  = new OopMapSet();
    MacroAssembler* masm = new MacroAssembler(&code);

    address start = __ pc();

    // This is an inlined and slightly modified version of call_VM
    // which has the ability to fetch the return PC out of
    // thread-local storage and also sets up last_Java_sp slightly
    // differently than the real call_VM

    __ enter(); // Save FP and LR before call

    assert(is_even(framesize), "sp not 8-byte aligned");

    int frame_complete = __ pc() - start;

    // Set up last_Java_sp and last_Java_fp
    address the_pc = __ pc();
    __ set_last_Java_frame(sp, rfp, (address)NULL, rscratch1);

    // Call runtime
    if (arg1 != noreg) {
      assert(arg2 != c_rarg1, "clobbered");
      __ mov(c_rarg1, arg1);
    }
    if (arg2 != noreg) {
      __ mov(c_rarg2, arg2);
    }
    __ mov(c_rarg0, rthread);
    BLOCK_COMMENT("call runtime_entry");
    __ align_stack();
    __ mov(rscratch1, runtime_entry);
    __ bl(rscratch1);

    // Generate oop map
    OopMap* map = new OopMap(framesize, 0);

    oop_maps->add_gc_map(the_pc - start, map);

    __ reset_last_Java_frame(true);
    __ maybe_isb();

    __ leave();

    // check for pending exceptions
#ifdef ASSERT
    Label L;
    __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
    __ cbnz(rscratch1, L);
    __ should_not_reach_here();
    __ bind(L);
#endif // ASSERT
    __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

    // codeBlob framesize is in words (not VMRegImpl::slot_size)
    RuntimeStub* stub =
      RuntimeStub::new_runtime_stub(name,
                                    &code,
                                    frame_complete,
                                    framesize,
                                    oop_maps, false);
    return stub->entry_point();
  }

  // Initialization
  void generate_initial() {
    // Generates the initial stubs and initializes the entry points

    // Entry points that exist in all platforms.  Note: this is code
    // that could be shared among different platforms - however the
    // benefit seems to be smaller than the disadvantage of having a
    // much more complicated generator structure. See also comment in
    // stubRoutines.hpp.

    StubRoutines::_forward_exception_entry = generate_forward_exception();

    StubRoutines::_call_stub_entry =
      generate_call_stub(StubRoutines::_call_stub_return_address);

    // is referenced by megamorphic call
    StubRoutines::_catch_exception_entry = generate_catch_exception();

    // Build this early so it's available for the interpreter.
    StubRoutines::_throw_StackOverflowError_entry =
      generate_throw_exception("StackOverflowError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_StackOverflowError));
    if (UseCRC32Intrinsics) {
      // set table address before stub generation, which uses it
      StubRoutines::_crc_table_adr = (address)StubRoutines::aarch32::_crc_table;
      StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
    }

    NativeCall::init();
  }

  void generate_all() {
    // support for verify_oop (must happen after universe_init)
    StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop();
    StubRoutines::_throw_AbstractMethodError_entry =
      generate_throw_exception("AbstractMethodError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_AbstractMethodError));

    StubRoutines::_throw_IncompatibleClassChangeError_entry =
      generate_throw_exception("IncompatibleClassChangeError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_IncompatibleClassChangeError));

    StubRoutines::_throw_NullPointerException_at_call_entry =
      generate_throw_exception("NullPointerException at call throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_NullPointerException_at_call));

    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();

    // Safefetch stubs.
    generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
                                                       &StubRoutines::_safefetch32_fault_pc,
                                                       &StubRoutines::_safefetch32_continuation_pc);
    generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
                                                       &StubRoutines::_safefetchN_fault_pc,
                                                       &StubRoutines::_safefetchN_continuation_pc);
  }

 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }
  }
}; // end class declaration

void StubGenerator_generate(CodeBuffer* code, bool all) {
  StubGenerator g(code, all);
}