< prev index next >

src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

Print this page

   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "classfile/javaClasses.hpp"
  28 #include "classfile/vmIntrinsics.hpp"

  29 #include "compiler/oopMap.hpp"
  30 #include "gc/shared/barrierSet.hpp"
  31 #include "gc/shared/barrierSetAssembler.hpp"
  32 #include "gc/shared/barrierSetNMethod.hpp"
  33 #include "gc/shared/gc_globals.hpp"
  34 #include "memory/universe.hpp"
  35 #include "prims/jvmtiExport.hpp"
  36 #include "prims/upcallLinker.hpp"
  37 #include "runtime/arguments.hpp"
  38 #include "runtime/continuationEntry.hpp"
  39 #include "runtime/javaThread.hpp"
  40 #include "runtime/sharedRuntime.hpp"
  41 #include "runtime/stubRoutines.hpp"
  42 #include "stubGenerator_x86_64.hpp"
  43 #ifdef COMPILER2
  44 #include "opto/runtime.hpp"
  45 #include "opto/c2_globals.hpp"
  46 #endif
  47 #if INCLUDE_JVMCI
  48 #include "jvmci/jvmci_globals.hpp"

3036 
3037 
3038 /**
      *  Emits the "multiplyToLen" stub (registered under "StubRoutines") and
      *  returns the address of its first instruction. The emitted code builds
      *  a frame, moves the incoming arguments into the registers that are
      *  passed to multiply_to_len(), and tears the frame down again.
      *
3039  *  Arguments:
3040  *
3041  *  Input:
3042  *    c_rarg0   - x address
3043  *    c_rarg1   - x length
3044  *    c_rarg2   - y address
3045  *    c_rarg3   - y length
3046  * not Win64
3047  *    c_rarg4   - z address
3048  * Win64
3049  *    rsp+40    - z address
      *
      *  Returns:
      *    the pc captured right after the CodeEntryAlignment padding, i.e. the
      *    entry point of the generated stub.
3050  */
3051 address StubGenerator::generate_multiplyToLen() {
3052   __ align(CodeEntryAlignment);
3053   StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
3054   address start = __ pc(); // entry point handed back to the caller
3055 




3056   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3057   // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
       // Registers in which multiply_to_len() receives its operands. Note that
       // y lives in rsi and xlen in rax, which differs from the layout produced
       // by setup_arg_regs() below (xlen in rsi, y in rdx).
3058   const Register x     = rdi;
3059   const Register xlen  = rax;
3060   const Register y     = rsi;
3061   const Register ylen  = rcx;
3062   const Register z     = r8;
3063 
3064   // Next registers will be saved on stack in multiply_to_len().
3065   const Register tmp0  = r11;
3066   const Register tmp1  = r12;
3067   const Register tmp2  = r13;
3068   const Register tmp3  = r14;
3069   const Register tmp4  = r15;
3070   const Register tmp5  = rbx;
3071 
3072   BLOCK_COMMENT("Entry:");
3073   __ enter(); // required for proper stackwalking of RuntimeStub frame
3074 
3075   setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
3076                      // ylen => rcx, z => r8
3077                      // r9 and r10 may be used to save non-volatile registers
3078 #ifdef _WIN64
3079   // last argument (#4) is on stack on Win64
       // NOTE(review): the header lists this slot as rsp+40; 6 * wordSize
       // presumably accounts for the extra slot pushed by enter() - confirm.
3080   __ movptr(z, Address(rsp, 6 * wordSize));
3081 #endif
3082 
       // Order matters: xlen must be copied out of rsi before y (which is
       // aliased to rsi) is loaded from rdx and overwrites it.
3083   __ movptr(xlen, rsi);
3084   __ movptr(y,    rdx);
3085   __ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3086 
3087   restore_arg_regs();
3088 
3089   __ leave(); // required for proper stackwalking of RuntimeStub frame
3090   __ ret(0);
3091 

3092   return start;
3093 }
3094 
3095 /**
3096 *  Arguments:
3097 *
3098 *  Input:
3099 *    c_rarg0   - obja     address
3100 *    c_rarg1   - objb     address
3101 *    c_rarg2   - length   length
3102 *    c_rarg3   - scale    log2_array_indxscale
3103 *
3104 *  Output:
3105 *        rax   - int >= mismatched index, < 0 bitwise complement of tail
3106 */
3107 address StubGenerator::generate_vectorizedMismatch() {
3108   __ align(CodeEntryAlignment);
3109   StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
3110   address start = __ pc();
3111 

3143 
3144   return start;
3145 }
3146 
3147 /**
      *  Emits the "squareToLen" stub (registered under "StubRoutines") and
      *  returns the address of its first instruction. The emitted code builds
      *  a frame, moves z out of rdx into r8, and passes the operand and
      *  scratch registers to square_to_len().
      *
3148  *  Arguments:
3149  *
3150  *  Input:
3151  *    c_rarg0   - x address
3152  *    c_rarg1   - x length
3153  *    c_rarg2   - z address
3154  *    c_rarg3   - z length
3155  *
      *  Returns:
      *    the pc captured right after the CodeEntryAlignment padding, i.e. the
      *    entry point of the generated stub.
3156  */
3157 address StubGenerator::generate_squareToLen() {
3158 
3159   __ align(CodeEntryAlignment);
3160   StubCodeMark mark(this, "StubRoutines", "squareToLen");
3161   address start = __ pc(); // entry point handed back to the caller
3162 




3163   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3164   // Unix:  rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
       // Registers in which square_to_len() receives its operands; z is kept
       // in r8 rather than in rdx, where setup_arg_regs() leaves it.
3165   const Register x      = rdi;
3166   const Register len    = rsi;
3167   const Register z      = r8;
3168   const Register zlen   = rcx;
3169 
       // Temporaries handed to square_to_len().
3170  const Register tmp1      = r12;
3171  const Register tmp2      = r13;
3172  const Register tmp3      = r14;
3173  const Register tmp4      = r15;
3174  const Register tmp5      = rbx;
3175 
3176   BLOCK_COMMENT("Entry:");
3177   __ enter(); // required for proper stackwalking of RuntimeStub frame
3178 
3179   setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
3180                      // zlen => rcx
3181                      // r9 and r10 may be used to save non-volatile registers
       // Move z from rdx to r8 (the register named z above); rdx is then
       // passed, together with rax, as one of the trailing arguments below.
3182   __ movptr(r8, rdx);
3183   __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3184 
3185   restore_arg_regs();
3186 
3187   __ leave(); // required for proper stackwalking of RuntimeStub frame
3188   __ ret(0);
3189 

3190   return start;
3191 }
3192 
3193 address StubGenerator::generate_method_entry_barrier() {
3194   __ align(CodeEntryAlignment);
3195   StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
3196   address start = __ pc();
3197 
3198   Label deoptimize_label;
3199 
3200   __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
3201 
3202   BLOCK_COMMENT("Entry:");
3203   __ enter(); // save rbp
3204 
3205   // save c_rarg0, because we want to use that value.
3206   // We could do without it but then we depend on the number of slots used by pusha
3207   __ push(c_rarg0);
3208 
3209   __ lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address

3267 }
3268 
3269  /**
      *  Emits the "mulAdd" stub (registered under "StubRoutines") and returns
      *  the address of its first instruction. The emitted code builds a frame,
      *  moves offset out of rdx into r11, and passes the operand and scratch
      *  registers to mul_add().
      *
3270  *  Arguments:
3271  *
3272  *  Input:
3273  *    c_rarg0   - out address
3274  *    c_rarg1   - in address
3275  *    c_rarg2   - offset
3276  *    c_rarg3   - len
3277  * not Win64
3278  *    c_rarg4   - k
3279  * Win64
3280  *    rsp+40    - k
      *
      *  Returns:
      *    the pc captured right after the CodeEntryAlignment padding, i.e. the
      *    entry point of the generated stub.
3281  */
3282 address StubGenerator::generate_mulAdd() {
3283   __ align(CodeEntryAlignment);
3284   StubCodeMark mark(this, "StubRoutines", "mulAdd");
3285   address start = __ pc(); // entry point handed back to the caller
3286 




3287   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3288   // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
       // Registers in which mul_add() receives its operands; offset is kept in
       // r11 rather than in rdx, where setup_arg_regs() leaves it.
3289   const Register out     = rdi;
3290   const Register in      = rsi;
3291   const Register offset  = r11;
3292   const Register len     = rcx;
3293   const Register k       = r8;
3294 
3295   // Next registers will be saved on stack in mul_add().
3296   const Register tmp1  = r12;
3297   const Register tmp2  = r13;
3298   const Register tmp3  = r14;
3299   const Register tmp4  = r15;
3300   const Register tmp5  = rbx;
3301 
3302   BLOCK_COMMENT("Entry:");
3303   __ enter(); // required for proper stackwalking of RuntimeStub frame
3304 
3305   setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
3306                      // len => rcx, k => r8
3307                      // r9 and r10 may be used to save non-volatile registers
3308 #ifdef _WIN64
3309   // last argument is on stack on Win64
       // NOTE(review): the header lists this slot as rsp+40; 6 * wordSize
       // presumably accounts for the extra slot pushed by enter() - confirm.
       // k is loaded with movl; presumably it is a 32-bit value - confirm.
3310   __ movl(k, Address(rsp, 6 * wordSize));
3311 #endif
3312   __ movptr(r11, rdx);  // move offset in rdx to offset(r11)
       // rdx, freed by the move above, is passed together with rax as the
       // trailing arguments of mul_add().
3313   __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3314 
3315   restore_arg_regs();
3316 
3317   __ leave(); // required for proper stackwalking of RuntimeStub frame
3318   __ ret(0);
3319 

3320   return start;
3321 }
3322 
3323 address StubGenerator::generate_bigIntegerRightShift() {
3324   __ align(CodeEntryAlignment);
3325   StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
3326   address start = __ pc();
3327 
3328   Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
3329   // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
3330   const Register newArr = rdi;
3331   const Register oldArr = rsi;
3332   const Register newIdx = rdx;
3333   const Register shiftCount = rcx;  // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
3334   const Register totalNumIter = r8;
3335 
3336   // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
3337   // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
3338   const Register tmp1 = r11;                    // Caller save.
3339   const Register tmp2 = rax;                    // Caller save.

   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "classfile/javaClasses.hpp"
  28 #include "classfile/vmIntrinsics.hpp"
  29 #include "code/SCCache.hpp"
  30 #include "compiler/oopMap.hpp"
  31 #include "gc/shared/barrierSet.hpp"
  32 #include "gc/shared/barrierSetAssembler.hpp"
  33 #include "gc/shared/barrierSetNMethod.hpp"
  34 #include "gc/shared/gc_globals.hpp"
  35 #include "memory/universe.hpp"
  36 #include "prims/jvmtiExport.hpp"
  37 #include "prims/upcallLinker.hpp"
  38 #include "runtime/arguments.hpp"
  39 #include "runtime/continuationEntry.hpp"
  40 #include "runtime/javaThread.hpp"
  41 #include "runtime/sharedRuntime.hpp"
  42 #include "runtime/stubRoutines.hpp"
  43 #include "stubGenerator_x86_64.hpp"
  44 #ifdef COMPILER2
  45 #include "opto/runtime.hpp"
  46 #include "opto/c2_globals.hpp"
  47 #endif
  48 #if INCLUDE_JVMCI
  49 #include "jvmci/jvmci_globals.hpp"

3037 
3038 
3039 /**
      *  Emits the "multiplyToLen" stub (registered under "StubRoutines") and
      *  returns the address of its first instruction. If SCCache::load_stub()
      *  reports success, the function returns immediately and none of the code
      *  below is emitted; otherwise the stub is generated and then handed to
      *  SCCache::store_stub().
      *
3040  *  Arguments:
3041  *
3042  *  Input:
3043  *    c_rarg0   - x address
3044  *    c_rarg1   - x length
3045  *    c_rarg2   - y address
3046  *    c_rarg3   - y length
3047  * not Win64
3048  *    c_rarg4   - z address
3049  * Win64
3050  *    rsp+40    - z address
      *
      *  Returns:
      *    the pc captured right after the CodeEntryAlignment padding, i.e. the
      *    entry point of the stub.
3051  */
3052 address StubGenerator::generate_multiplyToLen() {
3053   __ align(CodeEntryAlignment);
3054   StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
3055   address start = __ pc(); // entry point handed back to the caller
3056 
       // NOTE(review): presumably load_stub() places previously cached code for
       // this intrinsic at `start`, making generation unnecessary - confirm.
3057   if (SCCache::load_stub(this, vmIntrinsics::_multiplyToLen, "multiplyToLen", start)) {
3058     return start;
3059   }
3060 
3061   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3062   // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
       // Registers in which multiply_to_len() receives its operands. Note that
       // y lives in rsi and xlen in rax, which differs from the layout produced
       // by setup_arg_regs() below (xlen in rsi, y in rdx).
3063   const Register x     = rdi;
3064   const Register xlen  = rax;
3065   const Register y     = rsi;
3066   const Register ylen  = rcx;
3067   const Register z     = r8;
3068 
3069   // Next registers will be saved on stack in multiply_to_len().
3070   const Register tmp0  = r11;
3071   const Register tmp1  = r12;
3072   const Register tmp2  = r13;
3073   const Register tmp3  = r14;
3074   const Register tmp4  = r15;
3075   const Register tmp5  = rbx;
3076 
3077   BLOCK_COMMENT("Entry:");
3078   __ enter(); // required for proper stackwalking of RuntimeStub frame
3079 
3080   setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
3081                      // ylen => rcx, z => r8
3082                      // r9 and r10 may be used to save non-volatile registers
3083 #ifdef _WIN64
3084   // last argument (#4) is on stack on Win64
       // NOTE(review): the header lists this slot as rsp+40; 6 * wordSize
       // presumably accounts for the extra slot pushed by enter() - confirm.
3085   __ movptr(z, Address(rsp, 6 * wordSize));
3086 #endif
3087 
       // Order matters: xlen must be copied out of rsi before y (which is
       // aliased to rsi) is loaded from rdx and overwrites it.
3088   __ movptr(xlen, rsi);
3089   __ movptr(y,    rdx);
3090   __ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3091 
3092   restore_arg_regs();
3093 
3094   __ leave(); // required for proper stackwalking of RuntimeStub frame
3095   __ ret(0);
3096 
       // Called only after the complete stub, including ret, has been emitted.
3097   SCCache::store_stub(this, vmIntrinsics::_multiplyToLen, "multiplyToLen", start);
3098   return start;
3099 }
3100 
3101 /**
3102 *  Arguments:
3103 *
3104 *  Input:
3105 *    c_rarg0   - obja     address
3106 *    c_rarg1   - objb     address
3107 *    c_rarg2   - length   length
3108 *    c_rarg3   - scale    log2_array_indxscale
3109 *
3110 *  Output:
3111 *        rax   - int >= mismatched index, < 0 bitwise complement of tail
3112 */
3113 address StubGenerator::generate_vectorizedMismatch() {
3114   __ align(CodeEntryAlignment);
3115   StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
3116   address start = __ pc();
3117 

3149 
3150   return start;
3151 }
3152 
3153 /**
      *  Emits the "squareToLen" stub (registered under "StubRoutines") and
      *  returns the address of its first instruction. If SCCache::load_stub()
      *  reports success, the function returns immediately and none of the code
      *  below is emitted; otherwise the stub is generated and then handed to
      *  SCCache::store_stub().
      *
3154  *  Arguments:
3155  *
3156  *  Input:
3157  *    c_rarg0   - x address
3158  *    c_rarg1   - x length
3159  *    c_rarg2   - z address
3160  *    c_rarg3   - z length
3161  *
      *  Returns:
      *    the pc captured right after the CodeEntryAlignment padding, i.e. the
      *    entry point of the stub.
3162  */
3163 address StubGenerator::generate_squareToLen() {
3164 
3165   __ align(CodeEntryAlignment);
3166   StubCodeMark mark(this, "StubRoutines", "squareToLen");
3167   address start = __ pc(); // entry point handed back to the caller
3168 
       // NOTE(review): presumably load_stub() places previously cached code for
       // this intrinsic at `start`, making generation unnecessary - confirm.
3169   if (SCCache::load_stub(this, vmIntrinsics::_squareToLen, "squareToLen", start)) {
3170     return start;
3171   }
3172 
3173   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3174   // Unix:  rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
       // Registers in which square_to_len() receives its operands; z is kept
       // in r8 rather than in rdx, where setup_arg_regs() leaves it.
3175   const Register x      = rdi;
3176   const Register len    = rsi;
3177   const Register z      = r8;
3178   const Register zlen   = rcx;
3179 
       // Temporaries handed to square_to_len().
3180  const Register tmp1      = r12;
3181  const Register tmp2      = r13;
3182  const Register tmp3      = r14;
3183  const Register tmp4      = r15;
3184  const Register tmp5      = rbx;
3185 
3186   BLOCK_COMMENT("Entry:");
3187   __ enter(); // required for proper stackwalking of RuntimeStub frame
3188 
3189   setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
3190                      // zlen => rcx
3191                      // r9 and r10 may be used to save non-volatile registers
       // Move z from rdx to r8 (the register named z above); rdx is then
       // passed, together with rax, as one of the trailing arguments below.
3192   __ movptr(r8, rdx);
3193   __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3194 
3195   restore_arg_regs();
3196 
3197   __ leave(); // required for proper stackwalking of RuntimeStub frame
3198   __ ret(0);
3199 
       // Called only after the complete stub, including ret, has been emitted.
3200   SCCache::store_stub(this, vmIntrinsics::_squareToLen, "squareToLen", start);
3201   return start;
3202 }
3203 
3204 address StubGenerator::generate_method_entry_barrier() {
3205   __ align(CodeEntryAlignment);
3206   StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
3207   address start = __ pc();
3208 
3209   Label deoptimize_label;
3210 
3211   __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
3212 
3213   BLOCK_COMMENT("Entry:");
3214   __ enter(); // save rbp
3215 
3216   // save c_rarg0, because we want to use that value.
3217   // We could do without it but then we depend on the number of slots used by pusha
3218   __ push(c_rarg0);
3219 
3220   __ lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address

3278 }
3279 
3280  /**
      *  Emits the "mulAdd" stub (registered under "StubRoutines") and returns
      *  the address of its first instruction. If SCCache::load_stub() reports
      *  success, the function returns immediately and none of the code below
      *  is emitted; otherwise the stub is generated and then handed to
      *  SCCache::store_stub().
      *
3281  *  Arguments:
3282  *
3283  *  Input:
3284  *    c_rarg0   - out address
3285  *    c_rarg1   - in address
3286  *    c_rarg2   - offset
3287  *    c_rarg3   - len
3288  * not Win64
3289  *    c_rarg4   - k
3290  * Win64
3291  *    rsp+40    - k
      *
      *  Returns:
      *    the pc captured right after the CodeEntryAlignment padding, i.e. the
      *    entry point of the stub.
3292  */
3293 address StubGenerator::generate_mulAdd() {
3294   __ align(CodeEntryAlignment);
3295   StubCodeMark mark(this, "StubRoutines", "mulAdd");
3296   address start = __ pc(); // entry point handed back to the caller
3297 
       // NOTE(review): presumably load_stub() places previously cached code for
       // this intrinsic at `start`, making generation unnecessary - confirm.
3298   if (SCCache::load_stub(this, vmIntrinsics::_mulAdd, "mulAdd", start)) {
3299     return start;
3300   }
3301 
3302   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3303   // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
       // Registers in which mul_add() receives its operands; offset is kept in
       // r11 rather than in rdx, where setup_arg_regs() leaves it.
3304   const Register out     = rdi;
3305   const Register in      = rsi;
3306   const Register offset  = r11;
3307   const Register len     = rcx;
3308   const Register k       = r8;
3309 
3310   // Next registers will be saved on stack in mul_add().
3311   const Register tmp1  = r12;
3312   const Register tmp2  = r13;
3313   const Register tmp3  = r14;
3314   const Register tmp4  = r15;
3315   const Register tmp5  = rbx;
3316 
3317   BLOCK_COMMENT("Entry:");
3318   __ enter(); // required for proper stackwalking of RuntimeStub frame
3319 
3320   setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
3321                      // len => rcx, k => r8
3322                      // r9 and r10 may be used to save non-volatile registers
3323 #ifdef _WIN64
3324   // last argument is on stack on Win64
       // NOTE(review): the header lists this slot as rsp+40; 6 * wordSize
       // presumably accounts for the extra slot pushed by enter() - confirm.
       // k is loaded with movl; presumably it is a 32-bit value - confirm.
3325   __ movl(k, Address(rsp, 6 * wordSize));
3326 #endif
3327   __ movptr(r11, rdx);  // move offset in rdx to offset(r11)
       // rdx, freed by the move above, is passed together with rax as the
       // trailing arguments of mul_add().
3328   __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3329 
3330   restore_arg_regs();
3331 
3332   __ leave(); // required for proper stackwalking of RuntimeStub frame
3333   __ ret(0);
3334 
       // Called only after the complete stub, including ret, has been emitted.
3335   SCCache::store_stub(this, vmIntrinsics::_mulAdd, "mulAdd", start);
3336   return start;
3337 }
3338 
3339 address StubGenerator::generate_bigIntegerRightShift() {
3340   __ align(CodeEntryAlignment);
3341   StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
3342   address start = __ pc();
3343 
3344   Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
3345   // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
3346   const Register newArr = rdi;
3347   const Register oldArr = rsi;
3348   const Register newIdx = rdx;
3349   const Register shiftCount = rcx;  // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
3350   const Register totalNumIter = r8;
3351 
3352   // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
3353   // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
3354   const Register tmp1 = r11;                    // Caller save.
3355   const Register tmp2 = rax;                    // Caller save.
< prev index next >