< prev index next >

src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

Print this page

   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "classfile/vmIntrinsics.hpp"

  28 #include "compiler/oopMap.hpp"
  29 #include "gc/shared/barrierSet.hpp"
  30 #include "gc/shared/barrierSetAssembler.hpp"
  31 #include "gc/shared/barrierSetNMethod.hpp"
  32 #include "gc/shared/gc_globals.hpp"
  33 #include "memory/universe.hpp"
  34 #include "prims/jvmtiExport.hpp"
  35 #include "prims/upcallLinker.hpp"
  36 #include "runtime/arguments.hpp"
  37 #include "runtime/javaThread.hpp"
  38 #include "runtime/sharedRuntime.hpp"
  39 #include "runtime/stubRoutines.hpp"
  40 #include "stubGenerator_x86_64.hpp"
  41 #ifdef COMPILER2
  42 #include "opto/runtime.hpp"
  43 #include "opto/c2_globals.hpp"
  44 #endif
  45 #if INCLUDE_JVMCI
  46 #include "jvmci/jvmci_globals.hpp"
  47 #endif

3035 
3036 
3037 /**
      *  Emits the "multiplyToLen" stub (registered under "StubRoutines") and
      *  returns the address of its first instruction.
      *
3038  *  Arguments:
3039  *
3040  *  Input:
3041  *    c_rarg0   - x address
3042  *    c_rarg1   - x length
3043  *    c_rarg2   - y address
3044  *    c_rarg3   - y length
3045  * not Win64
3046  *    c_rarg4   - z address
3047  * Win64
3048  *    rsp+40    - z address
      *
      *  Return:
      *    the pc captured right after aligning to CodeEntryAlignment (stub entry)
3049  */
3050 address StubGenerator::generate_multiplyToLen() {
3051   __ align(CodeEntryAlignment);
3052   StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
3053   address start = __ pc();  // stub entry point; this is the value returned
3054 




3055   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3056   // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
       // Registers handed to multiply_to_len(). x, ylen and z are already where
       // setup_arg_regs() leaves them (rdi, rcx, r8); xlen and y are moved below.
3057   const Register x     = rdi;
3058   const Register xlen  = rax;
3059   const Register y     = rsi;
3060   const Register ylen  = rcx;
3061   const Register z     = r8;
3062 
3063   // Next registers will be saved on stack in multiply_to_len().
3064   const Register tmp0  = r11;
3065   const Register tmp1  = r12;
3066   const Register tmp2  = r13;
3067   const Register tmp3  = r14;
3068   const Register tmp4  = r15;
3069   const Register tmp5  = rbx;
3070 
3071   BLOCK_COMMENT("Entry:");
3072   __ enter(); // required for proper stackwalking of RuntimeStub frame
3073 
3074   setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
3075                      // ylen => rcx, z => r8
3076                      // r9 and r10 may be used to save non-volatile registers
3077 #ifdef _WIN64
3078   // last argument (#4) is on stack on Win64
       // NOTE(review): the header documents rsp+40 on entry; the offset here is
       // 6 * wordSize, presumably one extra word for the rbp pushed by enter() - confirm.
3079   __ movptr(z, Address(rsp, 6 * wordSize));
3080 #endif
3081 
       // Shuffle xlen and y into their final registers. Order matters: rsi (xlen)
       // must be copied to rax before rsi is overwritten with y from rdx.
       // NOTE(review): this leaves rdx unused by the argument list, presumably so
       // multiply_to_len() can use it internally - confirm.
3082   __ movptr(xlen, rsi);
3083   __ movptr(y,    rdx);
3084   __ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3085 
       // Presumably the counterpart of setup_arg_regs(4) above - confirm.
3086   restore_arg_regs();
3087 
3088   __ leave(); // required for proper stackwalking of RuntimeStub frame
3089   __ ret(0);
3090 

3091   return start;
3092 }
3093 
3094 /**
3095 *  Arguments:
3096 *
3097 *  Input:
3098 *    c_rarg0   - obja     address
3099 *    c_rarg1   - objb     address
3100 *    c_rarg2   - length   length
3101 *    c_rarg3   - scale    log2_array_indxscale
3102 *
3103 *  Output:
3104 *        rax   - int: >= 0 is the mismatched index, < 0 is the bitwise complement of the tail
3105 */
3106 address StubGenerator::generate_vectorizedMismatch() {
3107   __ align(CodeEntryAlignment);
3108   StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
3109   address start = __ pc();
3110 

3142 
3143   return start;
3144 }
3145 
3146 /**
      *  Emits the "squareToLen" stub (registered under "StubRoutines") and
      *  returns the address of its first instruction.
      *
3147  *  Arguments:
3148  *
3149  *  Input:
3150  *    c_rarg0   - x address
3151  *    c_rarg1   - x length
3152  *    c_rarg2   - z address
3153  *    c_rarg3   - z length
3154  *
      *  Return:
      *    the pc captured right after aligning to CodeEntryAlignment (stub entry)
3155  */
3156 address StubGenerator::generate_squareToLen() {
3157 
3158   __ align(CodeEntryAlignment);
3159   StubCodeMark mark(this, "StubRoutines", "squareToLen");
3160   address start = __ pc();  // stub entry point; this is the value returned
3161 




3162   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3163   // Unix:  rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
       // Registers handed to square_to_len(). x, len and zlen are already where
       // setup_arg_regs() leaves them (rdi, rsi, rcx); z is moved from rdx to r8 below.
3164   const Register x      = rdi;
3165   const Register len    = rsi;
3166   const Register z      = r8;
3167   const Register zlen   = rcx;
3168 
       // Temporaries passed through to square_to_len().
3169  const Register tmp1      = r12;
3170  const Register tmp2      = r13;
3171  const Register tmp3      = r14;
3172  const Register tmp4      = r15;
3173  const Register tmp5      = rbx;
3174 
3175   BLOCK_COMMENT("Entry:");
3176   __ enter(); // required for proper stackwalking of RuntimeStub frame
3177 
3178   setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
3179                      // zlen => rcx
3180                      // r9 and r10 may be used to save non-volatile registers
       // Move z out of rdx: rdx (together with rax) is passed to square_to_len()
       // as one of its last two register arguments.
3181   __ movptr(r8, rdx);
3182   __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3183 
       // Presumably the counterpart of setup_arg_regs(4) above - confirm.
3184   restore_arg_regs();
3185 
3186   __ leave(); // required for proper stackwalking of RuntimeStub frame
3187   __ ret(0);
3188 

3189   return start;
3190 }
3191 
3192 address StubGenerator::generate_method_entry_barrier() {
3193   __ align(CodeEntryAlignment);
3194   StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
3195   address start = __ pc();
3196 
3197   Label deoptimize_label;
3198 
3199   __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
3200 
3201   BLOCK_COMMENT("Entry:");
3202   __ enter(); // save rbp
3203 
3204   // save c_rarg0, because we want to use that value.
3205   // We could do without it but then we depend on the number of slots used by pusha
3206   __ push(c_rarg0);
3207 
3208   __ lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address

3266 }
3267 
3268  /**
      *  Emits the "mulAdd" stub (registered under "StubRoutines") and
      *  returns the address of its first instruction.
      *
3269  *  Arguments:
3270  *
3271  *  Input:
3272  *    c_rarg0   - out address
3273  *    c_rarg1   - in address
3274  *    c_rarg2   - offset
3275  *    c_rarg3   - len
3276  * not Win64
3277  *    c_rarg4   - k
3278  * Win64
3279  *    rsp+40    - k
      *
      *  Return:
      *    the pc captured right after aligning to CodeEntryAlignment (stub entry)
3280  */
3281 address StubGenerator::generate_mulAdd() {
3282   __ align(CodeEntryAlignment);
3283   StubCodeMark mark(this, "StubRoutines", "mulAdd");
3284   address start = __ pc();  // stub entry point; this is the value returned
3285 




3286   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3287   // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
       // Registers handed to mul_add(). out, in, len and k are already where
       // setup_arg_regs() leaves them (rdi, rsi, rcx, r8); offset is moved from
       // rdx to r11 below.
3288   const Register out     = rdi;
3289   const Register in      = rsi;
3290   const Register offset  = r11;
3291   const Register len     = rcx;
3292   const Register k       = r8;
3293 
3294   // Next registers will be saved on stack in mul_add().
3295   const Register tmp1  = r12;
3296   const Register tmp2  = r13;
3297   const Register tmp3  = r14;
3298   const Register tmp4  = r15;
3299   const Register tmp5  = rbx;
3300 
3301   BLOCK_COMMENT("Entry:");
3302   __ enter(); // required for proper stackwalking of RuntimeStub frame
3303 
3304   setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
3305                      // len => rcx, k => r8
3306                      // r9 and r10 may be used to save non-volatile registers
3307 #ifdef _WIN64
3308   // last argument is on stack on Win64
       // NOTE(review): the header documents rsp+40 on entry; the offset here is
       // 6 * wordSize, presumably one extra word for the rbp pushed by enter() - confirm.
       // k is loaded with movl here, whereas the pointer argument in
       // generate_multiplyToLen() is loaded with movptr.
3309   __ movl(k, Address(rsp, 6 * wordSize));
3310 #endif
       // rdx (together with rax) is passed to mul_add() as one of its last two
       // register arguments, so offset has to live elsewhere.
3311   __ movptr(r11, rdx);  // move offset in rdx to offset(r11)
3312   __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3313 
       // Presumably the counterpart of setup_arg_regs(4) above - confirm.
3314   restore_arg_regs();
3315 
3316   __ leave(); // required for proper stackwalking of RuntimeStub frame
3317   __ ret(0);
3318 

3319   return start;
3320 }
3321 
3322 address StubGenerator::generate_bigIntegerRightShift() {
3323   __ align(CodeEntryAlignment);
3324   StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
3325   address start = __ pc();
3326 
3327   Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
3328   // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
3329   const Register newArr = rdi;
3330   const Register oldArr = rsi;
3331   const Register newIdx = rdx;
3332   const Register shiftCount = rcx;  // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
3333   const Register totalNumIter = r8;
3334 
3335   // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
3336   // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
3337   const Register tmp1 = r11;                    // Caller save.
3338   const Register tmp2 = rax;                    // Caller save.

   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "classfile/vmIntrinsics.hpp"
  28 #include "code/SCCache.hpp"
  29 #include "compiler/oopMap.hpp"
  30 #include "gc/shared/barrierSet.hpp"
  31 #include "gc/shared/barrierSetAssembler.hpp"
  32 #include "gc/shared/barrierSetNMethod.hpp"
  33 #include "gc/shared/gc_globals.hpp"
  34 #include "memory/universe.hpp"
  35 #include "prims/jvmtiExport.hpp"
  36 #include "prims/upcallLinker.hpp"
  37 #include "runtime/arguments.hpp"
  38 #include "runtime/javaThread.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #include "runtime/stubRoutines.hpp"
  41 #include "stubGenerator_x86_64.hpp"
  42 #ifdef COMPILER2
  43 #include "opto/runtime.hpp"
  44 #include "opto/c2_globals.hpp"
  45 #endif
  46 #if INCLUDE_JVMCI
  47 #include "jvmci/jvmci_globals.hpp"
  48 #endif

3036 
3037 
3038 /**
      *  Emits the "multiplyToLen" stub (registered under "StubRoutines") and
      *  returns the address of its first instruction. The stub is first looked
      *  up through SCCache::load_stub(); only when that fails is the code
      *  generated and then handed to SCCache::store_stub().
      *
3039  *  Arguments:
3040  *
3041  *  Input:
3042  *    c_rarg0   - x address
3043  *    c_rarg1   - x length
3044  *    c_rarg2   - y address
3045  *    c_rarg3   - y length
3046  * not Win64
3047  *    c_rarg4   - z address
3048  * Win64
3049  *    rsp+40    - z address
      *
      *  Return:
      *    the pc captured right after aligning to CodeEntryAlignment (stub entry)
3050  */
3051 address StubGenerator::generate_multiplyToLen() {
3052   __ align(CodeEntryAlignment);
3053   StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
3054   address start = __ pc();  // stub entry point; this is the value returned
3055 
       // Fast path: when load_stub() reports success, return immediately and emit
       // nothing below. NOTE(review): presumably load_stub() has then placed
       // previously cached code for this stub at `start` - confirm against SCCache.
3056   if (SCCache::load_stub(this, vmIntrinsics::_multiplyToLen, "multiplyToLen", start)) {
3057     return start;
3058   }
3059 
3060   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3061   // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
       // Registers handed to multiply_to_len(). x, ylen and z are already where
       // setup_arg_regs() leaves them (rdi, rcx, r8); xlen and y are moved below.
3062   const Register x     = rdi;
3063   const Register xlen  = rax;
3064   const Register y     = rsi;
3065   const Register ylen  = rcx;
3066   const Register z     = r8;
3067 
3068   // Next registers will be saved on stack in multiply_to_len().
3069   const Register tmp0  = r11;
3070   const Register tmp1  = r12;
3071   const Register tmp2  = r13;
3072   const Register tmp3  = r14;
3073   const Register tmp4  = r15;
3074   const Register tmp5  = rbx;
3075 
3076   BLOCK_COMMENT("Entry:");
3077   __ enter(); // required for proper stackwalking of RuntimeStub frame
3078 
3079   setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
3080                      // ylen => rcx, z => r8
3081                      // r9 and r10 may be used to save non-volatile registers
3082 #ifdef _WIN64
3083   // last argument (#4) is on stack on Win64
       // NOTE(review): the header documents rsp+40 on entry; the offset here is
       // 6 * wordSize, presumably one extra word for the rbp pushed by enter() - confirm.
3084   __ movptr(z, Address(rsp, 6 * wordSize));
3085 #endif
3086 
       // Shuffle xlen and y into their final registers. Order matters: rsi (xlen)
       // must be copied to rax before rsi is overwritten with y from rdx.
       // NOTE(review): this leaves rdx unused by the argument list, presumably so
       // multiply_to_len() can use it internally - confirm.
3087   __ movptr(xlen, rsi);
3088   __ movptr(y,    rdx);
3089   __ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3090 
       // Presumably the counterpart of setup_arg_regs(4) above - confirm.
3091   restore_arg_regs();
3092 
3093   __ leave(); // required for proper stackwalking of RuntimeStub frame
3094   __ ret(0);
3095 
       // Called only on the generate path, after the final instruction is emitted.
       // NOTE(review): presumably records the code starting at `start` so a later
       // load_stub() can reuse it - confirm against SCCache.
3096   SCCache::store_stub(this, vmIntrinsics::_multiplyToLen, "multiplyToLen", start);
3097   return start;
3098 }
3099 
3100 /**
3101 *  Arguments:
3102 *
3103 *  Input:
3104 *    c_rarg0   - obja     address
3105 *    c_rarg1   - objb     address
3106 *    c_rarg2   - length   length
3107 *    c_rarg3   - scale    log2_array_indxscale
3108 *
3109 *  Output:
3110 *        rax   - int: >= 0 is the mismatched index, < 0 is the bitwise complement of the tail
3111 */
3112 address StubGenerator::generate_vectorizedMismatch() {
3113   __ align(CodeEntryAlignment);
3114   StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
3115   address start = __ pc();
3116 

3148 
3149   return start;
3150 }
3151 
3152 /**
      *  Emits the "squareToLen" stub (registered under "StubRoutines") and
      *  returns the address of its first instruction. The stub is first looked
      *  up through SCCache::load_stub(); only when that fails is the code
      *  generated and then handed to SCCache::store_stub().
      *
3153  *  Arguments:
3154  *
3155  *  Input:
3156  *    c_rarg0   - x address
3157  *    c_rarg1   - x length
3158  *    c_rarg2   - z address
3159  *    c_rarg3   - z length
3160  *
      *  Return:
      *    the pc captured right after aligning to CodeEntryAlignment (stub entry)
3161  */
3162 address StubGenerator::generate_squareToLen() {
3163 
3164   __ align(CodeEntryAlignment);
3165   StubCodeMark mark(this, "StubRoutines", "squareToLen");
3166   address start = __ pc();  // stub entry point; this is the value returned
3167 
       // Fast path: when load_stub() reports success, return immediately and emit
       // nothing below. NOTE(review): presumably load_stub() has then placed
       // previously cached code for this stub at `start` - confirm against SCCache.
3168   if (SCCache::load_stub(this, vmIntrinsics::_squareToLen, "squareToLen", start)) {
3169     return start;
3170   }
3171 
3172   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3173   // Unix:  rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
       // Registers handed to square_to_len(). x, len and zlen are already where
       // setup_arg_regs() leaves them (rdi, rsi, rcx); z is moved from rdx to r8 below.
3174   const Register x      = rdi;
3175   const Register len    = rsi;
3176   const Register z      = r8;
3177   const Register zlen   = rcx;
3178 
       // Temporaries passed through to square_to_len().
3179  const Register tmp1      = r12;
3180  const Register tmp2      = r13;
3181  const Register tmp3      = r14;
3182  const Register tmp4      = r15;
3183  const Register tmp5      = rbx;
3184 
3185   BLOCK_COMMENT("Entry:");
3186   __ enter(); // required for proper stackwalking of RuntimeStub frame
3187 
3188   setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
3189                      // zlen => rcx
3190                      // r9 and r10 may be used to save non-volatile registers
       // Move z out of rdx: rdx (together with rax) is passed to square_to_len()
       // as one of its last two register arguments.
3191   __ movptr(r8, rdx);
3192   __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3193 
       // Presumably the counterpart of setup_arg_regs(4) above - confirm.
3194   restore_arg_regs();
3195 
3196   __ leave(); // required for proper stackwalking of RuntimeStub frame
3197   __ ret(0);
3198 
       // Called only on the generate path, after the final instruction is emitted.
       // NOTE(review): presumably records the code starting at `start` so a later
       // load_stub() can reuse it - confirm against SCCache.
3199   SCCache::store_stub(this, vmIntrinsics::_squareToLen, "squareToLen", start);
3200   return start;
3201 }
3202 
3203 address StubGenerator::generate_method_entry_barrier() {
3204   __ align(CodeEntryAlignment);
3205   StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
3206   address start = __ pc();
3207 
3208   Label deoptimize_label;
3209 
3210   __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
3211 
3212   BLOCK_COMMENT("Entry:");
3213   __ enter(); // save rbp
3214 
3215   // save c_rarg0, because we want to use that value.
3216   // We could do without it but then we depend on the number of slots used by pusha
3217   __ push(c_rarg0);
3218 
3219   __ lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address

3277 }
3278 
3279  /**
      *  Emits the "mulAdd" stub (registered under "StubRoutines") and
      *  returns the address of its first instruction. The stub is first looked
      *  up through SCCache::load_stub(); only when that fails is the code
      *  generated and then handed to SCCache::store_stub().
      *
3280  *  Arguments:
3281  *
3282  *  Input:
3283  *    c_rarg0   - out address
3284  *    c_rarg1   - in address
3285  *    c_rarg2   - offset
3286  *    c_rarg3   - len
3287  * not Win64
3288  *    c_rarg4   - k
3289  * Win64
3290  *    rsp+40    - k
      *
      *  Return:
      *    the pc captured right after aligning to CodeEntryAlignment (stub entry)
3291  */
3292 address StubGenerator::generate_mulAdd() {
3293   __ align(CodeEntryAlignment);
3294   StubCodeMark mark(this, "StubRoutines", "mulAdd");
3295   address start = __ pc();  // stub entry point; this is the value returned
3296 
       // Fast path: when load_stub() reports success, return immediately and emit
       // nothing below. NOTE(review): presumably load_stub() has then placed
       // previously cached code for this stub at `start` - confirm against SCCache.
3297   if (SCCache::load_stub(this, vmIntrinsics::_mulAdd, "mulAdd", start)) {
3298     return start;
3299   }
3300 
3301   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3302   // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
       // Registers handed to mul_add(). out, in, len and k are already where
       // setup_arg_regs() leaves them (rdi, rsi, rcx, r8); offset is moved from
       // rdx to r11 below.
3303   const Register out     = rdi;
3304   const Register in      = rsi;
3305   const Register offset  = r11;
3306   const Register len     = rcx;
3307   const Register k       = r8;
3308 
3309   // Next registers will be saved on stack in mul_add().
3310   const Register tmp1  = r12;
3311   const Register tmp2  = r13;
3312   const Register tmp3  = r14;
3313   const Register tmp4  = r15;
3314   const Register tmp5  = rbx;
3315 
3316   BLOCK_COMMENT("Entry:");
3317   __ enter(); // required for proper stackwalking of RuntimeStub frame
3318 
3319   setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
3320                      // len => rcx, k => r8
3321                      // r9 and r10 may be used to save non-volatile registers
3322 #ifdef _WIN64
3323   // last argument is on stack on Win64
       // NOTE(review): the header documents rsp+40 on entry; the offset here is
       // 6 * wordSize, presumably one extra word for the rbp pushed by enter() - confirm.
       // k is loaded with movl here, whereas the pointer argument in
       // generate_multiplyToLen() is loaded with movptr.
3324   __ movl(k, Address(rsp, 6 * wordSize));
3325 #endif
       // rdx (together with rax) is passed to mul_add() as one of its last two
       // register arguments, so offset has to live elsewhere.
3326   __ movptr(r11, rdx);  // move offset in rdx to offset(r11)
3327   __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3328 
       // Presumably the counterpart of setup_arg_regs(4) above - confirm.
3329   restore_arg_regs();
3330 
3331   __ leave(); // required for proper stackwalking of RuntimeStub frame
3332   __ ret(0);
3333 
       // Called only on the generate path, after the final instruction is emitted.
       // NOTE(review): presumably records the code starting at `start` so a later
       // load_stub() can reuse it - confirm against SCCache.
3334   SCCache::store_stub(this, vmIntrinsics::_mulAdd, "mulAdd", start);
3335   return start;
3336 }
3337 
3338 address StubGenerator::generate_bigIntegerRightShift() {
3339   __ align(CodeEntryAlignment);
3340   StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
3341   address start = __ pc();
3342 
3343   Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
3344   // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
3345   const Register newArr = rdi;
3346   const Register oldArr = rsi;
3347   const Register newIdx = rdx;
3348   const Register shiftCount = rcx;  // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
3349   const Register totalNumIter = r8;
3350 
3351   // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
3352   // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
3353   const Register tmp1 = r11;                    // Caller save.
3354   const Register tmp2 = rax;                    // Caller save.
< prev index next >