< prev index next >

src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

Print this page

   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "classfile/vmIntrinsics.hpp"

  28 #include "compiler/oopMap.hpp"
  29 #include "gc/shared/barrierSet.hpp"
  30 #include "gc/shared/barrierSetAssembler.hpp"
  31 #include "gc/shared/barrierSetNMethod.hpp"
  32 #include "gc/shared/gc_globals.hpp"
  33 #include "memory/universe.hpp"
  34 #include "prims/jvmtiExport.hpp"
  35 #include "prims/upcallLinker.hpp"
  36 #include "runtime/arguments.hpp"
  37 #include "runtime/javaThread.hpp"
  38 #include "runtime/sharedRuntime.hpp"
  39 #include "runtime/stubRoutines.hpp"
  40 #include "stubGenerator_x86_64.hpp"
  41 #ifdef COMPILER2
  42 #include "opto/runtime.hpp"
  43 #include "opto/c2_globals.hpp"
  44 #endif
  45 #if INCLUDE_JVMCI
  46 #include "jvmci/jvmci_globals.hpp"
  47 #endif

3037 /**
      *  Emits the "multiplyToLen" stub: a frame-setup wrapper around
      *  MacroAssembler::multiply_to_len(). Returns the pc captured before any
      *  stub code is emitted, i.e. the stub entry address.
      *
3038  *  Arguments:
3039  *
3040  *  Input:
3041  *    c_rarg0   - x address
3042  *    c_rarg1   - x length
3043  *    c_rarg2   - y address
3044  *    c_rarg3   - y length
3045  * not Win64
3046  *    c_rarg4   - z address
3047  *    c_rarg5   - z length
3048  * Win64
3049  *    rsp+40    - z address
3050  *    rsp+48    - z length
      *
      *  NOTE(review): the Win64 offsets above are presumably relative to rsp at
      *  stub entry; the body reads the same slots after enter(), at
      *  6 * wordSize and 7 * wordSize.
3051  */
3052 address StubGenerator::generate_multiplyToLen() {
3053   __ align(CodeEntryAlignment);
3054   StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
3055   address start = __ pc(); // entry address returned to the caller
3056 




3057   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3058   // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
       // Registers in which multiply_to_len() receives its operands below.
3059   const Register x     = rdi;
3060   const Register xlen  = rax;
3061   const Register y     = rsi;
3062   const Register ylen  = rcx;
3063   const Register z     = r8;
3064   const Register zlen  = r11;
3065 
3066   // Next registers will be saved on stack in multiply_to_len().
3067   const Register tmp1  = r12;
3068   const Register tmp2  = r13;
3069   const Register tmp3  = r14;
3070   const Register tmp4  = r15;
3071   const Register tmp5  = rbx;
3072 
3073   BLOCK_COMMENT("Entry:");
3074   __ enter(); // required for proper stackwalking of RuntimeStub frame
3075 
3076 #ifndef _WIN64
3077   __ movptr(zlen, r9); // zlen arrives in r9 (6th argument register); copy it to r11 before setup_arg_regs()
3078 #endif
3079   setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
3080                      // ylen => rcx, z => r8, zlen => r11
3081                      // r9 and r10 may be used to save non-volatile registers
3082 #ifdef _WIN64
3083   // last 2 arguments (#4, #5) are on stack on Win64
3084   __ movptr(z, Address(rsp, 6 * wordSize));
3085   __ movptr(zlen, Address(rsp, 7 * wordSize));
3086 #endif
3087 
3088   __ movptr(xlen, rsi); // xlen: rsi -> rax; must precede the next move, which overwrites rsi
3089   __ movptr(y,    rdx); // y: rdx -> rsi
3090   __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5);
3091 
3092   restore_arg_regs(); // counterpart of setup_arg_regs() above
3093 
3094   __ leave(); // required for proper stackwalking of RuntimeStub frame
3095   __ ret(0);
3096 

3097   return start;
3098 }
3099 
3100 /**
3101 *  Arguments:
3102 *
3103 *  Input:
3104 *    c_rarg0   - obja     address
3105 *    c_rarg1   - objb     address
3106 *    c_rarg2   - length   length
3107 *    c_rarg3   - scale    log2_array_indxscale
3108 *
3109 *  Output:
3110 *        rax   - int >= mismatched index, < 0 bitwise complement of tail
3111 */
3112 address StubGenerator::generate_vectorizedMismatch() {
3113   __ align(CodeEntryAlignment);
3114   StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
3115   address start = __ pc();
3116 

3148 
3149   return start;
3150 }
3151 
3152 /**
      *  Emits the "squareToLen" stub: a frame-setup wrapper around
      *  MacroAssembler::square_to_len(). Returns the pc captured before any
      *  stub code is emitted, i.e. the stub entry address.
      *
3153  *  Arguments:
3154  *
3155  *  Input:
3156  *    c_rarg0   - x address
3157  *    c_rarg1   - x length
3158  *    c_rarg2   - z address
3159  *    c_rarg3   - z length
3160  *
3161  */
3162 address StubGenerator::generate_squareToLen() {
3163 
3164   __ align(CodeEntryAlignment);
3165   StubCodeMark mark(this, "StubRoutines", "squareToLen");
3166   address start = __ pc(); // entry address returned to the caller
3167 




3168   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3169   // Unix:  rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
       // Registers in which square_to_len() receives its operands below.
3170   const Register x      = rdi;
3171   const Register len    = rsi;
3172   const Register z      = r8;
3173   const Register zlen   = rcx;
3174 
3175  const Register tmp1      = r12;
3176  const Register tmp2      = r13;
3177  const Register tmp3      = r14;
3178  const Register tmp4      = r15;
3179  const Register tmp5      = rbx;
3180 
3181   BLOCK_COMMENT("Entry:");
3182   __ enter(); // required for proper stackwalking of RuntimeStub frame
3183 
3184   setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
3185                      // zlen => rcx
3186                      // r9 and r10 may be used to save non-volatile registers
3187   __ movptr(r8, rdx); // z: rdx -> r8 (the register named z above)
       // rdx and rax are handed to square_to_len() as its last two arguments.
       // NOTE(review): presumably scratch registers for the multiply - confirm.
3188   __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3189 
3190   restore_arg_regs(); // counterpart of setup_arg_regs() above
3191 
3192   __ leave(); // required for proper stackwalking of RuntimeStub frame
3193   __ ret(0);
3194 

3195   return start;
3196 }
3197 
3198 address StubGenerator::generate_method_entry_barrier() {
3199   __ align(CodeEntryAlignment);
3200   StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
3201   address start = __ pc();
3202 
3203   Label deoptimize_label;
3204 
3205   __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
3206 
3207   BLOCK_COMMENT("Entry:");
3208   __ enter(); // save rbp
3209 
3210   // save c_rarg0, because we want to use that value.
3211   // We could do without it but then we depend on the number of slots used by pusha
3212   __ push(c_rarg0);
3213 
3214   __ lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address

3272 }
3273 
3274  /**
      *  Emits the "mulAdd" stub: a frame-setup wrapper around
      *  MacroAssembler::mul_add(). Returns the pc captured before any stub
      *  code is emitted, i.e. the stub entry address.
      *
3275  *  Arguments:
3276  *
3277  *  Input:
3278  *    c_rarg0   - out address
3279  *    c_rarg1   - in address
3280  *    c_rarg2   - offset
3281  *    c_rarg3   - len
3282  * not Win64
3283  *    c_rarg4   - k
3284  * Win64
3285  *    rsp+40    - k
      *
      *  NOTE(review): the Win64 offset above is presumably relative to rsp at
      *  stub entry; the body reads the same slot after enter(), at
      *  6 * wordSize.
3286  */
3287 address StubGenerator::generate_mulAdd() {
3288   __ align(CodeEntryAlignment);
3289   StubCodeMark mark(this, "StubRoutines", "mulAdd");
3290   address start = __ pc(); // entry address returned to the caller
3291 




3292   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3293   // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
       // Registers in which mul_add() receives its operands below.
3294   const Register out     = rdi;
3295   const Register in      = rsi;
3296   const Register offset  = r11;
3297   const Register len     = rcx;
3298   const Register k       = r8;
3299 
3300   // Next registers will be saved on stack in mul_add().
3301   const Register tmp1  = r12;
3302   const Register tmp2  = r13;
3303   const Register tmp3  = r14;
3304   const Register tmp4  = r15;
3305   const Register tmp5  = rbx;
3306 
3307   BLOCK_COMMENT("Entry:");
3308   __ enter(); // required for proper stackwalking of RuntimeStub frame
3309 
3310   setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
3311                      // len => rcx, k => r8
3312                      // r9 and r10 may be used to save non-volatile registers
3313 #ifdef _WIN64
3314   // last argument is on stack on Win64
3315   __ movl(k, Address(rsp, 6 * wordSize)); // k is loaded with movl, not movptr
3316 #endif
3317   __ movptr(r11, rdx);  // move offset in rdx to offset(r11)
       // rdx and rax are handed to mul_add() as its last two arguments.
       // NOTE(review): presumably scratch registers for the multiply - confirm.
3318   __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3319 
3320   restore_arg_regs(); // counterpart of setup_arg_regs() above
3321 
3322   __ leave(); // required for proper stackwalking of RuntimeStub frame
3323   __ ret(0);
3324 

3325   return start;
3326 }
3327 
3328 address StubGenerator::generate_bigIntegerRightShift() {
3329   __ align(CodeEntryAlignment);
3330   StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
3331   address start = __ pc();
3332 
3333   Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
3334   // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
3335   const Register newArr = rdi;
3336   const Register oldArr = rsi;
3337   const Register newIdx = rdx;
3338   const Register shiftCount = rcx;  // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
3339   const Register totalNumIter = r8;
3340 
3341   // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
3342   // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
3343   const Register tmp1 = r11;                    // Caller save.
3344   const Register tmp2 = rax;                    // Caller save.

   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "classfile/vmIntrinsics.hpp"
  28 #include "code/SCCache.hpp"
  29 #include "compiler/oopMap.hpp"
  30 #include "gc/shared/barrierSet.hpp"
  31 #include "gc/shared/barrierSetAssembler.hpp"
  32 #include "gc/shared/barrierSetNMethod.hpp"
  33 #include "gc/shared/gc_globals.hpp"
  34 #include "memory/universe.hpp"
  35 #include "prims/jvmtiExport.hpp"
  36 #include "prims/upcallLinker.hpp"
  37 #include "runtime/arguments.hpp"
  38 #include "runtime/javaThread.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #include "runtime/stubRoutines.hpp"
  41 #include "stubGenerator_x86_64.hpp"
  42 #ifdef COMPILER2
  43 #include "opto/runtime.hpp"
  44 #include "opto/c2_globals.hpp"
  45 #endif
  46 #if INCLUDE_JVMCI
  47 #include "jvmci/jvmci_globals.hpp"
  48 #endif

3038 /**
      *  Emits the "multiplyToLen" stub: a frame-setup wrapper around
      *  MacroAssembler::multiply_to_len(). Returns the pc captured before any
      *  stub code is emitted, i.e. the stub entry address. Code generation is
      *  skipped when SCCache::load_stub() reports success.
      *
3039  *  Arguments:
3040  *
3041  *  Input:
3042  *    c_rarg0   - x address
3043  *    c_rarg1   - x length
3044  *    c_rarg2   - y address
3045  *    c_rarg3   - y length
3046  * not Win64
3047  *    c_rarg4   - z address
3048  *    c_rarg5   - z length
3049  * Win64
3050  *    rsp+40    - z address
3051  *    rsp+48    - z length
      *
      *  NOTE(review): the Win64 offsets above are presumably relative to rsp at
      *  stub entry; the body reads the same slots after enter(), at
      *  6 * wordSize and 7 * wordSize.
3052  */
3053 address StubGenerator::generate_multiplyToLen() {
3054   __ align(CodeEntryAlignment);
3055   StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
3056   address start = __ pc(); // entry address returned to the caller
3057 
       // NOTE(review): presumably load_stub() installs a previously stored copy
       // of this stub at 'start'; on success nothing below is emitted - confirm.
3058   if (SCCache::load_stub(this, vmIntrinsics::_multiplyToLen, "multiplyToLen", start)) {
3059     return start;
3060   }
3061 
3062   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3063   // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
       // Registers in which multiply_to_len() receives its operands below.
3064   const Register x     = rdi;
3065   const Register xlen  = rax;
3066   const Register y     = rsi;
3067   const Register ylen  = rcx;
3068   const Register z     = r8;
3069   const Register zlen  = r11;
3070 
3071   // Next registers will be saved on stack in multiply_to_len().
3072   const Register tmp1  = r12;
3073   const Register tmp2  = r13;
3074   const Register tmp3  = r14;
3075   const Register tmp4  = r15;
3076   const Register tmp5  = rbx;
3077 
3078   BLOCK_COMMENT("Entry:");
3079   __ enter(); // required for proper stackwalking of RuntimeStub frame
3080 
3081 #ifndef _WIN64
3082   __ movptr(zlen, r9); // zlen arrives in r9 (6th argument register); copy it to r11 before setup_arg_regs()
3083 #endif
3084   setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
3085                      // ylen => rcx, z => r8, zlen => r11
3086                      // r9 and r10 may be used to save non-volatile registers
3087 #ifdef _WIN64
3088   // last 2 arguments (#4, #5) are on stack on Win64
3089   __ movptr(z, Address(rsp, 6 * wordSize));
3090   __ movptr(zlen, Address(rsp, 7 * wordSize));
3091 #endif
3092 
3093   __ movptr(xlen, rsi); // xlen: rsi -> rax; must precede the next move, which overwrites rsi
3094   __ movptr(y,    rdx); // y: rdx -> rsi
3095   __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5);
3096 
3097   restore_arg_regs(); // counterpart of setup_arg_regs() above
3098 
3099   __ leave(); // required for proper stackwalking of RuntimeStub frame
3100   __ ret(0);
3101 
       // NOTE(review): presumably records the code emitted since 'start' so a
       // later load_stub() with the same id/name can reuse it - confirm.
3102   SCCache::store_stub(this, vmIntrinsics::_multiplyToLen, "multiplyToLen", start);
3103   return start;
3104 }
3105 
3106 /**
3107 *  Arguments:
3108 *
3109 *  Input:
3110 *    c_rarg0   - obja     address
3111 *    c_rarg1   - objb     address
3112 *    c_rarg2   - length   length
3113 *    c_rarg3   - scale    log2_array_indxscale
3114 *
3115 *  Output:
3116 *        rax   - int >= mismatched index, < 0 bitwise complement of tail
3117 */
3118 address StubGenerator::generate_vectorizedMismatch() {
3119   __ align(CodeEntryAlignment);
3120   StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
3121   address start = __ pc();
3122 

3154 
3155   return start;
3156 }
3157 
3158 /**
      *  Emits the "squareToLen" stub: a frame-setup wrapper around
      *  MacroAssembler::square_to_len(). Returns the pc captured before any
      *  stub code is emitted, i.e. the stub entry address. Code generation is
      *  skipped when SCCache::load_stub() reports success.
      *
3159  *  Arguments:
3160  *
3161  *  Input:
3162  *    c_rarg0   - x address
3163  *    c_rarg1   - x length
3164  *    c_rarg2   - z address
3165  *    c_rarg3   - z length
3166  *
3167  */
3168 address StubGenerator::generate_squareToLen() {
3169 
3170   __ align(CodeEntryAlignment);
3171   StubCodeMark mark(this, "StubRoutines", "squareToLen");
3172   address start = __ pc(); // entry address returned to the caller
3173 
       // NOTE(review): presumably load_stub() installs a previously stored copy
       // of this stub at 'start'; on success nothing below is emitted - confirm.
3174   if (SCCache::load_stub(this, vmIntrinsics::_squareToLen, "squareToLen", start)) {
3175     return start;
3176   }
3177 
3178   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3179   // Unix:  rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
       // Registers in which square_to_len() receives its operands below.
3180   const Register x      = rdi;
3181   const Register len    = rsi;
3182   const Register z      = r8;
3183   const Register zlen   = rcx;
3184 
3185  const Register tmp1      = r12;
3186  const Register tmp2      = r13;
3187  const Register tmp3      = r14;
3188  const Register tmp4      = r15;
3189  const Register tmp5      = rbx;
3190 
3191   BLOCK_COMMENT("Entry:");
3192   __ enter(); // required for proper stackwalking of RuntimeStub frame
3193 
3194   setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
3195                      // zlen => rcx
3196                      // r9 and r10 may be used to save non-volatile registers
3197   __ movptr(r8, rdx); // z: rdx -> r8 (the register named z above)
       // rdx and rax are handed to square_to_len() as its last two arguments.
       // NOTE(review): presumably scratch registers for the multiply - confirm.
3198   __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3199 
3200   restore_arg_regs(); // counterpart of setup_arg_regs() above
3201 
3202   __ leave(); // required for proper stackwalking of RuntimeStub frame
3203   __ ret(0);
3204 
       // NOTE(review): presumably records the code emitted since 'start' so a
       // later load_stub() with the same id/name can reuse it - confirm.
3205   SCCache::store_stub(this, vmIntrinsics::_squareToLen, "squareToLen", start);
3206   return start;
3207 }
3208 
3209 address StubGenerator::generate_method_entry_barrier() {
3210   __ align(CodeEntryAlignment);
3211   StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
3212   address start = __ pc();
3213 
3214   Label deoptimize_label;
3215 
3216   __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
3217 
3218   BLOCK_COMMENT("Entry:");
3219   __ enter(); // save rbp
3220 
3221   // save c_rarg0, because we want to use that value.
3222   // We could do without it but then we depend on the number of slots used by pusha
3223   __ push(c_rarg0);
3224 
3225   __ lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address

3283 }
3284 
3285  /**
      *  Emits the "mulAdd" stub: a frame-setup wrapper around
      *  MacroAssembler::mul_add(). Returns the pc captured before any stub
      *  code is emitted, i.e. the stub entry address. Code generation is
      *  skipped when SCCache::load_stub() reports success.
      *
3286  *  Arguments:
3287  *
3288  *  Input:
3289  *    c_rarg0   - out address
3290  *    c_rarg1   - in address
3291  *    c_rarg2   - offset
3292  *    c_rarg3   - len
3293  * not Win64
3294  *    c_rarg4   - k
3295  * Win64
3296  *    rsp+40    - k
      *
      *  NOTE(review): the Win64 offset above is presumably relative to rsp at
      *  stub entry; the body reads the same slot after enter(), at
      *  6 * wordSize.
3297  */
3298 address StubGenerator::generate_mulAdd() {
3299   __ align(CodeEntryAlignment);
3300   StubCodeMark mark(this, "StubRoutines", "mulAdd");
3301   address start = __ pc(); // entry address returned to the caller
3302 
       // NOTE(review): presumably load_stub() installs a previously stored copy
       // of this stub at 'start'; on success nothing below is emitted - confirm.
3303   if (SCCache::load_stub(this, vmIntrinsics::_mulAdd, "mulAdd", start)) {
3304     return start;
3305   }
3306 
3307   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3308   // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
       // Registers in which mul_add() receives its operands below.
3309   const Register out     = rdi;
3310   const Register in      = rsi;
3311   const Register offset  = r11;
3312   const Register len     = rcx;
3313   const Register k       = r8;
3314 
3315   // Next registers will be saved on stack in mul_add().
3316   const Register tmp1  = r12;
3317   const Register tmp2  = r13;
3318   const Register tmp3  = r14;
3319   const Register tmp4  = r15;
3320   const Register tmp5  = rbx;
3321 
3322   BLOCK_COMMENT("Entry:");
3323   __ enter(); // required for proper stackwalking of RuntimeStub frame
3324 
3325   setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
3326                      // len => rcx, k => r8
3327                      // r9 and r10 may be used to save non-volatile registers
3328 #ifdef _WIN64
3329   // last argument is on stack on Win64
3330   __ movl(k, Address(rsp, 6 * wordSize)); // k is loaded with movl, not movptr
3331 #endif
3332   __ movptr(r11, rdx);  // move offset in rdx to offset(r11)
       // rdx and rax are handed to mul_add() as its last two arguments.
       // NOTE(review): presumably scratch registers for the multiply - confirm.
3333   __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3334 
3335   restore_arg_regs(); // counterpart of setup_arg_regs() above
3336 
3337   __ leave(); // required for proper stackwalking of RuntimeStub frame
3338   __ ret(0);
3339 
       // NOTE(review): presumably records the code emitted since 'start' so a
       // later load_stub() with the same id/name can reuse it - confirm.
3340   SCCache::store_stub(this, vmIntrinsics::_mulAdd, "mulAdd", start);
3341   return start;
3342 }
3343 
3344 address StubGenerator::generate_bigIntegerRightShift() {
3345   __ align(CodeEntryAlignment);
3346   StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
3347   address start = __ pc();
3348 
3349   Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
3350   // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
3351   const Register newArr = rdi;
3352   const Register oldArr = rsi;
3353   const Register newIdx = rdx;
3354   const Register shiftCount = rcx;  // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
3355   const Register totalNumIter = r8;
3356 
3357   // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
3358   // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
3359   const Register tmp1 = r11;                    // Caller save.
3360   const Register tmp2 = rax;                    // Caller save.
< prev index next >