< prev index next >

src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

Print this page

   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "classfile/javaClasses.hpp"
  28 #include "classfile/vmIntrinsics.hpp"

  29 #include "compiler/oopMap.hpp"
  30 #include "gc/shared/barrierSet.hpp"
  31 #include "gc/shared/barrierSetAssembler.hpp"
  32 #include "gc/shared/barrierSetNMethod.hpp"
  33 #include "gc/shared/gc_globals.hpp"
  34 #include "memory/universe.hpp"
  35 #include "prims/jvmtiExport.hpp"
  36 #include "prims/upcallLinker.hpp"
  37 #include "runtime/arguments.hpp"
  38 #include "runtime/javaThread.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #include "runtime/stubRoutines.hpp"
  41 #include "stubGenerator_x86_64.hpp"
  42 #ifdef COMPILER2
  43 #include "opto/runtime.hpp"
  44 #include "opto/c2_globals.hpp"
  45 #endif
  46 #if INCLUDE_JVMCI
  47 #include "jvmci/jvmci_globals.hpp"
  48 #endif

3033 
3034 
3035 /**
      *  Emits the "multiplyToLen" stub and returns the address of its first
      *  instruction. The stub moves the incoming arguments into the registers
      *  named below and then issues multiply_to_len() through the `__`
      *  assembler macro.
      *
3036  *  Arguments:
3037  *
3038  *  Input:
3039  *    c_rarg0   - x address
3040  *    c_rarg1   - x length
3041  *    c_rarg2   - y address
3042  *    c_rarg3   - y length
3043  * not Win64
3044  *    c_rarg4   - z address
3045  * Win64
3046  *    rsp+40    - z address
      *
      *  Returns:
      *    the pc recorded right after alignment, i.e. the stub entry point
3047  */
3048 address StubGenerator::generate_multiplyToLen() {
3049   __ align(CodeEntryAlignment);
3050   StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
3051   address start = __ pc();
3052 




3053   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3054   // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
       // Registers the operands occupy when multiply_to_len() is issued below.
3055   const Register x     = rdi;
3056   const Register xlen  = rax;
3057   const Register y     = rsi;
3058   const Register ylen  = rcx;
3059   const Register z     = r8;
3060 
3061   // Next registers will be saved on stack in multiply_to_len().
3062   const Register tmp0  = r11;
3063   const Register tmp1  = r12;
3064   const Register tmp2  = r13;
3065   const Register tmp3  = r14;
3066   const Register tmp4  = r15;
3067   const Register tmp5  = rbx;
3068 
3069   BLOCK_COMMENT("Entry:");
3070   __ enter(); // required for proper stackwalking of RuntimeStub frame
3071 
3072   setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
3073                      // ylen => rcx, z => r8
3074                      // r9 and r10 may be used to save non-volatile registers
3075 #ifdef _WIN64
3076   // last argument (#4) is on stack on Win64
       // NOTE(review): the header documents rsp+40 at entry; the 6 * wordSize
       // offset presumably accounts for the extra slot pushed by enter() - confirm.
3077   __ movptr(z, Address(rsp, 6 * wordSize));
3078 #endif
3079 
       // Shuffle into the final layout: xlen rsi -> rax first, then y rdx -> rsi.
       // The order matters because rsi is read before it is overwritten with y.
3080   __ movptr(xlen, rsi);
3081   __ movptr(y,    rdx);
3082   __ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3083 
       // Counterpart of setup_arg_regs() above.
3084   restore_arg_regs();
3085 
3086   __ leave(); // required for proper stackwalking of RuntimeStub frame
3087   __ ret(0);
3088 

3089   return start;
3090 }
3091 
3092 /**
3093 *  Arguments:
3094 *
3095 *  Input:
3096 *    c_rarg0   - obja     address
3097 *    c_rarg1   - objb     address
3098 *    c_rarg3   - length   length
3099 *    c_rarg4   - scale    log2_array_indxscale
3100 *
3101 *  Output:
3102 *        rax   - int >= mismatched index, < 0 bitwise complement of tail
3103 */
3104 address StubGenerator::generate_vectorizedMismatch() {
3105   __ align(CodeEntryAlignment);
3106   StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
3107   address start = __ pc();
3108 

3140 
3141   return start;
3142 }
3143 
3144 /**
      *  Emits the "squareToLen" stub and returns the address of its first
      *  instruction. The stub moves z out of rdx into r8 and then issues
      *  square_to_len() through the `__` assembler macro.
      *
3145  *  Arguments:
3146  *
3147  *  Input:
3148  *    c_rarg0   - x address
3149  *    c_rarg1   - x length
3150  *    c_rarg2   - z address
3151  *    c_rarg3   - z length
3152  *
      *  Returns:
      *    the pc recorded right after alignment, i.e. the stub entry point
3153  */
3154 address StubGenerator::generate_squareToLen() {
3155 
3156   __ align(CodeEntryAlignment);
3157   StubCodeMark mark(this, "StubRoutines", "squareToLen");
3158   address start = __ pc();
3159 




3160   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3161   // Unix:  rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
       // Registers the operands occupy when square_to_len() is issued below.
3162   const Register x      = rdi;
3163   const Register len    = rsi;
3164   const Register z      = r8;
3165   const Register zlen   = rcx;
3166 
      // Additional registers handed to square_to_len().
3167  const Register tmp1      = r12;
3168  const Register tmp2      = r13;
3169  const Register tmp3      = r14;
3170  const Register tmp4      = r15;
3171  const Register tmp5      = rbx;
3172 
3173   BLOCK_COMMENT("Entry:");
3174   __ enter(); // required for proper stackwalking of RuntimeStub frame
3175 
3176   setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
3177                      // zlen => rcx
3178                      // r9 and r10 may be used to save non-volatile registers
       // Copy z from rdx into r8 (the register declared as z above); rdx itself is
       // then passed to square_to_len() as one of its two trailing register
       // arguments, together with rax.
3179   __ movptr(r8, rdx);
3180   __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3181 
       // Counterpart of setup_arg_regs() above.
3182   restore_arg_regs();
3183 
3184   __ leave(); // required for proper stackwalking of RuntimeStub frame
3185   __ ret(0);
3186 

3187   return start;
3188 }
3189 
3190 address StubGenerator::generate_method_entry_barrier() {
3191   __ align(CodeEntryAlignment);
3192   StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
3193   address start = __ pc();
3194 
3195   Label deoptimize_label;
3196 
3197   __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
3198 
3199   BLOCK_COMMENT("Entry:");
3200   __ enter(); // save rbp
3201 
3202   // save c_rarg0, because we want to use that value.
3203   // We could do without it but then we depend on the number of slots used by pusha
3204   __ push(c_rarg0);
3205 
3206   __ lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address

3264 }
3265 
3266  /**
      *  Emits the "mulAdd" stub and returns the address of its first
      *  instruction. The stub moves offset out of rdx into r11 and then issues
      *  mul_add() through the `__` assembler macro.
      *
3267  *  Arguments:
3268  *
3269  *  Input:
3270  *    c_rarg0   - out address
3271  *    c_rarg1   - in address
3272  *    c_rarg2   - offset
3273  *    c_rarg3   - len
3274  * not Win64
3275  *    c_rarg4   - k
3276  * Win64
3277  *    rsp+40    - k
      *
      *  Returns:
      *    the pc recorded right after alignment, i.e. the stub entry point
3278  */
3279 address StubGenerator::generate_mulAdd() {
3280   __ align(CodeEntryAlignment);
3281   StubCodeMark mark(this, "StubRoutines", "mulAdd");
3282   address start = __ pc();
3283 




3284   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3285   // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
       // Registers the operands occupy when mul_add() is issued below.
3286   const Register out     = rdi;
3287   const Register in      = rsi;
3288   const Register offset  = r11;
3289   const Register len     = rcx;
3290   const Register k       = r8;
3291 
3292   // Next registers will be saved on stack in mul_add().
3293   const Register tmp1  = r12;
3294   const Register tmp2  = r13;
3295   const Register tmp3  = r14;
3296   const Register tmp4  = r15;
3297   const Register tmp5  = rbx;
3298 
3299   BLOCK_COMMENT("Entry:");
3300   __ enter(); // required for proper stackwalking of RuntimeStub frame
3301 
3302   setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
3303                      // len => rcx, k => r8
3304                      // r9 and r10 may be used to save non-volatile registers
3305 #ifdef _WIN64
3306   // last argument is on stack on Win64
       // NOTE(review): the header documents rsp+40 at entry; the 6 * wordSize
       // offset presumably accounts for the extra slot pushed by enter() - confirm.
       // k is fetched with movl, unlike the movptr used for the other moves here.
3307   __ movl(k, Address(rsp, 6 * wordSize));
3308 #endif
3309   __ movptr(r11, rdx);  // move offset in rdx to offset(r11)
       // rdx, just copied into r11, is passed on to mul_add() together with rax as
       // its two trailing register arguments.
3310   __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3311 
       // Counterpart of setup_arg_regs() above.
3312   restore_arg_regs();
3313 
3314   __ leave(); // required for proper stackwalking of RuntimeStub frame
3315   __ ret(0);
3316 

3317   return start;
3318 }
3319 
3320 address StubGenerator::generate_bigIntegerRightShift() {
3321   __ align(CodeEntryAlignment);
3322   StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
3323   address start = __ pc();
3324 
3325   Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
3326   // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
3327   const Register newArr = rdi;
3328   const Register oldArr = rsi;
3329   const Register newIdx = rdx;
3330   const Register shiftCount = rcx;  // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
3331   const Register totalNumIter = r8;
3332 
3333   // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
3334   // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
3335   const Register tmp1 = r11;                    // Caller save.
3336   const Register tmp2 = rax;                    // Caller save.

   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "classfile/javaClasses.hpp"
  28 #include "classfile/vmIntrinsics.hpp"
  29 #include "code/SCCache.hpp"
  30 #include "compiler/oopMap.hpp"
  31 #include "gc/shared/barrierSet.hpp"
  32 #include "gc/shared/barrierSetAssembler.hpp"
  33 #include "gc/shared/barrierSetNMethod.hpp"
  34 #include "gc/shared/gc_globals.hpp"
  35 #include "memory/universe.hpp"
  36 #include "prims/jvmtiExport.hpp"
  37 #include "prims/upcallLinker.hpp"
  38 #include "runtime/arguments.hpp"
  39 #include "runtime/javaThread.hpp"
  40 #include "runtime/sharedRuntime.hpp"
  41 #include "runtime/stubRoutines.hpp"
  42 #include "stubGenerator_x86_64.hpp"
  43 #ifdef COMPILER2
  44 #include "opto/runtime.hpp"
  45 #include "opto/c2_globals.hpp"
  46 #endif
  47 #if INCLUDE_JVMCI
  48 #include "jvmci/jvmci_globals.hpp"
  49 #endif

3034 
3035 
3036 /**
      *  Emits the "multiplyToLen" stub and returns the address of its first
      *  instruction. SCCache::load_stub() is consulted first; when it reports
      *  success nothing further is emitted. Otherwise the stub moves the
      *  incoming arguments into the registers named below, issues
      *  multiply_to_len() through the `__` assembler macro and finally passes
      *  the result to SCCache::store_stub().
      *
3037  *  Arguments:
3038  *
3039  *  Input:
3040  *    c_rarg0   - x address
3041  *    c_rarg1   - x length
3042  *    c_rarg2   - y address
3043  *    c_rarg3   - y length
3044  * not Win64
3045  *    c_rarg4   - z address
3046  * Win64
3047  *    rsp+40    - z address
      *
      *  Returns:
      *    the pc recorded right after alignment, i.e. the stub entry point
3048  */
3049 address StubGenerator::generate_multiplyToLen() {
3050   __ align(CodeEntryAlignment);
3051   StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
3052   address start = __ pc();
3053 
       // Skip code generation when SCCache::load_stub() succeeds.
       // NOTE(review): presumably the cached code has then been placed at `start` -
       // confirm against SCCache.
3054   if (SCCache::load_stub(this, vmIntrinsics::_multiplyToLen, "multiplyToLen", start)) {
3055     return start;
3056   }
3057 
3058   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3059   // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
       // Registers the operands occupy when multiply_to_len() is issued below.
3060   const Register x     = rdi;
3061   const Register xlen  = rax;
3062   const Register y     = rsi;
3063   const Register ylen  = rcx;
3064   const Register z     = r8;
3065 
3066   // Next registers will be saved on stack in multiply_to_len().
3067   const Register tmp0  = r11;
3068   const Register tmp1  = r12;
3069   const Register tmp2  = r13;
3070   const Register tmp3  = r14;
3071   const Register tmp4  = r15;
3072   const Register tmp5  = rbx;
3073 
3074   BLOCK_COMMENT("Entry:");
3075   __ enter(); // required for proper stackwalking of RuntimeStub frame
3076 
3077   setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
3078                      // ylen => rcx, z => r8
3079                      // r9 and r10 may be used to save non-volatile registers
3080 #ifdef _WIN64
3081   // last argument (#4) is on stack on Win64
       // NOTE(review): the header documents rsp+40 at entry; the 6 * wordSize
       // offset presumably accounts for the extra slot pushed by enter() - confirm.
3082   __ movptr(z, Address(rsp, 6 * wordSize));
3083 #endif
3084 
       // Shuffle into the final layout: xlen rsi -> rax first, then y rdx -> rsi.
       // The order matters because rsi is read before it is overwritten with y.
3085   __ movptr(xlen, rsi);
3086   __ movptr(y,    rdx);
3087   __ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3088 
       // Counterpart of setup_arg_regs() above.
3089   restore_arg_regs();
3090 
3091   __ leave(); // required for proper stackwalking of RuntimeStub frame
3092   __ ret(0);
3093 
       // Hand the freshly generated stub to SCCache under the same intrinsic id
       // and name that were used for the lookup above.
3094   SCCache::store_stub(this, vmIntrinsics::_multiplyToLen, "multiplyToLen", start);
3095   return start;
3096 }
3097 
3098 /**
3099 *  Arguments:
3100 *
3101 *  Input:
3102 *    c_rarg0   - obja     address
3103 *    c_rarg1   - objb     address
3104 *    c_rarg3   - length   length
3105 *    c_rarg4   - scale    log2_array_indxscale
3106 *
3107 *  Output:
3108 *        rax   - int >= mismatched index, < 0 bitwise complement of tail
3109 */
3110 address StubGenerator::generate_vectorizedMismatch() {
3111   __ align(CodeEntryAlignment);
3112   StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
3113   address start = __ pc();
3114 

3146 
3147   return start;
3148 }
3149 
3150 /**
      *  Emits the "squareToLen" stub and returns the address of its first
      *  instruction. SCCache::load_stub() is consulted first; when it reports
      *  success nothing further is emitted. Otherwise the stub moves z out of
      *  rdx into r8, issues square_to_len() through the `__` assembler macro and
      *  finally passes the result to SCCache::store_stub().
      *
3151  *  Arguments:
3152  *
3153  *  Input:
3154  *    c_rarg0   - x address
3155  *    c_rarg1   - x length
3156  *    c_rarg2   - z address
3157  *    c_rarg3   - z length
3158  *
      *  Returns:
      *    the pc recorded right after alignment, i.e. the stub entry point
3159  */
3160 address StubGenerator::generate_squareToLen() {
3161 
3162   __ align(CodeEntryAlignment);
3163   StubCodeMark mark(this, "StubRoutines", "squareToLen");
3164   address start = __ pc();
3165 
       // Skip code generation when SCCache::load_stub() succeeds.
       // NOTE(review): presumably the cached code has then been placed at `start` -
       // confirm against SCCache.
3166   if (SCCache::load_stub(this, vmIntrinsics::_squareToLen, "squareToLen", start)) {
3167     return start;
3168   }
3169 
3170   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3171   // Unix:  rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
       // Registers the operands occupy when square_to_len() is issued below.
3172   const Register x      = rdi;
3173   const Register len    = rsi;
3174   const Register z      = r8;
3175   const Register zlen   = rcx;
3176 
      // Additional registers handed to square_to_len().
3177  const Register tmp1      = r12;
3178  const Register tmp2      = r13;
3179  const Register tmp3      = r14;
3180  const Register tmp4      = r15;
3181  const Register tmp5      = rbx;
3182 
3183   BLOCK_COMMENT("Entry:");
3184   __ enter(); // required for proper stackwalking of RuntimeStub frame
3185 
3186   setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
3187                      // zlen => rcx
3188                      // r9 and r10 may be used to save non-volatile registers
       // Copy z from rdx into r8 (the register declared as z above); rdx itself is
       // then passed to square_to_len() as one of its two trailing register
       // arguments, together with rax.
3189   __ movptr(r8, rdx);
3190   __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3191 
       // Counterpart of setup_arg_regs() above.
3192   restore_arg_regs();
3193 
3194   __ leave(); // required for proper stackwalking of RuntimeStub frame
3195   __ ret(0);
3196 
       // Hand the freshly generated stub to SCCache under the same intrinsic id
       // and name that were used for the lookup above.
3197   SCCache::store_stub(this, vmIntrinsics::_squareToLen, "squareToLen", start);
3198   return start;
3199 }
3200 
3201 address StubGenerator::generate_method_entry_barrier() {
3202   __ align(CodeEntryAlignment);
3203   StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
3204   address start = __ pc();
3205 
3206   Label deoptimize_label;
3207 
3208   __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
3209 
3210   BLOCK_COMMENT("Entry:");
3211   __ enter(); // save rbp
3212 
3213   // save c_rarg0, because we want to use that value.
3214   // We could do without it but then we depend on the number of slots used by pusha
3215   __ push(c_rarg0);
3216 
3217   __ lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address

3275 }
3276 
3277  /**
      *  Emits the "mulAdd" stub and returns the address of its first
      *  instruction. SCCache::load_stub() is consulted first; when it reports
      *  success nothing further is emitted. Otherwise the stub moves offset out
      *  of rdx into r11, issues mul_add() through the `__` assembler macro and
      *  finally passes the result to SCCache::store_stub().
      *
3278  *  Arguments:
3279  *
3280  *  Input:
3281  *    c_rarg0   - out address
3282  *    c_rarg1   - in address
3283  *    c_rarg2   - offset
3284  *    c_rarg3   - len
3285  * not Win64
3286  *    c_rarg4   - k
3287  * Win64
3288  *    rsp+40    - k
      *
      *  Returns:
      *    the pc recorded right after alignment, i.e. the stub entry point
3289  */
3290 address StubGenerator::generate_mulAdd() {
3291   __ align(CodeEntryAlignment);
3292   StubCodeMark mark(this, "StubRoutines", "mulAdd");
3293   address start = __ pc();
3294 
       // Skip code generation when SCCache::load_stub() succeeds.
       // NOTE(review): presumably the cached code has then been placed at `start` -
       // confirm against SCCache.
3295   if (SCCache::load_stub(this, vmIntrinsics::_mulAdd, "mulAdd", start)) {
3296     return start;
3297   }
3298 
3299   // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3300   // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
       // Registers the operands occupy when mul_add() is issued below.
3301   const Register out     = rdi;
3302   const Register in      = rsi;
3303   const Register offset  = r11;
3304   const Register len     = rcx;
3305   const Register k       = r8;
3306 
3307   // Next registers will be saved on stack in mul_add().
3308   const Register tmp1  = r12;
3309   const Register tmp2  = r13;
3310   const Register tmp3  = r14;
3311   const Register tmp4  = r15;
3312   const Register tmp5  = rbx;
3313 
3314   BLOCK_COMMENT("Entry:");
3315   __ enter(); // required for proper stackwalking of RuntimeStub frame
3316 
3317   setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
3318                      // len => rcx, k => r8
3319                      // r9 and r10 may be used to save non-volatile registers
3320 #ifdef _WIN64
3321   // last argument is on stack on Win64
       // NOTE(review): the header documents rsp+40 at entry; the 6 * wordSize
       // offset presumably accounts for the extra slot pushed by enter() - confirm.
       // k is fetched with movl, unlike the movptr used for the other moves here.
3322   __ movl(k, Address(rsp, 6 * wordSize));
3323 #endif
3324   __ movptr(r11, rdx);  // move offset in rdx to offset(r11)
       // rdx, just copied into r11, is passed on to mul_add() together with rax as
       // its two trailing register arguments.
3325   __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3326 
       // Counterpart of setup_arg_regs() above.
3327   restore_arg_regs();
3328 
3329   __ leave(); // required for proper stackwalking of RuntimeStub frame
3330   __ ret(0);
3331 
       // Hand the freshly generated stub to SCCache under the same intrinsic id
       // and name that were used for the lookup above.
3332   SCCache::store_stub(this, vmIntrinsics::_mulAdd, "mulAdd", start);
3333   return start;
3334 }
3335 
3336 address StubGenerator::generate_bigIntegerRightShift() {
3337   __ align(CodeEntryAlignment);
3338   StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
3339   address start = __ pc();
3340 
3341   Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
3342   // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
3343   const Register newArr = rdi;
3344   const Register oldArr = rsi;
3345   const Register newIdx = rdx;
3346   const Register shiftCount = rcx;  // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
3347   const Register totalNumIter = r8;
3348 
3349   // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
3350   // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
3351   const Register tmp1 = r11;                    // Caller save.
3352   const Register tmp2 = rax;                    // Caller save.
< prev index next >