8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/macroAssembler.hpp"
27 #include "classfile/vmIntrinsics.hpp"
28 #include "compiler/oopMap.hpp"
29 #include "gc/shared/barrierSet.hpp"
30 #include "gc/shared/barrierSetAssembler.hpp"
31 #include "gc/shared/barrierSetNMethod.hpp"
32 #include "gc/shared/gc_globals.hpp"
33 #include "memory/universe.hpp"
34 #include "prims/jvmtiExport.hpp"
35 #include "prims/upcallLinker.hpp"
36 #include "runtime/arguments.hpp"
37 #include "runtime/javaThread.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #include "runtime/stubRoutines.hpp"
40 #include "stubGenerator_x86_64.hpp"
41 #ifdef COMPILER2
42 #include "opto/runtime.hpp"
43 #include "opto/c2_globals.hpp"
44 #endif
45 #if INCLUDE_JVMCI
46 #include "jvmci/jvmci_globals.hpp"
47 #endif
3035
3036
3037 /**
 * Emits the stub marked as "StubRoutines" / "multiplyToLen" and returns the
 * address at which the emitted code starts.
 *
3038 * Arguments (of the emitted stub; this generator itself takes none):
3039 *
3040 * Input:
3041 * c_rarg0 - x address
3042 * c_rarg1 - x length
3043 * c_rarg2 - y address
3044 * c_rarg3 - y length
3045 * not Win64
3046 * c_rarg4 - z address
3047 * Win64
3048 * rsp+40 - z address
 *
 * Returns (generator): the pc captured right after alignment, before any
 * stub instruction is emitted.
3049 */
3050 address StubGenerator::generate_multiplyToLen() {
3051 __ align(CodeEntryAlignment);
3052 StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
3053 address start = __ pc(); // entry address; this is the value returned below
3054
3055 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3056 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
// Registers in which the operands are handed to multiply_to_len() below.
3057 const Register x = rdi;
3058 const Register xlen = rax;
3059 const Register y = rsi;
3060 const Register ylen = rcx;
3061 const Register z = r8;
3062
3063 // Next registers will be saved on stack in multiply_to_len().
3064 const Register tmp0 = r11;
3065 const Register tmp1 = r12;
3066 const Register tmp2 = r13;
3067 const Register tmp3 = r14;
3068 const Register tmp4 = r15;
3069 const Register tmp5 = rbx;
3070
3071 BLOCK_COMMENT("Entry:");
3072 __ enter(); // required for proper stackwalking of RuntimeStub frame
3073
3074 setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
3075 // ylen => rcx, z => r8
3076 // r9 and r10 may be used to save non-volatile registers
3077 #ifdef _WIN64
3078 // last argument (#4) is on stack on Win64
// NOTE(review): the offset used here is 6 * wordSize while the header says
// rsp+40; presumably the extra word is the rbp pushed by enter() - confirm
// that setup_arg_regs() does not move rsp.
3079 __ movptr(z, Address(rsp, 6 * wordSize));
3080 #endif
3081
// Shuffle from the setup_arg_regs() layout into the aliases declared above.
// xlen has to be copied out of rsi first: the following move overwrites rsi
// (the y alias) with the value from rdx.
3082 __ movptr(xlen, rsi);
3083 __ movptr(y, rdx);
3084 __ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3085
3086 restore_arg_regs(); // counterpart of the setup_arg_regs(4) call above
3087
3088 __ leave(); // required for proper stackwalking of RuntimeStub frame
3089 __ ret(0);
3090
3091 return start;
3092 }
3093
3094 /**
3095 * Arguments:
3096 *
3097 * Input:
3098 * c_rarg0 - obja address
3099 * c_rarg1 - objb address
3100 * c_rarg2 - length length
3101 * c_rarg3 - scale log2_array_indxscale
3102 *
3103 * Output:
3104 * rax - int: >= 0 is the mismatched index, < 0 is the bitwise complement of the tail
3105 */
3106 address StubGenerator::generate_vectorizedMismatch() {
3107 __ align(CodeEntryAlignment);
3108 StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
3109 address start = __ pc();
3110
3142
3143 return start;
3144 }
3145
3146 /**
 * Emits the stub marked as "StubRoutines" / "squareToLen" and returns the
 * address at which the emitted code starts.
 *
3147 * Arguments (of the emitted stub; this generator itself takes none):
3148 *
3149 * Input:
3150 * c_rarg0 - x address
3151 * c_rarg1 - x length
3152 * c_rarg2 - z address
3153 * c_rarg3 - z length
3154 *
 * Returns (generator): the pc captured right after alignment, before any
 * stub instruction is emitted.
3155 */
3156 address StubGenerator::generate_squareToLen() {
3157
3158 __ align(CodeEntryAlignment);
3159 StubCodeMark mark(this, "StubRoutines", "squareToLen");
3160 address start = __ pc(); // entry address; this is the value returned below
3161
3162 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3163 // Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
// Registers in which the operands are handed to square_to_len() below.
3164 const Register x = rdi;
3165 const Register len = rsi;
3166 const Register z = r8;
3167 const Register zlen = rcx;
3168
// Temporaries passed through to square_to_len().
3169 const Register tmp1 = r12;
3170 const Register tmp2 = r13;
3171 const Register tmp3 = r14;
3172 const Register tmp4 = r15;
3173 const Register tmp5 = rbx;
3174
3175 BLOCK_COMMENT("Entry:");
3176 __ enter(); // required for proper stackwalking of RuntimeStub frame
3177
3178 setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
3179 // zlen => rcx
3180 // r9 and r10 may be used to save non-volatile registers
// z sits in rdx after setup_arg_regs(); copy it into r8, the register the z
// alias above names. rdx and rax are then passed as the last two arguments.
// NOTE(review): presumably rdx/rax serve as scratch registers inside
// square_to_len() - confirm there.
3181 __ movptr(r8, rdx);
3182 __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3183
3184 restore_arg_regs(); // counterpart of the setup_arg_regs(4) call above
3185
3186 __ leave(); // required for proper stackwalking of RuntimeStub frame
3187 __ ret(0);
3188
3189 return start;
3190 }
3191
3192 address StubGenerator::generate_method_entry_barrier() {
3193 __ align(CodeEntryAlignment);
3194 StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
3195 address start = __ pc();
3196
3197 Label deoptimize_label;
3198
3199 __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
3200
3201 BLOCK_COMMENT("Entry:");
3202 __ enter(); // save rbp
3203
3204 // save c_rarg0, because we want to use that value.
3205 // We could do without it but then we depend on the number of slots used by pusha
3206 __ push(c_rarg0);
3207
3208 __ lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address
3266 }
3267
3268 /**
 * Emits the stub marked as "StubRoutines" / "mulAdd" and returns the address
 * at which the emitted code starts.
 *
3269 * Arguments (of the emitted stub; this generator itself takes none):
3270 *
3271 * Input:
3272 * c_rarg0 - out address
3273 * c_rarg1 - in address
3274 * c_rarg2 - offset
3275 * c_rarg3 - len
3276 * not Win64
3277 * c_rarg4 - k
3278 * Win64
3279 * rsp+40 - k
 *
 * Returns (generator): the pc captured right after alignment, before any
 * stub instruction is emitted.
3280 */
3281 address StubGenerator::generate_mulAdd() {
3282 __ align(CodeEntryAlignment);
3283 StubCodeMark mark(this, "StubRoutines", "mulAdd");
3284 address start = __ pc(); // entry address; this is the value returned below
3285
3286 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3287 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
// Registers in which the operands are handed to mul_add() below.
3288 const Register out = rdi;
3289 const Register in = rsi;
3290 const Register offset = r11;
3291 const Register len = rcx;
3292 const Register k = r8;
3293
3294 // Next registers will be saved on stack in mul_add().
3295 const Register tmp1 = r12;
3296 const Register tmp2 = r13;
3297 const Register tmp3 = r14;
3298 const Register tmp4 = r15;
3299 const Register tmp5 = rbx;
3300
3301 BLOCK_COMMENT("Entry:");
3302 __ enter(); // required for proper stackwalking of RuntimeStub frame
3303
3304 setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
3305 // len => rcx, k => r8
3306 // r9 and r10 may be used to save non-volatile registers
3307 #ifdef _WIN64
3308 // last argument is on stack on Win64
// k is fetched with movl rather than movptr; presumably because k is a 32-bit
// int - confirm. NOTE(review): the offset is 6 * wordSize while the header
// says rsp+40; presumably the extra word is the rbp pushed by enter().
3309 __ movl(k, Address(rsp, 6 * wordSize));
3310 #endif
3311 __ movptr(r11, rdx); // move offset in rdx to offset(r11)
// rdx and rax are passed as the last two arguments, separately from offset.
3312 __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3313
3314 restore_arg_regs(); // counterpart of the setup_arg_regs(4) call above
3315
3316 __ leave(); // required for proper stackwalking of RuntimeStub frame
3317 __ ret(0);
3318
3319 return start;
3320 }
3321
3322 address StubGenerator::generate_bigIntegerRightShift() {
3323 __ align(CodeEntryAlignment);
3324 StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
3325 address start = __ pc();
3326
3327 Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
3328 // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
3329 const Register newArr = rdi;
3330 const Register oldArr = rsi;
3331 const Register newIdx = rdx;
3332 const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
3333 const Register totalNumIter = r8;
3334
3335 // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
3336 // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
3337 const Register tmp1 = r11; // Caller save.
3338 const Register tmp2 = rax; // Caller save.
|
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/macroAssembler.hpp"
27 #include "classfile/vmIntrinsics.hpp"
28 #include "code/SCCache.hpp"
29 #include "compiler/oopMap.hpp"
30 #include "gc/shared/barrierSet.hpp"
31 #include "gc/shared/barrierSetAssembler.hpp"
32 #include "gc/shared/barrierSetNMethod.hpp"
33 #include "gc/shared/gc_globals.hpp"
34 #include "memory/universe.hpp"
35 #include "prims/jvmtiExport.hpp"
36 #include "prims/upcallLinker.hpp"
37 #include "runtime/arguments.hpp"
38 #include "runtime/javaThread.hpp"
39 #include "runtime/sharedRuntime.hpp"
40 #include "runtime/stubRoutines.hpp"
41 #include "stubGenerator_x86_64.hpp"
42 #ifdef COMPILER2
43 #include "opto/runtime.hpp"
44 #include "opto/c2_globals.hpp"
45 #endif
46 #if INCLUDE_JVMCI
47 #include "jvmci/jvmci_globals.hpp"
48 #endif
3036
3037
3038 /**
 * Emits the stub marked as "StubRoutines" / "multiplyToLen" and returns the
 * address at which it starts. Emission is skipped entirely when
 * SCCache::load_stub() reports success for this stub.
 *
3039 * Arguments (of the emitted stub; this generator itself takes none):
3040 *
3041 * Input:
3042 * c_rarg0 - x address
3043 * c_rarg1 - x length
3044 * c_rarg2 - y address
3045 * c_rarg3 - y length
3046 * not Win64
3047 * c_rarg4 - z address
3048 * Win64
3049 * rsp+40 - z address
 *
 * Returns (generator): the pc captured right after alignment.
3050 */
3051 address StubGenerator::generate_multiplyToLen() {
3052 __ align(CodeEntryAlignment);
3053 StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
3054 address start = __ pc(); // entry address; this is the value returned below
3055
// When load_stub() returns true nothing further is emitted by this function.
// NOTE(review): presumably the stub body has then been restored from the
// cache at start - confirm in SCCache.
3056 if (SCCache::load_stub(this, vmIntrinsics::_multiplyToLen, "multiplyToLen", start)) {
3057 return start;
3058 }
3059
3060 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3061 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
// Registers in which the operands are handed to multiply_to_len() below.
3062 const Register x = rdi;
3063 const Register xlen = rax;
3064 const Register y = rsi;
3065 const Register ylen = rcx;
3066 const Register z = r8;
3067
3068 // Next registers will be saved on stack in multiply_to_len().
3069 const Register tmp0 = r11;
3070 const Register tmp1 = r12;
3071 const Register tmp2 = r13;
3072 const Register tmp3 = r14;
3073 const Register tmp4 = r15;
3074 const Register tmp5 = rbx;
3075
3076 BLOCK_COMMENT("Entry:");
3077 __ enter(); // required for proper stackwalking of RuntimeStub frame
3078
3079 setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
3080 // ylen => rcx, z => r8
3081 // r9 and r10 may be used to save non-volatile registers
3082 #ifdef _WIN64
3083 // last argument (#4) is on stack on Win64
// NOTE(review): the offset used here is 6 * wordSize while the header says
// rsp+40; presumably the extra word is the rbp pushed by enter() - confirm
// that setup_arg_regs() does not move rsp.
3084 __ movptr(z, Address(rsp, 6 * wordSize));
3085 #endif
3086
// Shuffle from the setup_arg_regs() layout into the aliases declared above.
// xlen has to be copied out of rsi first: the following move overwrites rsi
// (the y alias) with the value from rdx.
3087 __ movptr(xlen, rsi);
3088 __ movptr(y, rdx);
3089 __ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3090
3091 restore_arg_regs(); // counterpart of the setup_arg_regs(4) call above
3092
3093 __ leave(); // required for proper stackwalking of RuntimeStub frame
3094 __ ret(0);
3095
// Called only after the final ret has been emitted, with the same id, name
// and start address that were given to load_stub() above.
3096 SCCache::store_stub(this, vmIntrinsics::_multiplyToLen, "multiplyToLen", start);
3097 return start;
3098 }
3099
3100 /**
3101 * Arguments:
3102 *
3103 * Input:
3104 * c_rarg0 - obja address
3105 * c_rarg1 - objb address
3106 * c_rarg2 - length length
3107 * c_rarg3 - scale log2_array_indxscale
3108 *
3109 * Output:
3110 * rax - int: >= 0 is the mismatched index, < 0 is the bitwise complement of the tail
3111 */
3112 address StubGenerator::generate_vectorizedMismatch() {
3113 __ align(CodeEntryAlignment);
3114 StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
3115 address start = __ pc();
3116
3148
3149 return start;
3150 }
3151
3152 /**
 * Emits the stub marked as "StubRoutines" / "squareToLen" and returns the
 * address at which it starts. Emission is skipped entirely when
 * SCCache::load_stub() reports success for this stub.
 *
3153 * Arguments (of the emitted stub; this generator itself takes none):
3154 *
3155 * Input:
3156 * c_rarg0 - x address
3157 * c_rarg1 - x length
3158 * c_rarg2 - z address
3159 * c_rarg3 - z length
3160 *
 * Returns (generator): the pc captured right after alignment.
3161 */
3162 address StubGenerator::generate_squareToLen() {
3163
3164 __ align(CodeEntryAlignment);
3165 StubCodeMark mark(this, "StubRoutines", "squareToLen");
3166 address start = __ pc(); // entry address; this is the value returned below
3167
// When load_stub() returns true nothing further is emitted by this function.
// NOTE(review): presumably the stub body has then been restored from the
// cache at start - confirm in SCCache.
3168 if (SCCache::load_stub(this, vmIntrinsics::_squareToLen, "squareToLen", start)) {
3169 return start;
3170 }
3171
3172 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3173 // Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
// Registers in which the operands are handed to square_to_len() below.
3174 const Register x = rdi;
3175 const Register len = rsi;
3176 const Register z = r8;
3177 const Register zlen = rcx;
3178
// Temporaries passed through to square_to_len().
3179 const Register tmp1 = r12;
3180 const Register tmp2 = r13;
3181 const Register tmp3 = r14;
3182 const Register tmp4 = r15;
3183 const Register tmp5 = rbx;
3184
3185 BLOCK_COMMENT("Entry:");
3186 __ enter(); // required for proper stackwalking of RuntimeStub frame
3187
3188 setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
3189 // zlen => rcx
3190 // r9 and r10 may be used to save non-volatile registers
// z sits in rdx after setup_arg_regs(); copy it into r8, the register the z
// alias above names. rdx and rax are then passed as the last two arguments.
// NOTE(review): presumably rdx/rax serve as scratch registers inside
// square_to_len() - confirm there.
3191 __ movptr(r8, rdx);
3192 __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3193
3194 restore_arg_regs(); // counterpart of the setup_arg_regs(4) call above
3195
3196 __ leave(); // required for proper stackwalking of RuntimeStub frame
3197 __ ret(0);
3198
// Called only after the final ret has been emitted, with the same id, name
// and start address that were given to load_stub() above.
3199 SCCache::store_stub(this, vmIntrinsics::_squareToLen, "squareToLen", start);
3200 return start;
3201 }
3202
3203 address StubGenerator::generate_method_entry_barrier() {
3204 __ align(CodeEntryAlignment);
3205 StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
3206 address start = __ pc();
3207
3208 Label deoptimize_label;
3209
3210 __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
3211
3212 BLOCK_COMMENT("Entry:");
3213 __ enter(); // save rbp
3214
3215 // save c_rarg0, because we want to use that value.
3216 // We could do without it but then we depend on the number of slots used by pusha
3217 __ push(c_rarg0);
3218
3219 __ lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address
3277 }
3278
3279 /**
 * Emits the stub marked as "StubRoutines" / "mulAdd" and returns the address
 * at which it starts. Emission is skipped entirely when
 * SCCache::load_stub() reports success for this stub.
 *
3280 * Arguments (of the emitted stub; this generator itself takes none):
3281 *
3282 * Input:
3283 * c_rarg0 - out address
3284 * c_rarg1 - in address
3285 * c_rarg2 - offset
3286 * c_rarg3 - len
3287 * not Win64
3288 * c_rarg4 - k
3289 * Win64
3290 * rsp+40 - k
 *
 * Returns (generator): the pc captured right after alignment.
3291 */
3292 address StubGenerator::generate_mulAdd() {
3293 __ align(CodeEntryAlignment);
3294 StubCodeMark mark(this, "StubRoutines", "mulAdd");
3295 address start = __ pc(); // entry address; this is the value returned below
3296
// When load_stub() returns true nothing further is emitted by this function.
// NOTE(review): presumably the stub body has then been restored from the
// cache at start - confirm in SCCache.
3297 if (SCCache::load_stub(this, vmIntrinsics::_mulAdd, "mulAdd", start)) {
3298 return start;
3299 }
3300
3301 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3302 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
// Registers in which the operands are handed to mul_add() below.
3303 const Register out = rdi;
3304 const Register in = rsi;
3305 const Register offset = r11;
3306 const Register len = rcx;
3307 const Register k = r8;
3308
3309 // Next registers will be saved on stack in mul_add().
3310 const Register tmp1 = r12;
3311 const Register tmp2 = r13;
3312 const Register tmp3 = r14;
3313 const Register tmp4 = r15;
3314 const Register tmp5 = rbx;
3315
3316 BLOCK_COMMENT("Entry:");
3317 __ enter(); // required for proper stackwalking of RuntimeStub frame
3318
3319 setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
3320 // len => rcx, k => r8
3321 // r9 and r10 may be used to save non-volatile registers
3322 #ifdef _WIN64
3323 // last argument is on stack on Win64
// k is fetched with movl rather than movptr; presumably because k is a 32-bit
// int - confirm. NOTE(review): the offset is 6 * wordSize while the header
// says rsp+40; presumably the extra word is the rbp pushed by enter().
3324 __ movl(k, Address(rsp, 6 * wordSize));
3325 #endif
3326 __ movptr(r11, rdx); // move offset in rdx to offset(r11)
// rdx and rax are passed as the last two arguments, separately from offset.
3327 __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3328
3329 restore_arg_regs(); // counterpart of the setup_arg_regs(4) call above
3330
3331 __ leave(); // required for proper stackwalking of RuntimeStub frame
3332 __ ret(0);
3333
// Called only after the final ret has been emitted, with the same id, name
// and start address that were given to load_stub() above.
3334 SCCache::store_stub(this, vmIntrinsics::_mulAdd, "mulAdd", start);
3335 return start;
3336 }
3337
3338 address StubGenerator::generate_bigIntegerRightShift() {
3339 __ align(CodeEntryAlignment);
3340 StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
3341 address start = __ pc();
3342
3343 Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
3344 // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
3345 const Register newArr = rdi;
3346 const Register oldArr = rsi;
3347 const Register newIdx = rdx;
3348 const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
3349 const Register totalNumIter = r8;
3350
3351 // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
3352 // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
3353 const Register tmp1 = r11; // Caller save.
3354 const Register tmp2 = rax; // Caller save.
|