8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/macroAssembler.hpp"
27 #include "classfile/vmIntrinsics.hpp"
28 #include "compiler/oopMap.hpp"
29 #include "gc/shared/barrierSet.hpp"
30 #include "gc/shared/barrierSetAssembler.hpp"
31 #include "gc/shared/barrierSetNMethod.hpp"
32 #include "gc/shared/gc_globals.hpp"
33 #include "memory/universe.hpp"
34 #include "prims/jvmtiExport.hpp"
35 #include "prims/upcallLinker.hpp"
36 #include "runtime/arguments.hpp"
37 #include "runtime/javaThread.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #include "runtime/stubRoutines.hpp"
40 #include "stubGenerator_x86_64.hpp"
41 #ifdef COMPILER2
42 #include "opto/runtime.hpp"
43 #include "opto/c2_globals.hpp"
44 #endif
45 #if INCLUDE_JVMCI
46 #include "jvmci/jvmci_globals.hpp"
47 #endif
3037 /** Emits the "multiplyToLen" stub; returns the code address captured by __ pc() before the stub body is emitted.
3038 * Arguments:
3039 *
3040 * Input:
3041 * c_rarg0 - x address
3042 * c_rarg1 - x length
3043 * c_rarg2 - y address
3044 * c_rarg3 - y length
3045 * not Win64
3046 * c_rarg4 - z address
3047 * c_rarg5 - z length
3048 * Win64
3049 * rsp+40 - z address
3050 * rsp+48 - z length
3051 */
3052 address StubGenerator::generate_multiplyToLen() {
3053 __ align(CodeEntryAlignment);
3054 StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
3055 address start = __ pc(); // entry address; this is the value returned to the caller
3056
3057 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3058 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
3059 const Register x = rdi;
3060 const Register xlen = rax;
3061 const Register y = rsi;
3062 const Register ylen = rcx;
3063 const Register z = r8;
3064 const Register zlen = r11;
3065
3066 // Next registers will be saved on stack in multiply_to_len().
3067 const Register tmp1 = r12;
3068 const Register tmp2 = r13;
3069 const Register tmp3 = r14;
3070 const Register tmp4 = r15;
3071 const Register tmp5 = rbx;
3072
3073 BLOCK_COMMENT("Entry:");
3074 __ enter(); // required for proper stackwalking of RuntimeStub frame
3075
3076 #ifndef _WIN64
3077 __ movptr(zlen, r9); // Save r9 in r11 - zlen (done before setup_arg_regs(), which may use r9 as noted below)
3078 #endif
3079 setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
3080 // ylen => rcx, z => r8, zlen => r11
3081 // r9 and r10 may be used to save non-volatile registers
3082 #ifdef _WIN64
3083 // last 2 arguments (#4, #5) are on stack on Win64. NOTE(review): slots 6 and 7 presumably are the header's rsp+40 / rsp+48 shifted by the rbp pushed in enter() - confirm
3084 __ movptr(z, Address(rsp, 6 * wordSize));
3085 __ movptr(zlen, Address(rsp, 7 * wordSize));
3086 #endif
3087
3088 __ movptr(xlen, rsi); // xlen: rsi -> rax; must precede the next move, which overwrites rsi
3089 __ movptr(y, rdx); // y: rdx -> rsi
3090 __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5);
3091
3092 restore_arg_regs(); // NOTE(review): presumably undoes setup_arg_regs() above - confirm
3093
3094 __ leave(); // required for proper stackwalking of RuntimeStub frame
3095 __ ret(0);
3096
3097 return start;
3098 }
3099
3100 /**
3101 * Arguments:
3102 *
3103 * Input:
3104 * c_rarg0 - obja address
3105 * c_rarg1 - objb address
3106 * c_rarg3 - length length
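3107 * c_rarg4 - scale log2_array_indxscale (NOTE(review): this list skips c_rarg2 - confirm whether length/scale are really passed in c_rarg2/c_rarg3)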
3108 *
3109 * Output:
3110 * rax - int >= mismatched index, < 0 bitwise complement of tail
3111 */
3112 address StubGenerator::generate_vectorizedMismatch() {
3113 __ align(CodeEntryAlignment);
3114 StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
3115 address start = __ pc();
3116
3148
3149 return start;
3150 }
3151
3152 /** Emits the "squareToLen" stub; returns the code address captured by __ pc() before the stub body is emitted.
3153 * Arguments:
3154 *
3155 * Input:
3156 * c_rarg0 - x address
3157 * c_rarg1 - x length
3158 * c_rarg2 - z address
3159 * c_rarg3 - z length
3160 *
3161 */
3162 address StubGenerator::generate_squareToLen() {
3163
3164 __ align(CodeEntryAlignment);
3165 StubCodeMark mark(this, "StubRoutines", "squareToLen");
3166 address start = __ pc(); // entry address; this is the value returned to the caller
3167
3168 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3169 // Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
3170 const Register x = rdi;
3171 const Register len = rsi;
3172 const Register z = r8;
3173 const Register zlen = rcx;
3174
3175 const Register tmp1 = r12;
3176 const Register tmp2 = r13;
3177 const Register tmp3 = r14;
3178 const Register tmp4 = r15;
3179 const Register tmp5 = rbx;
3180
3181 BLOCK_COMMENT("Entry:");
3182 __ enter(); // required for proper stackwalking of RuntimeStub frame
3183
3184 setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
3185 // zlen => rcx
3186 // r9 and r10 may be used to save non-volatile registers
3187 __ movptr(r8, rdx); // z: rdx -> r8 (z is declared as r8 above), so rdx can be passed separately below
3188 __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); // NOTE(review): trailing rdx/rax are presumably scratch registers - confirm against square_to_len()
3189
3190 restore_arg_regs(); // NOTE(review): presumably undoes setup_arg_regs() above - confirm
3191
3192 __ leave(); // required for proper stackwalking of RuntimeStub frame
3193 __ ret(0);
3194
3195 return start;
3196 }
3197
3198 address StubGenerator::generate_method_entry_barrier() {
3199 __ align(CodeEntryAlignment);
3200 StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
3201 address start = __ pc();
3202
3203 Label deoptimize_label;
3204
3205 __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
3206
3207 BLOCK_COMMENT("Entry:");
3208 __ enter(); // save rbp
3209
3210 // save c_rarg0, because we want to use that value.
3211 // We could do without it but then we depend on the number of slots used by pusha
3212 __ push(c_rarg0);
3213
3214 __ lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address
3272 }
3273
3274 /** Emits the "mulAdd" stub; returns the code address captured by __ pc() before the stub body is emitted.
3275 * Arguments:
3276 *
3277 * Input:
3278 * c_rarg0 - out address
3279 * c_rarg1 - in address
3280 * c_rarg2 - offset
3281 * c_rarg3 - len
3282 * not Win64
3283 * c_rarg4 - k
3284 * Win64
3285 * rsp+40 - k
3286 */
3287 address StubGenerator::generate_mulAdd() {
3288 __ align(CodeEntryAlignment);
3289 StubCodeMark mark(this, "StubRoutines", "mulAdd");
3290 address start = __ pc(); // entry address; this is the value returned to the caller
3291
3292 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3293 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
3294 const Register out = rdi;
3295 const Register in = rsi;
3296 const Register offset = r11;
3297 const Register len = rcx;
3298 const Register k = r8;
3299
3300 // Next registers will be saved on stack in mul_add().
3301 const Register tmp1 = r12;
3302 const Register tmp2 = r13;
3303 const Register tmp3 = r14;
3304 const Register tmp4 = r15;
3305 const Register tmp5 = rbx;
3306
3307 BLOCK_COMMENT("Entry:");
3308 __ enter(); // required for proper stackwalking of RuntimeStub frame
3309
3310 setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
3311 // len => rcx, k => r8
3312 // r9 and r10 may be used to save non-volatile registers
3313 #ifdef _WIN64
3314 // last argument is on stack on Win64. NOTE(review): slot 6 presumably is the header's rsp+40 shifted by the rbp pushed in enter() - confirm
3315 __ movl(k, Address(rsp, 6 * wordSize));
3316 #endif
3317 __ movptr(r11, rdx); // move offset in rdx to offset(r11), so rdx can be passed separately below
3318 __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); // NOTE(review): trailing rdx/rax are presumably scratch registers - confirm against mul_add()
3319
3320 restore_arg_regs(); // NOTE(review): presumably undoes setup_arg_regs() above - confirm
3321
3322 __ leave(); // required for proper stackwalking of RuntimeStub frame
3323 __ ret(0);
3324
3325 return start;
3326 }
3327
3328 address StubGenerator::generate_bigIntegerRightShift() {
3329 __ align(CodeEntryAlignment);
3330 StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
3331 address start = __ pc();
3332
3333 Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
3334 // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
3335 const Register newArr = rdi;
3336 const Register oldArr = rsi;
3337 const Register newIdx = rdx;
3338 const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
3339 const Register totalNumIter = r8;
3340
3341 // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
3342 // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
3343 const Register tmp1 = r11; // Caller save.
3344 const Register tmp2 = rax; // Caller save.
|
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/macroAssembler.hpp"
27 #include "classfile/vmIntrinsics.hpp"
28 #include "code/SCCache.hpp"
29 #include "compiler/oopMap.hpp"
30 #include "gc/shared/barrierSet.hpp"
31 #include "gc/shared/barrierSetAssembler.hpp"
32 #include "gc/shared/barrierSetNMethod.hpp"
33 #include "gc/shared/gc_globals.hpp"
34 #include "memory/universe.hpp"
35 #include "prims/jvmtiExport.hpp"
36 #include "prims/upcallLinker.hpp"
37 #include "runtime/arguments.hpp"
38 #include "runtime/javaThread.hpp"
39 #include "runtime/sharedRuntime.hpp"
40 #include "runtime/stubRoutines.hpp"
41 #include "stubGenerator_x86_64.hpp"
42 #ifdef COMPILER2
43 #include "opto/runtime.hpp"
44 #include "opto/c2_globals.hpp"
45 #endif
46 #if INCLUDE_JVMCI
47 #include "jvmci/jvmci_globals.hpp"
48 #endif
3038 /** Emits the "multiplyToLen" stub unless SCCache::load_stub() succeeds; returns the code address captured by __ pc().
3039 * Arguments:
3040 *
3041 * Input:
3042 * c_rarg0 - x address
3043 * c_rarg1 - x length
3044 * c_rarg2 - y address
3045 * c_rarg3 - y length
3046 * not Win64
3047 * c_rarg4 - z address
3048 * c_rarg5 - z length
3049 * Win64
3050 * rsp+40 - z address
3051 * rsp+48 - z length
3052 */
3053 address StubGenerator::generate_multiplyToLen() {
3054 __ align(CodeEntryAlignment);
3055 StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
3056 address start = __ pc(); // entry address; this is the value returned to the caller
3057
3058 if (SCCache::load_stub(this, vmIntrinsics::_multiplyToLen, "multiplyToLen", start)) { // NOTE(review): presumably a previously stored stub was loaded at 'start' - confirm against SCCache
3059 return start; // load succeeded: nothing below is emitted
3060 }
3061
3062 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3063 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
3064 const Register x = rdi;
3065 const Register xlen = rax;
3066 const Register y = rsi;
3067 const Register ylen = rcx;
3068 const Register z = r8;
3069 const Register zlen = r11;
3070
3071 // Next registers will be saved on stack in multiply_to_len().
3072 const Register tmp1 = r12;
3073 const Register tmp2 = r13;
3074 const Register tmp3 = r14;
3075 const Register tmp4 = r15;
3076 const Register tmp5 = rbx;
3077
3078 BLOCK_COMMENT("Entry:");
3079 __ enter(); // required for proper stackwalking of RuntimeStub frame
3080
3081 #ifndef _WIN64
3082 __ movptr(zlen, r9); // Save r9 in r11 - zlen (done before setup_arg_regs(), which may use r9 as noted below)
3083 #endif
3084 setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
3085 // ylen => rcx, z => r8, zlen => r11
3086 // r9 and r10 may be used to save non-volatile registers
3087 #ifdef _WIN64
3088 // last 2 arguments (#4, #5) are on stack on Win64. NOTE(review): slots 6 and 7 presumably are the header's rsp+40 / rsp+48 shifted by the rbp pushed in enter() - confirm
3089 __ movptr(z, Address(rsp, 6 * wordSize));
3090 __ movptr(zlen, Address(rsp, 7 * wordSize));
3091 #endif
3092
3093 __ movptr(xlen, rsi); // xlen: rsi -> rax; must precede the next move, which overwrites rsi
3094 __ movptr(y, rdx); // y: rdx -> rsi
3095 __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5);
3096
3097 restore_arg_regs(); // NOTE(review): presumably undoes setup_arg_regs() above - confirm
3098
3099 __ leave(); // required for proper stackwalking of RuntimeStub frame
3100 __ ret(0);
3101
3102 SCCache::store_stub(this, vmIntrinsics::_multiplyToLen, "multiplyToLen", start); // pass the emitted stub to SCCache under the same id/name used by load_stub() above
3103 return start;
3104 }
3105
3106 /**
3107 * Arguments:
3108 *
3109 * Input:
3110 * c_rarg0 - obja address
3111 * c_rarg1 - objb address
3112 * c_rarg3 - length length
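3113 * c_rarg4 - scale log2_array_indxscale (NOTE(review): this list skips c_rarg2 - confirm whether length/scale are really passed in c_rarg2/c_rarg3)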
3114 *
3115 * Output:
3116 * rax - int >= mismatched index, < 0 bitwise complement of tail
3117 */
3118 address StubGenerator::generate_vectorizedMismatch() {
3119 __ align(CodeEntryAlignment);
3120 StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
3121 address start = __ pc();
3122
3154
3155 return start;
3156 }
3157
3158 /** Emits the "squareToLen" stub unless SCCache::load_stub() succeeds; returns the code address captured by __ pc().
3159 * Arguments:
3160 *
3161 * Input:
3162 * c_rarg0 - x address
3163 * c_rarg1 - x length
3164 * c_rarg2 - z address
3165 * c_rarg3 - z length
3166 *
3167 */
3168 address StubGenerator::generate_squareToLen() {
3169
3170 __ align(CodeEntryAlignment);
3171 StubCodeMark mark(this, "StubRoutines", "squareToLen");
3172 address start = __ pc(); // entry address; this is the value returned to the caller
3173
3174 if (SCCache::load_stub(this, vmIntrinsics::_squareToLen, "squareToLen", start)) { // NOTE(review): presumably a previously stored stub was loaded at 'start' - confirm against SCCache
3175 return start; // load succeeded: nothing below is emitted
3176 }
3177
3178 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3179 // Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
3180 const Register x = rdi;
3181 const Register len = rsi;
3182 const Register z = r8;
3183 const Register zlen = rcx;
3184
3185 const Register tmp1 = r12;
3186 const Register tmp2 = r13;
3187 const Register tmp3 = r14;
3188 const Register tmp4 = r15;
3189 const Register tmp5 = rbx;
3190
3191 BLOCK_COMMENT("Entry:");
3192 __ enter(); // required for proper stackwalking of RuntimeStub frame
3193
3194 setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
3195 // zlen => rcx
3196 // r9 and r10 may be used to save non-volatile registers
3197 __ movptr(r8, rdx); // z: rdx -> r8 (z is declared as r8 above), so rdx can be passed separately below
3198 __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); // NOTE(review): trailing rdx/rax are presumably scratch registers - confirm against square_to_len()
3199
3200 restore_arg_regs(); // NOTE(review): presumably undoes setup_arg_regs() above - confirm
3201
3202 __ leave(); // required for proper stackwalking of RuntimeStub frame
3203 __ ret(0);
3204
3205 SCCache::store_stub(this, vmIntrinsics::_squareToLen, "squareToLen", start); // pass the emitted stub to SCCache under the same id/name used by load_stub() above
3206 return start;
3207 }
3208
3209 address StubGenerator::generate_method_entry_barrier() {
3210 __ align(CodeEntryAlignment);
3211 StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
3212 address start = __ pc();
3213
3214 Label deoptimize_label;
3215
3216 __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
3217
3218 BLOCK_COMMENT("Entry:");
3219 __ enter(); // save rbp
3220
3221 // save c_rarg0, because we want to use that value.
3222 // We could do without it but then we depend on the number of slots used by pusha
3223 __ push(c_rarg0);
3224
3225 __ lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address
3283 }
3284
3285 /** Emits the "mulAdd" stub unless SCCache::load_stub() succeeds; returns the code address captured by __ pc().
3286 * Arguments:
3287 *
3288 * Input:
3289 * c_rarg0 - out address
3290 * c_rarg1 - in address
3291 * c_rarg2 - offset
3292 * c_rarg3 - len
3293 * not Win64
3294 * c_rarg4 - k
3295 * Win64
3296 * rsp+40 - k
3297 */
3298 address StubGenerator::generate_mulAdd() {
3299 __ align(CodeEntryAlignment);
3300 StubCodeMark mark(this, "StubRoutines", "mulAdd");
3301 address start = __ pc(); // entry address; this is the value returned to the caller
3302
3303 if (SCCache::load_stub(this, vmIntrinsics::_mulAdd, "mulAdd", start)) { // NOTE(review): presumably a previously stored stub was loaded at 'start' - confirm against SCCache
3304 return start; // load succeeded: nothing below is emitted
3305 }
3306
3307 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3308 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
3309 const Register out = rdi;
3310 const Register in = rsi;
3311 const Register offset = r11;
3312 const Register len = rcx;
3313 const Register k = r8;
3314
3315 // Next registers will be saved on stack in mul_add().
3316 const Register tmp1 = r12;
3317 const Register tmp2 = r13;
3318 const Register tmp3 = r14;
3319 const Register tmp4 = r15;
3320 const Register tmp5 = rbx;
3321
3322 BLOCK_COMMENT("Entry:");
3323 __ enter(); // required for proper stackwalking of RuntimeStub frame
3324
3325 setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
3326 // len => rcx, k => r8
3327 // r9 and r10 may be used to save non-volatile registers
3328 #ifdef _WIN64
3329 // last argument is on stack on Win64. NOTE(review): slot 6 presumably is the header's rsp+40 shifted by the rbp pushed in enter() - confirm
3330 __ movl(k, Address(rsp, 6 * wordSize));
3331 #endif
3332 __ movptr(r11, rdx); // move offset in rdx to offset(r11), so rdx can be passed separately below
3333 __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); // NOTE(review): trailing rdx/rax are presumably scratch registers - confirm against mul_add()
3334
3335 restore_arg_regs(); // NOTE(review): presumably undoes setup_arg_regs() above - confirm
3336
3337 __ leave(); // required for proper stackwalking of RuntimeStub frame
3338 __ ret(0);
3339
3340 SCCache::store_stub(this, vmIntrinsics::_mulAdd, "mulAdd", start); // pass the emitted stub to SCCache under the same id/name used by load_stub() above
3341 return start;
3342 }
3343
3344 address StubGenerator::generate_bigIntegerRightShift() {
3345 __ align(CodeEntryAlignment);
3346 StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
3347 address start = __ pc();
3348
3349 Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
3350 // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
3351 const Register newArr = rdi;
3352 const Register oldArr = rsi;
3353 const Register newIdx = rdx;
3354 const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
3355 const Register totalNumIter = r8;
3356
3357 // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
3358 // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
3359 const Register tmp1 = r11; // Caller save.
3360 const Register tmp2 = rax; // Caller save.
|