9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/macroAssembler.hpp"
27 #include "classfile/javaClasses.hpp"
28 #include "classfile/vmIntrinsics.hpp"
29 #include "compiler/oopMap.hpp"
30 #include "gc/shared/barrierSet.hpp"
31 #include "gc/shared/barrierSetAssembler.hpp"
32 #include "gc/shared/barrierSetNMethod.hpp"
33 #include "gc/shared/gc_globals.hpp"
34 #include "memory/universe.hpp"
35 #include "prims/jvmtiExport.hpp"
36 #include "prims/upcallLinker.hpp"
37 #include "runtime/arguments.hpp"
38 #include "runtime/javaThread.hpp"
39 #include "runtime/sharedRuntime.hpp"
40 #include "runtime/stubRoutines.hpp"
41 #include "stubGenerator_x86_64.hpp"
42 #ifdef COMPILER2
43 #include "opto/runtime.hpp"
44 #include "opto/c2_globals.hpp"
45 #endif
46 #if INCLUDE_JVMCI
47 #include "jvmci/jvmci_globals.hpp"
48 #endif
3033
3034
3035 /**
 * Generates the "multiplyToLen" stub and returns the address of its first
 * instruction. The emitted code is bracketed by enter()/leave(), moves the
 * incoming arguments into the registers named in the body, runs the
 * assembler's multiply_to_len() sequence and returns with ret(0).
 *
3036 * Arguments:
3037 *
3038 * Input:
3039 * c_rarg0 - x address
3040 * c_rarg1 - x length
3041 * c_rarg2 - y address
3042 * c_rarg3 - y length
3043 * not Win64
3044 * c_rarg4 - z address
3045 * Win64
3046 * rsp+40 - z address
3047 */
3048 address StubGenerator::generate_multiplyToLen() {
3049 __ align(CodeEntryAlignment);
3050 StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
3051 address start = __ pc(); // entry address handed back to the caller
3052
3053 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3054 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
3055 const Register x = rdi;
3056 const Register xlen = rax; // copied here from rsi after setup_arg_regs()
3057 const Register y = rsi; // copied here from rdx after setup_arg_regs()
3058 const Register ylen = rcx;
3059 const Register z = r8;
3060
3061 // Next registers will be saved on stack in multiply_to_len().
3062 const Register tmp0 = r11;
3063 const Register tmp1 = r12;
3064 const Register tmp2 = r13;
3065 const Register tmp3 = r14;
3066 const Register tmp4 = r15;
3067 const Register tmp5 = rbx;
3068
3069 BLOCK_COMMENT("Entry:");
3070 __ enter(); // required for proper stackwalking of RuntimeStub frame
3071
3072 setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
3073 // ylen => rcx, z => r8
3074 // r9 and r10 may be used to save non-volatile registers
3075 #ifdef _WIN64
3076 // last argument (#4) is on stack on Win64
3077 __ movptr(z, Address(rsp, 6 * wordSize)); // NOTE(review): header says rsp+40 at entry; presumably one extra word here for the rbp slot pushed by enter() - confirm
3078 #endif
3079
3080 __ movptr(xlen, rsi); // xlen: rsi -> rax; must precede the next move, which overwrites rsi
3081 __ movptr(y, rdx); // y: rdx -> rsi
3082 __ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3083
3084 restore_arg_regs(); // presumably the counterpart of setup_arg_regs() above - confirm
3085
3086 __ leave(); // required for proper stackwalking of RuntimeStub frame
3087 __ ret(0);
3088
3089 return start;
3090 }
3091
3092 /**
 * Generates the "vectorizedMismatch" stub and returns the address of its
 * first instruction.
 *
3093 * Arguments:
3094 *
3095 * Input:
3096 * c_rarg0 - obja address
3097 * c_rarg1 - objb address
3098 * c_rarg3 - length length
3099 * c_rarg4 - scale log2_array_indxscale
 *
 * NOTE(review): the four inputs above are numbered c_rarg0, c_rarg1,
 * c_rarg3, c_rarg4 (c_rarg2 is skipped); presumably length/scale are
 * really c_rarg2/c_rarg3 - confirm against the register assignments in
 * the body.
3100 *
3101 * Output:
3102 * rax - int >= mismatched index, < 0 bitwise complement of tail
3103 */
3104 address StubGenerator::generate_vectorizedMismatch() {
3105 __ align(CodeEntryAlignment);
3106 StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
3107 address start = __ pc(); // entry address handed back to the caller
3108
3140
3141 return start;
3142 }
3143
3144 /**
 * Generates the "squareToLen" stub and returns the address of its first
 * instruction. The emitted code is bracketed by enter()/leave(), moves z
 * out of rdx into r8, runs the assembler's square_to_len() sequence and
 * returns with ret(0).
 *
3145 * Arguments:
3146 *
3147 * Input:
3148 * c_rarg0 - x address
3149 * c_rarg1 - x length
3150 * c_rarg2 - z address
3151 * c_rarg3 - z length
3152 *
3153 */
3154 address StubGenerator::generate_squareToLen() {
3155
3156 __ align(CodeEntryAlignment);
3157 StubCodeMark mark(this, "StubRoutines", "squareToLen");
3158 address start = __ pc(); // entry address handed back to the caller
3159
3160 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3161 // Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
3162 const Register x = rdi;
3163 const Register len = rsi;
3164 const Register z = r8; // copied here from rdx after setup_arg_regs()
3165 const Register zlen = rcx;
3166
3167 const Register tmp1 = r12;
3168 const Register tmp2 = r13;
3169 const Register tmp3 = r14;
3170 const Register tmp4 = r15;
3171 const Register tmp5 = rbx;
3172
3173 BLOCK_COMMENT("Entry:");
3174 __ enter(); // required for proper stackwalking of RuntimeStub frame
3175
3176 setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
3177 // zlen => rcx
3178 // r9 and r10 may be used to save non-volatile registers
3179 __ movptr(r8, rdx); // z: rdx -> r8; rdx itself is passed to square_to_len() below
3180 __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3181
3182 restore_arg_regs(); // presumably the counterpart of setup_arg_regs() above - confirm
3183
3184 __ leave(); // required for proper stackwalking of RuntimeStub frame
3185 __ ret(0);
3186
3187 return start;
3188 }
3189
/**
 * Generates the "nmethod_entry_barrier" stub. The prologue shown here pushes
 * a -1 cookie slot, saves rbp via enter(), saves c_rarg0, and then loads into
 * c_rarg0 the address three words above rsp (past the c_rarg0, rbp and
 * cookie slots).
 */
3190 address StubGenerator::generate_method_entry_barrier() {
3191 __ align(CodeEntryAlignment);
3192 StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
3193 address start = __ pc(); // pc at the first instruction of the stub
3194
3195 Label deoptimize_label;
3196
3197 __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
3198
3199 BLOCK_COMMENT("Entry:");
3200 __ enter(); // save rbp
3201
3202 // save c_rarg0, because we want to use that value.
3203 // We could do without it but then we depend on the number of slots used by pusha
3204 __ push(c_rarg0);
3205
3206 __ lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address
3264 }
3265
3266 /**
 * Generates the "mulAdd" stub and returns the address of its first
 * instruction. The emitted code is bracketed by enter()/leave(), moves the
 * offset argument out of rdx into r11, runs the assembler's mul_add()
 * sequence and returns with ret(0).
 *
3267 * Arguments:
3268 *
3269 * Input:
3270 * c_rarg0 - out address
3271 * c_rarg1 - in address
3272 * c_rarg2 - offset
3273 * c_rarg3 - len
3274 * not Win64
3275 * c_rarg4 - k
3276 * Win64
3277 * rsp+40 - k
3278 */
3279 address StubGenerator::generate_mulAdd() {
3280 __ align(CodeEntryAlignment);
3281 StubCodeMark mark(this, "StubRoutines", "mulAdd");
3282 address start = __ pc(); // entry address handed back to the caller
3283
3284 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3285 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
3286 const Register out = rdi;
3287 const Register in = rsi;
3288 const Register offset = r11; // copied here from rdx after setup_arg_regs()
3289 const Register len = rcx;
3290 const Register k = r8;
3291
3292 // Next registers will be saved on stack in mul_add().
3293 const Register tmp1 = r12;
3294 const Register tmp2 = r13;
3295 const Register tmp3 = r14;
3296 const Register tmp4 = r15;
3297 const Register tmp5 = rbx;
3298
3299 BLOCK_COMMENT("Entry:");
3300 __ enter(); // required for proper stackwalking of RuntimeStub frame
3301
3302 setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
3303 // len => rcx, k => r8
3304 // r9 and r10 may be used to save non-volatile registers
3305 #ifdef _WIN64
3306 // last argument is on stack on Win64
3307 __ movl(k, Address(rsp, 6 * wordSize)); // NOTE(review): header says rsp+40 at entry; presumably one extra word here for the rbp slot pushed by enter() - confirm
3308 #endif
3309 __ movptr(r11, rdx); // move offset in rdx to offset(r11)
3310 __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3311
3312 restore_arg_regs(); // presumably the counterpart of setup_arg_regs() above - confirm
3313
3314 __ leave(); // required for proper stackwalking of RuntimeStub frame
3315 __ ret(0);
3316
3317 return start;
3318 }
3319
3320 address StubGenerator::generate_bigIntegerRightShift() {
3321 __ align(CodeEntryAlignment);
3322 StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
3323 address start = __ pc();
3324
3325 Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
3326 // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
3327 const Register newArr = rdi;
3328 const Register oldArr = rsi;
3329 const Register newIdx = rdx;
3330 const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
3331 const Register totalNumIter = r8;
3332
3333 // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
3334 // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
3335 const Register tmp1 = r11; // Caller save.
3336 const Register tmp2 = rax; // Caller save.
|
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/macroAssembler.hpp"
27 #include "classfile/javaClasses.hpp"
28 #include "classfile/vmIntrinsics.hpp"
29 #include "code/SCCache.hpp"
30 #include "compiler/oopMap.hpp"
31 #include "gc/shared/barrierSet.hpp"
32 #include "gc/shared/barrierSetAssembler.hpp"
33 #include "gc/shared/barrierSetNMethod.hpp"
34 #include "gc/shared/gc_globals.hpp"
35 #include "memory/universe.hpp"
36 #include "prims/jvmtiExport.hpp"
37 #include "prims/upcallLinker.hpp"
38 #include "runtime/arguments.hpp"
39 #include "runtime/javaThread.hpp"
40 #include "runtime/sharedRuntime.hpp"
41 #include "runtime/stubRoutines.hpp"
42 #include "stubGenerator_x86_64.hpp"
43 #ifdef COMPILER2
44 #include "opto/runtime.hpp"
45 #include "opto/c2_globals.hpp"
46 #endif
47 #if INCLUDE_JVMCI
48 #include "jvmci/jvmci_globals.hpp"
49 #endif
3034
3035
3036 /**
 * Generates the "multiplyToLen" stub and returns the address of its first
 * instruction. If SCCache::load_stub() reports success the function returns
 * immediately without emitting code; otherwise it emits the stub and then
 * calls SCCache::store_stub() with the same id, name and start address.
 *
3037 * Arguments:
3038 *
3039 * Input:
3040 * c_rarg0 - x address
3041 * c_rarg1 - x length
3042 * c_rarg2 - y address
3043 * c_rarg3 - y length
3044 * not Win64
3045 * c_rarg4 - z address
3046 * Win64
3047 * rsp+40 - z address
3048 */
3049 address StubGenerator::generate_multiplyToLen() {
3050 __ align(CodeEntryAlignment);
3051 StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
3052 address start = __ pc(); // entry address handed back to the caller
3053
3054 if (SCCache::load_stub(this, vmIntrinsics::_multiplyToLen, "multiplyToLen", start)) { // success: skip generation (presumably a cached copy now sits at start - confirm)
3055 return start;
3056 }
3057
3058 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3059 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
3060 const Register x = rdi;
3061 const Register xlen = rax; // copied here from rsi after setup_arg_regs()
3062 const Register y = rsi; // copied here from rdx after setup_arg_regs()
3063 const Register ylen = rcx;
3064 const Register z = r8;
3065
3066 // Next registers will be saved on stack in multiply_to_len().
3067 const Register tmp0 = r11;
3068 const Register tmp1 = r12;
3069 const Register tmp2 = r13;
3070 const Register tmp3 = r14;
3071 const Register tmp4 = r15;
3072 const Register tmp5 = rbx;
3073
3074 BLOCK_COMMENT("Entry:");
3075 __ enter(); // required for proper stackwalking of RuntimeStub frame
3076
3077 setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
3078 // ylen => rcx, z => r8
3079 // r9 and r10 may be used to save non-volatile registers
3080 #ifdef _WIN64
3081 // last argument (#4) is on stack on Win64
3082 __ movptr(z, Address(rsp, 6 * wordSize)); // NOTE(review): header says rsp+40 at entry; presumably one extra word here for the rbp slot pushed by enter() - confirm
3083 #endif
3084
3085 __ movptr(xlen, rsi); // xlen: rsi -> rax; must precede the next move, which overwrites rsi
3086 __ movptr(y, rdx); // y: rdx -> rsi
3087 __ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3088
3089 restore_arg_regs(); // presumably the counterpart of setup_arg_regs() above - confirm
3090
3091 __ leave(); // required for proper stackwalking of RuntimeStub frame
3092 __ ret(0);
3093
3094 SCCache::store_stub(this, vmIntrinsics::_multiplyToLen, "multiplyToLen", start); // hand the freshly generated stub to SCCache (presumably for later load_stub() hits - confirm)
3095 return start;
3096 }
3097
3098 /**
 * Generates the "vectorizedMismatch" stub and returns the address of its
 * first instruction.
 *
3099 * Arguments:
3100 *
3101 * Input:
3102 * c_rarg0 - obja address
3103 * c_rarg1 - objb address
3104 * c_rarg3 - length length
3105 * c_rarg4 - scale log2_array_indxscale
 *
 * NOTE(review): the four inputs above are numbered c_rarg0, c_rarg1,
 * c_rarg3, c_rarg4 (c_rarg2 is skipped); presumably length/scale are
 * really c_rarg2/c_rarg3 - confirm against the register assignments in
 * the body.
3106 *
3107 * Output:
3108 * rax - int >= mismatched index, < 0 bitwise complement of tail
3109 */
3110 address StubGenerator::generate_vectorizedMismatch() {
3111 __ align(CodeEntryAlignment);
3112 StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
3113 address start = __ pc(); // entry address handed back to the caller
3114
3146
3147 return start;
3148 }
3149
3150 /**
 * Generates the "squareToLen" stub and returns the address of its first
 * instruction. If SCCache::load_stub() reports success the function returns
 * immediately without emitting code; otherwise it emits the stub and then
 * calls SCCache::store_stub() with the same id, name and start address.
 *
3151 * Arguments:
3152 *
3153 * Input:
3154 * c_rarg0 - x address
3155 * c_rarg1 - x length
3156 * c_rarg2 - z address
3157 * c_rarg3 - z length
3158 *
3159 */
3160 address StubGenerator::generate_squareToLen() {
3161
3162 __ align(CodeEntryAlignment);
3163 StubCodeMark mark(this, "StubRoutines", "squareToLen");
3164 address start = __ pc(); // entry address handed back to the caller
3165
3166 if (SCCache::load_stub(this, vmIntrinsics::_squareToLen, "squareToLen", start)) { // success: skip generation (presumably a cached copy now sits at start - confirm)
3167 return start;
3168 }
3169
3170 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3171 // Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
3172 const Register x = rdi;
3173 const Register len = rsi;
3174 const Register z = r8; // copied here from rdx after setup_arg_regs()
3175 const Register zlen = rcx;
3176
3177 const Register tmp1 = r12;
3178 const Register tmp2 = r13;
3179 const Register tmp3 = r14;
3180 const Register tmp4 = r15;
3181 const Register tmp5 = rbx;
3182
3183 BLOCK_COMMENT("Entry:");
3184 __ enter(); // required for proper stackwalking of RuntimeStub frame
3185
3186 setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
3187 // zlen => rcx
3188 // r9 and r10 may be used to save non-volatile registers
3189 __ movptr(r8, rdx); // z: rdx -> r8; rdx itself is passed to square_to_len() below
3190 __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3191
3192 restore_arg_regs(); // presumably the counterpart of setup_arg_regs() above - confirm
3193
3194 __ leave(); // required for proper stackwalking of RuntimeStub frame
3195 __ ret(0);
3196
3197 SCCache::store_stub(this, vmIntrinsics::_squareToLen, "squareToLen", start); // hand the freshly generated stub to SCCache (presumably for later load_stub() hits - confirm)
3198 return start;
3199 }
3200
/**
 * Generates the "nmethod_entry_barrier" stub. The prologue shown here pushes
 * a -1 cookie slot, saves rbp via enter(), saves c_rarg0, and then loads into
 * c_rarg0 the address three words above rsp (past the c_rarg0, rbp and
 * cookie slots).
 */
3201 address StubGenerator::generate_method_entry_barrier() {
3202 __ align(CodeEntryAlignment);
3203 StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
3204 address start = __ pc(); // pc at the first instruction of the stub
3205
3206 Label deoptimize_label;
3207
3208 __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
3209
3210 BLOCK_COMMENT("Entry:");
3211 __ enter(); // save rbp
3212
3213 // save c_rarg0, because we want to use that value.
3214 // We could do without it but then we depend on the number of slots used by pusha
3215 __ push(c_rarg0);
3216
3217 __ lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address
3275 }
3276
3277 /**
 * Generates the "mulAdd" stub and returns the address of its first
 * instruction. If SCCache::load_stub() reports success the function returns
 * immediately without emitting code; otherwise it emits the stub and then
 * calls SCCache::store_stub() with the same id, name and start address.
 *
3278 * Arguments:
3279 *
3280 * Input:
3281 * c_rarg0 - out address
3282 * c_rarg1 - in address
3283 * c_rarg2 - offset
3284 * c_rarg3 - len
3285 * not Win64
3286 * c_rarg4 - k
3287 * Win64
3288 * rsp+40 - k
3289 */
3290 address StubGenerator::generate_mulAdd() {
3291 __ align(CodeEntryAlignment);
3292 StubCodeMark mark(this, "StubRoutines", "mulAdd");
3293 address start = __ pc(); // entry address handed back to the caller
3294
3295 if (SCCache::load_stub(this, vmIntrinsics::_mulAdd, "mulAdd", start)) { // success: skip generation (presumably a cached copy now sits at start - confirm)
3296 return start;
3297 }
3298
3299 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3300 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
3301 const Register out = rdi;
3302 const Register in = rsi;
3303 const Register offset = r11; // copied here from rdx after setup_arg_regs()
3304 const Register len = rcx;
3305 const Register k = r8;
3306
3307 // Next registers will be saved on stack in mul_add().
3308 const Register tmp1 = r12;
3309 const Register tmp2 = r13;
3310 const Register tmp3 = r14;
3311 const Register tmp4 = r15;
3312 const Register tmp5 = rbx;
3313
3314 BLOCK_COMMENT("Entry:");
3315 __ enter(); // required for proper stackwalking of RuntimeStub frame
3316
3317 setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
3318 // len => rcx, k => r8
3319 // r9 and r10 may be used to save non-volatile registers
3320 #ifdef _WIN64
3321 // last argument is on stack on Win64
3322 __ movl(k, Address(rsp, 6 * wordSize)); // NOTE(review): header says rsp+40 at entry; presumably one extra word here for the rbp slot pushed by enter() - confirm
3323 #endif
3324 __ movptr(r11, rdx); // move offset in rdx to offset(r11)
3325 __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
3326
3327 restore_arg_regs(); // presumably the counterpart of setup_arg_regs() above - confirm
3328
3329 __ leave(); // required for proper stackwalking of RuntimeStub frame
3330 __ ret(0);
3331
3332 SCCache::store_stub(this, vmIntrinsics::_mulAdd, "mulAdd", start); // hand the freshly generated stub to SCCache (presumably for later load_stub() hits - confirm)
3333 return start;
3334 }
3335
3336 address StubGenerator::generate_bigIntegerRightShift() {
3337 __ align(CodeEntryAlignment);
3338 StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
3339 address start = __ pc();
3340
3341 Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
3342 // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
3343 const Register newArr = rdi;
3344 const Register oldArr = rsi;
3345 const Register newIdx = rdx;
3346 const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
3347 const Register totalNumIter = r8;
3348
3349 // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
3350 // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
3351 const Register tmp1 = r11; // Caller save.
3352 const Register tmp2 = rax; // Caller save.
|