9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/macroAssembler.hpp"
27 #include "classfile/javaClasses.hpp"
28 #include "classfile/vmIntrinsics.hpp"
29 #include "compiler/oopMap.hpp"
30 #include "gc/shared/barrierSet.hpp"
31 #include "gc/shared/barrierSetAssembler.hpp"
32 #include "gc/shared/barrierSetNMethod.hpp"
33 #include "gc/shared/gc_globals.hpp"
34 #include "memory/universe.hpp"
35 #include "prims/jvmtiExport.hpp"
36 #include "prims/upcallLinker.hpp"
37 #include "runtime/arguments.hpp"
38 #include "runtime/continuationEntry.hpp"
39 #include "runtime/javaThread.hpp"
40 #include "runtime/sharedRuntime.hpp"
41 #include "runtime/stubRoutines.hpp"
42 #include "stubGenerator_x86_64.hpp"
43 #ifdef COMPILER2
44 #include "opto/runtime.hpp"
45 #include "opto/c2_globals.hpp"
46 #endif
47 #if INCLUDE_JVMCI
48 #include "jvmci/jvmci_globals.hpp"
3036
3037
3038 /** Generates the "multiplyToLen" stub and returns the pc captured right after alignment as its entry.
3039 * Arguments:
3040 *
3041 * Input:
3042 * c_rarg0 - x address
3043 * c_rarg1 - x length
3044 * c_rarg2 - y address
3045 * c_rarg3 - y length
3046 * not Win64
3047 * c_rarg4 - z address
3048 * Win64
3049 * rsp+40 - z address (the stub body reads it at rsp + 6 * wordSize, after enter())
3050 */
3051 address StubGenerator::generate_multiplyToLen() {
3052 __ align(CodeEntryAlignment);
3053 StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
3054 address start = __ pc(); // entry address returned to the caller
3055
3056 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3057 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
3058 const Register x = rdi;
3059 const Register xlen = rax; // not where the argument arrives; filled from rsi below
3060 const Register y = rsi; // not where the argument arrives; filled from rdx below
3061 const Register ylen = rcx;
3062 const Register z = r8;
3063
3064 // Next registers will be saved on stack in multiply_to_len().
3065 const Register tmp0 = r11;
3066 const Register tmp1 = r12;
3067 const Register tmp2 = r13;
3068 const Register tmp3 = r14;
3069 const Register tmp4 = r15;
3070 const Register tmp5 = rbx;
3071
3072 BLOCK_COMMENT("Entry:");
3073 __ enter(); // required for proper stackwalking of RuntimeStub frame
3074
3075 setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
3076 // ylen => rcx, z => r8
3077 // r9 and r10 may be used to save non-volatile registers
3078 #ifdef _WIN64
3079 // last argument (#4) is on stack on Win64; NOTE(review): header documents rsp+40, the 6 * wordSize slot used here presumably accounts for the frame pushed by enter() - confirm
3080 __ movptr(z, Address(rsp, 6 * wordSize));
3081 #endif
3082
3083 __ movptr(xlen, rsi); // must come first: the next move overwrites rsi, which still holds xlen
3084 __ movptr(y, rdx); // y arrives in rdx (see setup_arg_regs note above) and is moved into rsi
3085 __ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3086
3087 restore_arg_regs(); // presumably undoes setup_arg_regs(4) - confirm against its definition
3088
3089 __ leave(); // required for proper stackwalking of RuntimeStub frame
3090 __ ret(0);
3091
3092 return start;
3093 }
3094
3095 /**
3096 * Arguments:
3097 *
3098 * Input:
3099 * c_rarg0 - obja address
3100 * c_rarg1 - objb address
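3101 * c_rarg2 - length length (NOTE(review): was listed as c_rarg3, skipping c_rarg2 - confirm against the register setup in the body)
3102 * c_rarg3 - scale log2_array_indxscale (NOTE(review): was listed as c_rarg4 - confirm against the register setup in the body)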
3103 *
3104 * Output:
3105 * rax - int >= mismatched index, < 0 bitwise complement of tail
3106 */
3107 address StubGenerator::generate_vectorizedMismatch() {
3108 __ align(CodeEntryAlignment);
3109 StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
3110 address start = __ pc();
3111
3143
3144 return start;
3145 }
3146
3147 /** Generates the "squareToLen" stub and returns the pc captured right after alignment as its entry.
3148 * Arguments:
3149 *
3150 * Input:
3151 * c_rarg0 - x address
3152 * c_rarg1 - x length
3153 * c_rarg2 - z address
3154 * c_rarg3 - z length
3155 *
3156 */
3157 address StubGenerator::generate_squareToLen() {
3158
3159 __ align(CodeEntryAlignment);
3160 StubCodeMark mark(this, "StubRoutines", "squareToLen");
3161 address start = __ pc(); // entry address returned to the caller
3162
3163 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3164 // Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
3165 const Register x = rdi;
3166 const Register len = rsi;
3167 const Register z = r8; // not where the argument arrives; filled from rdx below
3168 const Register zlen = rcx;
3169
3170 const Register tmp1 = r12;
3171 const Register tmp2 = r13;
3172 const Register tmp3 = r14;
3173 const Register tmp4 = r15;
3174 const Register tmp5 = rbx;
3175
3176 BLOCK_COMMENT("Entry:");
3177 __ enter(); // required for proper stackwalking of RuntimeStub frame
3178
3179 setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
3180 // zlen => rcx
3181 // r9 and r10 may be used to save non-volatile registers
3182 __ movptr(r8, rdx); // r8 is z: move the z address out of rdx, which is passed separately to square_to_len() below
3183 __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); // rdx and rax are the trailing register arguments - presumably scratch, confirm in MacroAssembler
3184
3185 restore_arg_regs(); // presumably undoes setup_arg_regs(4) - confirm against its definition
3186
3187 __ leave(); // required for proper stackwalking of RuntimeStub frame
3188 __ ret(0);
3189
3190 return start;
3191 }
3192
3193 address StubGenerator::generate_method_entry_barrier() {
3194 __ align(CodeEntryAlignment);
3195 StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
3196 address start = __ pc();
3197
3198 Label deoptimize_label;
3199
3200 __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
3201
3202 BLOCK_COMMENT("Entry:");
3203 __ enter(); // save rbp
3204
3205 // save c_rarg0, because we want to use that value.
3206 // We could do without it but then we depend on the number of slots used by pusha
3207 __ push(c_rarg0);
3208
3209 __ lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address
3267 }
3268
3269 /** Generates the "mulAdd" stub and returns the pc captured right after alignment as its entry.
3270 * Arguments:
3271 *
3272 * Input:
3273 * c_rarg0 - out address
3274 * c_rarg1 - in address
3275 * c_rarg2 - offset
3276 * c_rarg3 - len
3277 * not Win64
3278 * c_rarg4 - k
3279 * Win64
3280 * rsp+40 - k (the stub body reads it at rsp + 6 * wordSize, after enter())
3281 */
3282 address StubGenerator::generate_mulAdd() {
3283 __ align(CodeEntryAlignment);
3284 StubCodeMark mark(this, "StubRoutines", "mulAdd");
3285 address start = __ pc(); // entry address returned to the caller
3286
3287 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3288 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
3289 const Register out = rdi;
3290 const Register in = rsi;
3291 const Register offset = r11; // not where the argument arrives; filled from rdx below
3292 const Register len = rcx;
3293 const Register k = r8;
3294
3295 // Next registers will be saved on stack in mul_add().
3296 const Register tmp1 = r12;
3297 const Register tmp2 = r13;
3298 const Register tmp3 = r14;
3299 const Register tmp4 = r15;
3300 const Register tmp5 = rbx;
3301
3302 BLOCK_COMMENT("Entry:");
3303 __ enter(); // required for proper stackwalking of RuntimeStub frame
3304
3305 setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
3306 // len => rcx, k => r8
3307 // r9 and r10 may be used to save non-volatile registers
3308 #ifdef _WIN64
3309 // last argument is on stack on Win64; it is loaded with movl, i.e. as a 32-bit value. NOTE(review): header documents rsp+40, the 6 * wordSize slot used here presumably accounts for the frame pushed by enter() - confirm
3310 __ movl(k, Address(rsp, 6 * wordSize));
3311 #endif
3312 __ movptr(r11, rdx); // move offset in rdx to offset(r11); rdx itself is passed separately to mul_add() below
3313 __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); // rdx and rax are the trailing register arguments - presumably scratch, confirm in MacroAssembler
3314
3315 restore_arg_regs(); // presumably undoes setup_arg_regs(4) - confirm against its definition
3316
3317 __ leave(); // required for proper stackwalking of RuntimeStub frame
3318 __ ret(0);
3319
3320 return start;
3321 }
3322
3323 address StubGenerator::generate_bigIntegerRightShift() {
3324 __ align(CodeEntryAlignment);
3325 StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
3326 address start = __ pc();
3327
3328 Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
3329 // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
3330 const Register newArr = rdi;
3331 const Register oldArr = rsi;
3332 const Register newIdx = rdx;
3333 const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
3334 const Register totalNumIter = r8;
3335
3336 // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
3337 // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
3338 const Register tmp1 = r11; // Caller save.
3339 const Register tmp2 = rax; // Caller save.
|
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/macroAssembler.hpp"
27 #include "classfile/javaClasses.hpp"
28 #include "classfile/vmIntrinsics.hpp"
29 #include "code/SCCache.hpp"
30 #include "compiler/oopMap.hpp"
31 #include "gc/shared/barrierSet.hpp"
32 #include "gc/shared/barrierSetAssembler.hpp"
33 #include "gc/shared/barrierSetNMethod.hpp"
34 #include "gc/shared/gc_globals.hpp"
35 #include "memory/universe.hpp"
36 #include "prims/jvmtiExport.hpp"
37 #include "prims/upcallLinker.hpp"
38 #include "runtime/arguments.hpp"
39 #include "runtime/continuationEntry.hpp"
40 #include "runtime/javaThread.hpp"
41 #include "runtime/sharedRuntime.hpp"
42 #include "runtime/stubRoutines.hpp"
43 #include "stubGenerator_x86_64.hpp"
44 #ifdef COMPILER2
45 #include "opto/runtime.hpp"
46 #include "opto/c2_globals.hpp"
47 #endif
48 #if INCLUDE_JVMCI
49 #include "jvmci/jvmci_globals.hpp"
3037
3038
3039 /** Generates the "multiplyToLen" stub (or obtains it via SCCache::load_stub) and returns the pc captured right after alignment as its entry.
3040 * Arguments:
3041 *
3042 * Input:
3043 * c_rarg0 - x address
3044 * c_rarg1 - x length
3045 * c_rarg2 - y address
3046 * c_rarg3 - y length
3047 * not Win64
3048 * c_rarg4 - z address
3049 * Win64
3050 * rsp+40 - z address (the stub body reads it at rsp + 6 * wordSize, after enter())
3051 */
3052 address StubGenerator::generate_multiplyToLen() {
3053 __ align(CodeEntryAlignment);
3054 StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
3055 address start = __ pc(); // entry address returned to the caller
3056
3057 if (SCCache::load_stub(this, vmIntrinsics::_multiplyToLen, "multiplyToLen", start)) { // presumably a previously stored copy of the stub was placed at start - TODO confirm SCCache semantics
3058 return start; // nothing below is emitted when load_stub() reports success
3059 }
3060
3061 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3062 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
3063 const Register x = rdi;
3064 const Register xlen = rax; // not where the argument arrives; filled from rsi below
3065 const Register y = rsi; // not where the argument arrives; filled from rdx below
3066 const Register ylen = rcx;
3067 const Register z = r8;
3068
3069 // Next registers will be saved on stack in multiply_to_len().
3070 const Register tmp0 = r11;
3071 const Register tmp1 = r12;
3072 const Register tmp2 = r13;
3073 const Register tmp3 = r14;
3074 const Register tmp4 = r15;
3075 const Register tmp5 = rbx;
3076
3077 BLOCK_COMMENT("Entry:");
3078 __ enter(); // required for proper stackwalking of RuntimeStub frame
3079
3080 setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
3081 // ylen => rcx, z => r8
3082 // r9 and r10 may be used to save non-volatile registers
3083 #ifdef _WIN64
3084 // last argument (#4) is on stack on Win64; NOTE(review): header documents rsp+40, the 6 * wordSize slot used here presumably accounts for the frame pushed by enter() - confirm
3085 __ movptr(z, Address(rsp, 6 * wordSize));
3086 #endif
3087
3088 __ movptr(xlen, rsi); // must come first: the next move overwrites rsi, which still holds xlen
3089 __ movptr(y, rdx); // y arrives in rdx (see setup_arg_regs note above) and is moved into rsi
3090 __ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3091
3092 restore_arg_regs(); // presumably undoes setup_arg_regs(4) - confirm against its definition
3093
3094 __ leave(); // required for proper stackwalking of RuntimeStub frame
3095 __ ret(0);
3096
3097 SCCache::store_stub(this, vmIntrinsics::_multiplyToLen, "multiplyToLen", start); // called only on the generate path, after the final ret; presumably records the new code for later load_stub() hits - TODO confirm
3098 return start;
3099 }
3100
3101 /**
3102 * Arguments:
3103 *
3104 * Input:
3105 * c_rarg0 - obja address
3106 * c_rarg1 - objb address
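3107 * c_rarg2 - length length (NOTE(review): was listed as c_rarg3, skipping c_rarg2 - confirm against the register setup in the body)
3108 * c_rarg3 - scale log2_array_indxscale (NOTE(review): was listed as c_rarg4 - confirm against the register setup in the body)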
3109 *
3110 * Output:
3111 * rax - int >= mismatched index, < 0 bitwise complement of tail
3112 */
3113 address StubGenerator::generate_vectorizedMismatch() {
3114 __ align(CodeEntryAlignment);
3115 StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
3116 address start = __ pc();
3117
3149
3150 return start;
3151 }
3152
3153 /** Generates the "squareToLen" stub (or obtains it via SCCache::load_stub) and returns the pc captured right after alignment as its entry.
3154 * Arguments:
3155 *
3156 * Input:
3157 * c_rarg0 - x address
3158 * c_rarg1 - x length
3159 * c_rarg2 - z address
3160 * c_rarg3 - z length
3161 *
3162 */
3163 address StubGenerator::generate_squareToLen() {
3164
3165 __ align(CodeEntryAlignment);
3166 StubCodeMark mark(this, "StubRoutines", "squareToLen");
3167 address start = __ pc(); // entry address returned to the caller
3168
3169 if (SCCache::load_stub(this, vmIntrinsics::_squareToLen, "squareToLen", start)) { // presumably a previously stored copy of the stub was placed at start - TODO confirm SCCache semantics
3170 return start; // nothing below is emitted when load_stub() reports success
3171 }
3172
3173 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3174 // Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
3175 const Register x = rdi;
3176 const Register len = rsi;
3177 const Register z = r8; // not where the argument arrives; filled from rdx below
3178 const Register zlen = rcx;
3179
3180 const Register tmp1 = r12;
3181 const Register tmp2 = r13;
3182 const Register tmp3 = r14;
3183 const Register tmp4 = r15;
3184 const Register tmp5 = rbx;
3185
3186 BLOCK_COMMENT("Entry:");
3187 __ enter(); // required for proper stackwalking of RuntimeStub frame
3188
3189 setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
3190 // zlen => rcx
3191 // r9 and r10 may be used to save non-volatile registers
3192 __ movptr(r8, rdx); // r8 is z: move the z address out of rdx, which is passed separately to square_to_len() below
3193 __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); // rdx and rax are the trailing register arguments - presumably scratch, confirm in MacroAssembler
3194
3195 restore_arg_regs(); // presumably undoes setup_arg_regs(4) - confirm against its definition
3196
3197 __ leave(); // required for proper stackwalking of RuntimeStub frame
3198 __ ret(0);
3199
3200 SCCache::store_stub(this, vmIntrinsics::_squareToLen, "squareToLen", start); // called only on the generate path, after the final ret; presumably records the new code for later load_stub() hits - TODO confirm
3201 return start;
3202 }
3203
3204 address StubGenerator::generate_method_entry_barrier() {
3205 __ align(CodeEntryAlignment);
3206 StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
3207 address start = __ pc();
3208
3209 Label deoptimize_label;
3210
3211 __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
3212
3213 BLOCK_COMMENT("Entry:");
3214 __ enter(); // save rbp
3215
3216 // save c_rarg0, because we want to use that value.
3217 // We could do without it but then we depend on the number of slots used by pusha
3218 __ push(c_rarg0);
3219
3220 __ lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address
3278 }
3279
3280 /** Generates the "mulAdd" stub (or obtains it via SCCache::load_stub) and returns the pc captured right after alignment as its entry.
3281 * Arguments:
3282 *
3283 * Input:
3284 * c_rarg0 - out address
3285 * c_rarg1 - in address
3286 * c_rarg2 - offset
3287 * c_rarg3 - len
3288 * not Win64
3289 * c_rarg4 - k
3290 * Win64
3291 * rsp+40 - k (the stub body reads it at rsp + 6 * wordSize, after enter())
3292 */
3293 address StubGenerator::generate_mulAdd() {
3294 __ align(CodeEntryAlignment);
3295 StubCodeMark mark(this, "StubRoutines", "mulAdd");
3296 address start = __ pc(); // entry address returned to the caller
3297
3298 if (SCCache::load_stub(this, vmIntrinsics::_mulAdd, "mulAdd", start)) { // presumably a previously stored copy of the stub was placed at start - TODO confirm SCCache semantics
3299 return start; // nothing below is emitted when load_stub() reports success
3300 }
3301
3302 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3303 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
3304 const Register out = rdi;
3305 const Register in = rsi;
3306 const Register offset = r11; // not where the argument arrives; filled from rdx below
3307 const Register len = rcx;
3308 const Register k = r8;
3309
3310 // Next registers will be saved on stack in mul_add().
3311 const Register tmp1 = r12;
3312 const Register tmp2 = r13;
3313 const Register tmp3 = r14;
3314 const Register tmp4 = r15;
3315 const Register tmp5 = rbx;
3316
3317 BLOCK_COMMENT("Entry:");
3318 __ enter(); // required for proper stackwalking of RuntimeStub frame
3319
3320 setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
3321 // len => rcx, k => r8
3322 // r9 and r10 may be used to save non-volatile registers
3323 #ifdef _WIN64
3324 // last argument is on stack on Win64; it is loaded with movl, i.e. as a 32-bit value. NOTE(review): header documents rsp+40, the 6 * wordSize slot used here presumably accounts for the frame pushed by enter() - confirm
3325 __ movl(k, Address(rsp, 6 * wordSize));
3326 #endif
3327 __ movptr(r11, rdx); // move offset in rdx to offset(r11); rdx itself is passed separately to mul_add() below
3328 __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); // rdx and rax are the trailing register arguments - presumably scratch, confirm in MacroAssembler
3329
3330 restore_arg_regs(); // presumably undoes setup_arg_regs(4) - confirm against its definition
3331
3332 __ leave(); // required for proper stackwalking of RuntimeStub frame
3333 __ ret(0);
3334
3335 SCCache::store_stub(this, vmIntrinsics::_mulAdd, "mulAdd", start); // called only on the generate path, after the final ret; presumably records the new code for later load_stub() hits - TODO confirm
3336 return start;
3337 }
3338
3339 address StubGenerator::generate_bigIntegerRightShift() {
3340 __ align(CodeEntryAlignment);
3341 StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
3342 address start = __ pc();
3343
3344 Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
3345 // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
3346 const Register newArr = rdi;
3347 const Register oldArr = rsi;
3348 const Register newIdx = rdx;
3349 const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
3350 const Register totalNumIter = r8;
3351
3352 // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
3353 // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
3354 const Register tmp1 = r11; // Caller save.
3355 const Register tmp2 = rax; // Caller save.
|