1 /*
  2  * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "precompiled.hpp"
 26 #include "asm/macroAssembler.hpp"
 27 #include "compiler/disassembler.hpp"
 28 #include "interpreter/interp_masm.hpp"
 29 #include "interpreter/interpreter.hpp"
 30 #include "interpreter/interpreterRuntime.hpp"
 31 #include "interpreter/templateInterpreterGenerator.hpp"
 32 #include "runtime/sharedRuntime.hpp"
 33 #include "runtime/stubRoutines.hpp"
 34 
 35 #define __ Disassembler::hook<InterpreterMacroAssembler>(__FILE__, __LINE__, _masm)->
 36 
 37 #ifdef _WIN64
 38 address TemplateInterpreterGenerator::generate_slow_signature_handler() {
 39   address entry = __ pc();
 40 
 41   // rbx: method
 42   // r14: pointer to locals
 43   // c_rarg3: first stack arg - wordSize
 44   __ mov(c_rarg3, rsp);
 45   // adjust rsp
 46   __ subptr(rsp, 4 * wordSize);
 47   __ call_VM(noreg,
 48              CAST_FROM_FN_PTR(address,
 49                               InterpreterRuntime::slow_signature_handler),
 50              rbx, r14, c_rarg3);
 51 
 52   // rax: result handler
 53 
 54   // Stack layout:
 55   // rsp: 3 integer or float args (if static first is unused)
 56   //      1 float/double identifiers
 57   //        return address
 58   //        stack args
 59   //        garbage
 60   //        expression stack bottom
 61   //        bcp (NULL)
 62   //        ...
 63 
 64   // Do FP first so we can use c_rarg3 as temp
 65   __ movl(c_rarg3, Address(rsp, 3 * wordSize)); // float/double identifiers
 66 
 67   for ( int i= 0; i < Argument::n_int_register_parameters_c-1; i++ ) {
 68     XMMRegister floatreg = as_XMMRegister(i+1);
 69     Label isfloatordouble, isdouble, next;
 70 
 71     __ testl(c_rarg3, 1 << (i*2));      // Float or Double?
 72     __ jcc(Assembler::notZero, isfloatordouble);
 73 
 74     // Do Int register here
 75     switch ( i ) {
 76       case 0:
 77         __ movl(rscratch1, Address(rbx, Method::access_flags_offset()));
 78         __ testl(rscratch1, JVM_ACC_STATIC);
 79         __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0));
 80         break;
 81       case 1:
 82         __ movptr(c_rarg2, Address(rsp, wordSize));
 83         break;
 84       case 2:
 85         __ movptr(c_rarg3, Address(rsp, 2 * wordSize));
 86         break;
 87       default:
 88         break;
 89     }
 90 
 91     __ jmp (next);
 92 
 93     __ bind(isfloatordouble);
 94     __ testl(c_rarg3, 1 << ((i*2)+1));     // Double?
 95     __ jcc(Assembler::notZero, isdouble);
 96 
 97 // Do Float Here
 98     __ movflt(floatreg, Address(rsp, i * wordSize));
 99     __ jmp(next);
100 
101 // Do Double here
102     __ bind(isdouble);
103     __ movdbl(floatreg, Address(rsp, i * wordSize));
104 
105     __ bind(next);
106   }
107 
108 
109   // restore rsp
110   __ addptr(rsp, 4 * wordSize);
111 
112   __ ret(0);
113 
114   return entry;
115 }
116 #else
117 address TemplateInterpreterGenerator::generate_slow_signature_handler() {
118   address entry = __ pc();
119 
120   // rbx: method
121   // r14: pointer to locals
122   // c_rarg3: first stack arg - wordSize
123   __ mov(c_rarg3, rsp);
124   // adjust rsp
125   __ subptr(rsp, 14 * wordSize);
126   __ call_VM(noreg,
127              CAST_FROM_FN_PTR(address,
128                               InterpreterRuntime::slow_signature_handler),
129              rbx, r14, c_rarg3);
130 
131   // rax: result handler
132 
133   // Stack layout:
134   // rsp: 5 integer args (if static first is unused)
135   //      1 float/double identifiers
136   //      8 double args
137   //        return address
138   //        stack args
139   //        garbage
140   //        expression stack bottom
141   //        bcp (NULL)
142   //        ...
143 
144   // Do FP first so we can use c_rarg3 as temp
145   __ movl(c_rarg3, Address(rsp, 5 * wordSize)); // float/double identifiers
146 
147   for (int i = 0; i < Argument::n_float_register_parameters_c; i++) {
148     const XMMRegister r = as_XMMRegister(i);
149 
150     Label d, done;
151 
152     __ testl(c_rarg3, 1 << i);
153     __ jcc(Assembler::notZero, d);
154     __ movflt(r, Address(rsp, (6 + i) * wordSize));
155     __ jmp(done);
156     __ bind(d);
157     __ movdbl(r, Address(rsp, (6 + i) * wordSize));
158     __ bind(done);
159   }
160 
161   // Now handle integrals.  Only do c_rarg1 if not static.
162   __ movl(c_rarg3, Address(rbx, Method::access_flags_offset()));
163   __ testl(c_rarg3, JVM_ACC_STATIC);
164   __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0));
165 
166   __ movptr(c_rarg2, Address(rsp, wordSize));
167   __ movptr(c_rarg3, Address(rsp, 2 * wordSize));
168   __ movptr(c_rarg4, Address(rsp, 3 * wordSize));
169   __ movptr(c_rarg5, Address(rsp, 4 * wordSize));
170 
171   // restore rsp
172   __ addptr(rsp, 14 * wordSize);
173 
174   __ ret(0);
175 
176   return entry;
177 }
#endif  // _WIN64
179 
180 /**
181  * Method entry for static native methods:
182  *   int java.util.zip.CRC32.update(int crc, int b)
183  */
184 address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
185   if (UseCRC32Intrinsics) {
186     address entry = __ pc();
187 
188     // rbx,: Method*
189     // r13: senderSP must preserved for slow path, set SP to it on fast path
190     // c_rarg0: scratch (rdi on non-Win64, rcx on Win64)
191     // c_rarg1: scratch (rsi on non-Win64, rdx on Win64)
192 
193     Label slow_path;
194     __ safepoint_poll(slow_path, r15_thread, true /* at_return */, false /* in_nmethod */);
195 
196     // We don't generate local frame and don't align stack because
197     // we call stub code and there is no safepoint on this path.
198 
199     // Load parameters
200     const Register crc = rax;  // crc
201     const Register val = c_rarg0;  // source java byte value
202     const Register tbl = c_rarg1;  // scratch
203 
204     // Arguments are reversed on java expression stack
205     __ movl(val, Address(rsp,   wordSize)); // byte value
206     __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC
207 
208     __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr()));
209     __ notl(crc); // ~crc
210     __ update_byte_crc32(crc, val, tbl);
211     __ notl(crc); // ~crc
212     // result in rax
213 
214     // _areturn
215     __ pop(rdi);                // get return address
216     __ mov(rsp, r13);           // set sp to sender sp
217     __ jmp(rdi);
218 
219     // generate a vanilla native entry as the slow path
220     __ bind(slow_path);
221     __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
222     return entry;
223   }
224   return NULL;
225 }
226 
227 /**
228  * Method entry for static native methods:
229  *   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
230  *   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
231  */
232 address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
233   if (UseCRC32Intrinsics) {
234     address entry = __ pc();
235 
236     // rbx,: Method*
237     // r13: senderSP must preserved for slow path, set SP to it on fast path
238 
239     Label slow_path;
240     __ safepoint_poll(slow_path, r15_thread, false /* at_return */, false /* in_nmethod */);
241 
242     // We don't generate local frame and don't align stack because
243     // we call stub code and there is no safepoint on this path.
244 
245     // Load parameters
246     const Register crc = c_rarg0;  // crc
247     const Register buf = c_rarg1;  // source java byte array address
248     const Register len = c_rarg2;  // length
249     const Register off = len;      // offset (never overlaps with 'len')
250 
251     // Arguments are reversed on java expression stack
252     // Calculate address of start element
253     if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
254       __ movptr(buf, Address(rsp, 3*wordSize)); // long buf
255       __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset
256       __ addq(buf, off); // + offset
257       __ movl(crc,   Address(rsp, 5*wordSize)); // Initial CRC
258     } else {
259       __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array
260       __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
261       __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset
262       __ addq(buf, off); // + offset
263       __ movl(crc,   Address(rsp, 4*wordSize)); // Initial CRC
264     }
265     // Can now load 'len' since we're finished with 'off'
266     __ movl(len, Address(rsp, wordSize)); // Length
267 
268     __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
269     // result in rax
270 
271     // _areturn
272     __ pop(rdi);                // get return address
273     __ mov(rsp, r13);           // set sp to sender sp
274     __ jmp(rdi);
275 
276     // generate a vanilla native entry as the slow path
277     __ bind(slow_path);
278     __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
279     return entry;
280   }
281   return NULL;
282 }
283 
284 /**
285 * Method entry for static (non-native) methods:
286 *   int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
287 *   int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end)
288 */
289 address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
290   if (UseCRC32CIntrinsics) {
291     address entry = __ pc();
292     // Load parameters
293     const Register crc = c_rarg0;  // crc
294     const Register buf = c_rarg1;  // source java byte array address
295     const Register len = c_rarg2;
296     const Register off = c_rarg3;  // offset
297     const Register end = len;
298 
299     // Arguments are reversed on java expression stack
300     // Calculate address of start element
301     if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
302       __ movptr(buf, Address(rsp, 3 * wordSize)); // long address
303       __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset
304       __ addq(buf, off); // + offset
305       __ movl(crc, Address(rsp, 5 * wordSize)); // Initial CRC
306       // Note on 5 * wordSize vs. 4 * wordSize:
307       // *   int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end)
308       //                                                   4         2,3          1        0
309       // end starts at SP + 8
310       // The Java(R) Virtual Machine Specification Java SE 7 Edition
311       // 4.10.2.3. Values of Types long and double
312       //    "When calculating operand stack length, values of type long and double have length two."
313     } else {
314       __ movptr(buf, Address(rsp, 3 * wordSize)); // byte[] array
315       __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
316       __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset
317       __ addq(buf, off); // + offset
318       __ movl(crc, Address(rsp, 4 * wordSize)); // Initial CRC
319     }
320     __ movl(end, Address(rsp, wordSize)); // end
321     __ subl(end, off); // end - off
322     __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len);
323     // result in rax
324     // _areturn
325     __ pop(rdi);                // get return address
326     __ mov(rsp, r13);           // set sp to sender sp
327     __ jmp(rdi);
328 
329     return entry;
330   }
331 
332   return NULL;
333 }
334 
335 //
336 // Various method entries
337 //
338 
339 address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
340 
341   // rbx,: Method*
342   // rcx: scratrch
343   // r13: sender sp
344 
345   if (!InlineIntrinsics) return NULL; // Generate a vanilla entry
346 
347   address entry_point = __ pc();
348 
349   // These don't need a safepoint check because they aren't virtually
350   // callable. We won't enter these intrinsics from compiled code.
351   // If in the future we added an intrinsic which was virtually callable
352   // we'd have to worry about how to safepoint so that this code is used.
353 
354   // mathematical functions inlined by compiler
355   // (interpreter must provide identical implementation
356   // in order to avoid monotonicity bugs when switching
357   // from interpreter to compiler in the middle of some
358   // computation)
359   //
360   // stack: [ ret adr ] <-- rsp
361   //        [ lo(arg) ]
362   //        [ hi(arg) ]
363   //
364 
365   if (kind == Interpreter::java_lang_math_fmaD) {
366     if (!UseFMA) {
367       return NULL; // Generate a vanilla entry
368     }
369     __ movdbl(xmm0, Address(rsp, wordSize));
370     __ movdbl(xmm1, Address(rsp, 3 * wordSize));
371     __ movdbl(xmm2, Address(rsp, 5 * wordSize));
372     __ fmad(xmm0, xmm1, xmm2, xmm0);
373   } else if (kind == Interpreter::java_lang_math_fmaF) {
374     if (!UseFMA) {
375       return NULL; // Generate a vanilla entry
376     }
377     __ movflt(xmm0, Address(rsp, wordSize));
378     __ movflt(xmm1, Address(rsp, 2 * wordSize));
379     __ movflt(xmm2, Address(rsp, 3 * wordSize));
380     __ fmaf(xmm0, xmm1, xmm2, xmm0);
381   } else if (kind == Interpreter::java_lang_math_sqrt) {
382     __ sqrtsd(xmm0, Address(rsp, wordSize));
383   } else if (kind == Interpreter::java_lang_math_exp) {
384     __ movdbl(xmm0, Address(rsp, wordSize));
385     if (StubRoutines::dexp() != NULL) {
386       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
387     } else {
388       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp));
389     }
390   } else if (kind == Interpreter::java_lang_math_log) {
391     __ movdbl(xmm0, Address(rsp, wordSize));
392     if (StubRoutines::dlog() != NULL) {
393       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
394     } else {
395       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog));
396     }
397   } else if (kind == Interpreter::java_lang_math_log10) {
398     __ movdbl(xmm0, Address(rsp, wordSize));
399     if (StubRoutines::dlog10() != NULL) {
400       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
401     } else {
402       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10));
403     }
404   } else if (kind == Interpreter::java_lang_math_sin) {
405     __ movdbl(xmm0, Address(rsp, wordSize));
406     if (StubRoutines::dsin() != NULL) {
407       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
408     } else {
409       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin));
410     }
411   } else if (kind == Interpreter::java_lang_math_cos) {
412     __ movdbl(xmm0, Address(rsp, wordSize));
413     if (StubRoutines::dcos() != NULL) {
414       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
415     } else {
416       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos));
417     }
418   } else if (kind == Interpreter::java_lang_math_pow) {
419     __ movdbl(xmm1, Address(rsp, wordSize));
420     __ movdbl(xmm0, Address(rsp, 3 * wordSize));
421     if (StubRoutines::dpow() != NULL) {
422       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
423     } else {
424       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow));
425     }
426   } else if (kind == Interpreter::java_lang_math_tan) {
427     __ movdbl(xmm0, Address(rsp, wordSize));
428     if (StubRoutines::dtan() != NULL) {
429       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
430     } else {
431       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan));
432     }
433   } else if (kind == Interpreter::java_lang_math_abs) {
434     assert(StubRoutines::x86::double_sign_mask() != NULL, "not initialized");
435     __ movdbl(xmm0, Address(rsp, wordSize));
436     __ andpd(xmm0, ExternalAddress(StubRoutines::x86::double_sign_mask()));
437   } else {
438     ShouldNotReachHere();
439   }
440 
441   __ pop(rax);
442   __ mov(rsp, r13);
443   __ jmp(rax);
444 
445   return entry_point;
446 }
447 
448 address TemplateInterpreterGenerator::generate_currentThread() {
449 
450   address entry_point = __ pc();
451 
452   __ movptr(rax, Address(r15_thread, JavaThread::threadObj_offset()));
453 
454   __ resolve_oop_handle(rax, rscratch1);
455 
456   __ pop(rcx);
457   __ mov(rsp, r13);
458   __ jmp(rcx);
459 
460   return entry_point;
461 }
462