
src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp



   1 /*
   2  * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "code/debugInfoRec.hpp"
  29 #include "code/icBuffer.hpp"
  30 #include "code/nativeInst.hpp"
  31 #include "code/vtableStubs.hpp"
  32 #include "gc/shared/gcLocker.hpp"
  33 #include "interpreter/interpreter.hpp"
  34 #include "logging/log.hpp"
  35 #include "memory/resourceArea.hpp"
  36 #include "oops/compiledICHolder.hpp"
  37 #include "runtime/safepointMechanism.hpp"
  38 #include "runtime/sharedRuntime.hpp"
  39 #include "runtime/vframeArray.hpp"
  40 #include "utilities/align.hpp"
  41 #include "vmreg_x86.inline.hpp"
  42 #ifdef COMPILER1
  43 #include "c1/c1_Runtime1.hpp"
  44 #endif
  45 #ifdef COMPILER2
  46 #include "opto/runtime.hpp"
  47 #endif
  48 #include "vm_version_x86.hpp"
  49 
  50 #define __ masm->
  51 
  52 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
  53 
  54 class RegisterSaver {
  55   // Capture info about frame layout
  56 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
  57   enum layout {
  58                 fpu_state_off = 0,
  59                 fpu_state_end = fpu_state_off+FPUStateSizeInWords,
  60                 st0_off, st0H_off,
  61                 st1_off, st1H_off,
  62                 st2_off, st2H_off,
  63                 st3_off, st3H_off,
  64                 st4_off, st4H_off,
  65                 st5_off, st5H_off,
  66                 st6_off, st6H_off,
  67                 st7_off, st7H_off,
  68                 xmm_off,
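
To make the frame layout concrete: DEF_XMM_OFFS is a token-pasting macro, and each XMM register occupies 16 bytes, i.e. 16/BytesPerInt == 4 int-sized slots, of which only the first two get named enumerators. A minimal expansion for regnum 0 (BytesPerInt is 4):

    DEF_XMM_OFFS(0)
    // expands, via ## pasting, to the two enumerators:
    //   xmm0_off  = xmm_off + (0)*16/BytesPerInt,  // == xmm_off
    //   xmm0H_off                                  // == xmm_off + 1 (auto-increment)
    // The register's remaining two slots stay unnamed; the next register's
    // _off is pinned 4 slots later by its own explicit initializer.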


1821   } else {
1822     __ empty_FPU_stack();
1823   }
1824 #endif /* COMPILER2 */
1825 
 1826   // Compute the rbp offset for any slots used after the jni call
1827 
1828   int lock_slot_rbp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
1829 
1830   // We use rdi as a thread pointer because it is callee save and
1831   // if we load it once it is usable thru the entire wrapper
1832   const Register thread = rdi;
1833 
1834   // We use rsi as the oop handle for the receiver/klass
1835   // It is callee save so it survives the call to native
1836 
1837   const Register oop_handle_reg = rsi;
1838 
1839   __ get_thread(thread);
1840 
1841   if (is_critical_native) {
1842     check_needs_gc_for_critical_native(masm, thread, stack_slots, total_c_args, total_in_args,
1843                                        oop_handle_offset, oop_maps, in_regs, in_sig_bt);
1844   }
1845 
1846   //
 1847   // We immediately shuffle the arguments so that, for any vm call we have
 1848   // to make from here on out (sync slow path, jvmti, etc.), we will have
 1849   // captured the oops from our caller and have a valid oopMap for
 1850   // them.
1851 
1852   // -----------------
1853   // The Grand Shuffle
1854   //
1855   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1856   // and, if static, the class mirror instead of a receiver.  This pretty much
 1857   // guarantees that register layout will not match (and x86 doesn't use reg
 1858   // parms though amd64 does).  Since the native abi doesn't use register args
 1859   // and the java convention does, we don't have to worry about collisions.
 1860   // All of our moves are reg->stack or stack->stack.
1861   // We ignore the extra arguments during the shuffle and handle them at the
1862   // last moment. The shuffle is described by the two calling convention
1863   // vectors we have in our possession. We simply walk the java vector to
1864   // get the source locations and the c vector to get the destinations.
1865 
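
A concrete example of the shuffle, with register and slot assignments invented for illustration: for static native int m(int a, Object b), the loop below might emit

    // java (incoming)          c (outgoing)
    // ----------------         ---------------------------------------
    //                          [rsp+0]  JNIEnv*       (filled in last)
    //                          [rsp+4]  class mirror  (filled in last)
    // a in rcx          -->    [rsp+8]  int a         reg->stack move
    // b in rdx          -->    handle slot + OopMap entry; the slot's
    //                          address is stored to [rsp+12] as jobject b

The two leading C slots are the "extra arguments" ignored during the shuffle, which is why c_arg starts at 2 for a static method.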
1866   int c_arg = is_critical_native ? 0 : (method->is_static() ? 2 : 1 );
1867 
1868   // Record rsp-based slot for receiver on stack for non-static methods
1869   int receiver_offset = -1;
1870 
1871   // This is a trick. We double the stack slots so we can claim
1872   // the oops in the caller's frame. Since we are sure to have
 1873   // more args than the caller, doubling is enough to make
1874   // sure we can capture all the incoming oop args from the
1875   // caller.
1876   //
1877   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
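
A minimal worked instance of the doubling (numbers invented): with stack_slots == 8, an incoming oop sitting two words into the caller's frame is described at slot index 8 + 2 == 10, which is only a legal index because the map was sized stack_slots * 2 == 16:

    // sketch only, not code from this file:
    map->set_oop(VMRegImpl::stack2reg(stack_slots + 2));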
 1878 
 1879   // Mark location of rbp
1880   // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());
1881 
 1882   // We know that we only have args in at most two integer registers (rcx, rdx). So rax, rbx
 1883   // are free as temporaries if we have to do stack to stack moves.
1884   // All inbound args are referenced based on rbp, and all outbound args via rsp.
1885 
1886   for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
1887     switch (in_sig_bt[i]) {
1888       case T_ARRAY:
 1889         if (is_critical_native) {
 1890           unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
1891           c_arg++;
1892           break;
1893         }
1894       case T_OBJECT:
1895         assert(!is_critical_native, "no oop arguments");
1896         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1897                     ((i == 0) && (!is_static)),
1898                     &receiver_offset);
1899         break;
1900       case T_VOID:
1901         break;
1902 
1903       case T_FLOAT:
1904         float_move(masm, in_regs[i], out_regs[c_arg]);
 1905         break;
1906 
1907       case T_DOUBLE:
1908         assert( i + 1 < total_in_args &&
1909                 in_sig_bt[i + 1] == T_VOID &&
1910                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
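
The T_ARRAY branch above exists because a critical native receives a primitive array as a (length, body-pointer) pair rather than as a jobject, which is why unpack_array_argument fills two outgoing C slots and the loop does an extra c_arg++. A hypothetical critical-native counterpart, with names invented for illustration:

    // Java:   static native int sum(int[] a);
    // Normal JNI entry point:
    //   jint JNICALL Java_p_C_sum(JNIEnv* env, jclass k, jintArray a);
    // Critical entry point this wrapper targets: no JNIEnv*, no mirror,
    // and the array arrives as two C arguments:
    //   jint JavaCritical_p_C_sum(jint a_length, jint* a_body);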


2066 
2067   // Unpack native results.
2068   switch (ret_type) {
2069   case T_BOOLEAN: __ c2bool(rax);            break;
2070   case T_CHAR   : __ andptr(rax, 0xFFFF);    break;
2071   case T_BYTE   : __ sign_extend_byte (rax); break;
2072   case T_SHORT  : __ sign_extend_short(rax); break;
2073   case T_INT    : /* nothing to do */        break;
2074   case T_DOUBLE :
2075   case T_FLOAT  :
 2076     // Result is in st0; we'll save as needed
2077     break;
2078   case T_ARRAY:                 // Really a handle
2079   case T_OBJECT:                // Really a handle
2080       break; // can't de-handlize until after safepoint check
2081   case T_VOID: break;
2082   case T_LONG: break;
2083   default       : ShouldNotReachHere();
2084   }
2085 
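For T_OBJECT/T_ARRAY, what rax holds here is a JNI handle (the address of a slot containing the oop), and it cannot be dereferenced until the thread is safely past the safepoint check below. A minimal sketch of the eventual unboxing, assuming the usual NULL-handle convention (an illustration, not the verbatim code further down):

    Label L;
    __ cmpptr(rax, 0);               // a NULL handle stays NULL
    __ jcc(Assembler::equal, L);
    __ movptr(rax, Address(rax, 0)); // load the oop out of the handle
    __ bind(L);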
2086   // Switch thread to "native transition" state before reading the synchronization state.
2087   // This additional state is necessary because reading and testing the synchronization
2088   // state is not atomic w.r.t. GC, as this scenario demonstrates:
2089   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2090   //     VM thread changes sync state to synchronizing and suspends threads for GC.
2091   //     Thread A is resumed to finish this native method, but doesn't block here since it
 2092   //     didn't see any synchronization in progress, and escapes.
2093   __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
2094 
 2095   if (os::is_MP()) {
2096     if (UseMembar) {
2097       // Force this write out before the read below
2098       __ membar(Assembler::Membar_mask_bits(
2099            Assembler::LoadLoad | Assembler::LoadStore |
2100            Assembler::StoreLoad | Assembler::StoreStore));
2101     } else {
2102       // Write serialization page so VM thread can do a pseudo remote membar.
2103       // We use the current thread pointer to calculate a thread specific
2104       // offset to write to within the page. This minimizes bus traffic
2105       // due to cache line collision.
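
Stated as a protocol, what the transition code above implements is: publish the new thread state, force that store to be globally visible, and only then examine the safepoint state. In effect (state names are the real ones; instruction selection simplified):

    // 1. thread->_thread_state = _thread_in_native_trans   (the movl above)
    // 2. StoreLoad barrier: either an explicit membar (UseMembar) or a write
    //    to the safepoint serialization page (the else branch)
    // 3. test the safepoint/suspend state; if a safepoint is in progress,
    //    call back into the VM and block before completing the transition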


   1 /*
   2  * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "code/debugInfoRec.hpp"
  29 #include "code/icBuffer.hpp"
  30 #include "code/nativeInst.hpp"
  31 #include "code/vtableStubs.hpp"
  32 #include "gc/shared/gcLocker.hpp"
  33 #include "interpreter/interpreter.hpp"
  34 #include "logging/log.hpp"
  35 #include "memory/resourceArea.hpp"
  36 #include "oops/compiledICHolder.hpp"
  37 #include "runtime/safepointMechanism.hpp"
  38 #include "runtime/sharedRuntime.hpp"
  39 #include "runtime/vframeArray.hpp"
  40 #include "utilities/align.hpp"
  41 #include "utilities/macros.hpp"
  42 #include "vmreg_x86.inline.hpp"
  43 #ifdef COMPILER1
  44 #include "c1/c1_Runtime1.hpp"
  45 #endif
  46 #ifdef COMPILER2
  47 #include "opto/runtime.hpp"
  48 #endif
  49 #include "vm_version_x86.hpp"
  50 #if INCLUDE_SHENANDOAHGC
  51 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  52 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  53 #endif
  54 
  55 #define __ masm->
  56 
  57 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
  58 
  59 class RegisterSaver {
  60   // Capture info about frame layout
  61 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
  62   enum layout {
  63                 fpu_state_off = 0,
  64                 fpu_state_end = fpu_state_off+FPUStateSizeInWords,
  65                 st0_off, st0H_off,
  66                 st1_off, st1H_off,
  67                 st2_off, st2H_off,
  68                 st3_off, st3H_off,
  69                 st4_off, st4H_off,
  70                 st5_off, st5H_off,
  71                 st6_off, st6H_off,
  72                 st7_off, st7H_off,
  73                 xmm_off,


1826   } else {
1827     __ empty_FPU_stack();
1828   }
1829 #endif /* COMPILER2 */
1830 
 1831   // Compute the rbp offset for any slots used after the jni call
1832 
1833   int lock_slot_rbp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
1834 
1835   // We use rdi as a thread pointer because it is callee save and
1836   // if we load it once it is usable thru the entire wrapper
1837   const Register thread = rdi;
1838 
1839   // We use rsi as the oop handle for the receiver/klass
1840   // It is callee save so it survives the call to native
1841 
1842   const Register oop_handle_reg = rsi;
1843 
1844   __ get_thread(thread);
1845 
1846   if (is_critical_native SHENANDOAHGC_ONLY(&& !UseShenandoahGC)) {
1847     check_needs_gc_for_critical_native(masm, thread, stack_slots, total_c_args, total_in_args,
1848                                        oop_handle_offset, oop_maps, in_regs, in_sig_bt);
1849   }
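
The SHENANDOAHGC_ONLY guard comes from utilities/macros.hpp (hence the include newly added above); it expands to its argument only in builds that include Shenandoah, following the usual per-GC pattern:

    #if INCLUDE_SHENANDOAHGC
    #define SHENANDOAHGC_ONLY(code) code
    #else
    #define SHENANDOAHGC_ONLY(code)
    #endif

So the condition reads "is_critical_native && !UseShenandoahGC" in Shenandoah builds and plain "is_critical_native" elsewhere: with Shenandoah the array arguments are pinned (below) instead of blocking GC around the critical section.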
1850 
1851   //
 1852   // We immediately shuffle the arguments so that, for any vm call we have
 1853   // to make from here on out (sync slow path, jvmti, etc.), we will have
 1854   // captured the oops from our caller and have a valid oopMap for
 1855   // them.
1856 
1857   // -----------------
1858   // The Grand Shuffle
1859   //
1860   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1861   // and, if static, the class mirror instead of a receiver.  This pretty much
 1862   // guarantees that register layout will not match (and x86 doesn't use reg
 1863   // parms though amd64 does).  Since the native abi doesn't use register args
 1864   // and the java convention does, we don't have to worry about collisions.
 1865   // All of our moves are reg->stack or stack->stack.
1866   // We ignore the extra arguments during the shuffle and handle them at the
1867   // last moment. The shuffle is described by the two calling convention
1868   // vectors we have in our possession. We simply walk the java vector to
1869   // get the source locations and the c vector to get the destinations.
1870 
1871   int c_arg = is_critical_native ? 0 : (method->is_static() ? 2 : 1 );
1872 
1873   // Record rsp-based slot for receiver on stack for non-static methods
1874   int receiver_offset = -1;
1875 
1876   // This is a trick. We double the stack slots so we can claim
1877   // the oops in the caller's frame. Since we are sure to have
 1878   // more args than the caller, doubling is enough to make
1879   // sure we can capture all the incoming oop args from the
1880   // caller.
1881   //
1882   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1883 
1884 #if INCLUDE_SHENANDOAHGC
1885   // Inbound arguments that need to be pinned for critical natives
1886   GrowableArray<int> pinned_args(total_in_args);
1887   // Current stack slot for storing register based array argument
1888   int pinned_slot = oop_handle_offset;
1889 #endif
 1890   // Mark location of rbp
1891   // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());
1892 
 1893   // We know that we only have args in at most two integer registers (rcx, rdx). So rax, rbx
 1894   // are free as temporaries if we have to do stack to stack moves.
1895   // All inbound args are referenced based on rbp, and all outbound args via rsp.
1896 
1897   for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
1898     switch (in_sig_bt[i]) {
1899       case T_ARRAY:
1900         if (is_critical_native) {
1901 #if INCLUDE_SHENANDOAHGC
1902           VMRegPair in_arg = in_regs[i];
1903           if (UseShenandoahGC) {
1904             // gen_pin_object handles save and restore
1905             // of any clobbered registers
1906             ShenandoahBarrierSet::assembler()->gen_pin_object(masm, thread, in_arg);
1907             pinned_args.append(i);
1908 
1909             // rax has pinned array
1910             VMRegPair result_reg(rax->as_VMReg());
1911             if (!in_arg.first()->is_stack()) {
1912               assert(pinned_slot <= stack_slots, "overflow");
1913               simple_move32(masm, result_reg, VMRegImpl::stack2reg(pinned_slot));
1914               pinned_slot += VMRegImpl::slots_per_word;
1915             } else {
1916               // Write back pinned value, it will be used to unpin this argument
1917               __ movptr(Address(rbp, reg2offset_in(in_arg.first())), result_reg.first()->as_Register());
1918             }
1919             // We have the array in register, use it
1920             in_arg = result_reg;
1921           }
1922           unpack_array_argument(masm, in_arg, in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
1923 #else
1924           unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
1925 #endif
1926           c_arg++;
1927           break;
1928         }
1929       case T_OBJECT:
1930         assert(!is_critical_native, "no oop arguments");
1931         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1932                     ((i == 0) && (!is_static)),
1933                     &receiver_offset);
1934         break;
1935       case T_VOID:
1936         break;
1937 
1938       case T_FLOAT:
1939         float_move(masm, in_regs[i], out_regs[c_arg]);
 1940         break;
1941 
1942       case T_DOUBLE:
1943         assert( i + 1 < total_in_args &&
1944                 in_sig_bt[i + 1] == T_VOID &&
1945                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");


2101 
2102   // Unpack native results.
2103   switch (ret_type) {
2104   case T_BOOLEAN: __ c2bool(rax);            break;
2105   case T_CHAR   : __ andptr(rax, 0xFFFF);    break;
2106   case T_BYTE   : __ sign_extend_byte (rax); break;
2107   case T_SHORT  : __ sign_extend_short(rax); break;
2108   case T_INT    : /* nothing to do */        break;
2109   case T_DOUBLE :
2110   case T_FLOAT  :
 2111     // Result is in st0; we'll save as needed
2112     break;
2113   case T_ARRAY:                 // Really a handle
2114   case T_OBJECT:                // Really a handle
2115       break; // can't de-handlize until after safepoint check
2116   case T_VOID: break;
2117   case T_LONG: break;
2118   default       : ShouldNotReachHere();
2119   }
2120 
2121 #if INCLUDE_SHENANDOAHGC
2122   if (UseShenandoahGC) {
2123     // unpin pinned arguments
2124     pinned_slot = oop_handle_offset;
2125     if (pinned_args.length() > 0) {
2126       // save return value that may be overwritten otherwise.
2127       save_native_result(masm, ret_type, stack_slots);
2128       for (int index = 0; index < pinned_args.length(); index ++) {
2129         int i = pinned_args.at(index);
2130         assert(pinned_slot <= stack_slots, "overflow");
2131         if (!in_regs[i].first()->is_stack()) {
2132           int offset = pinned_slot * VMRegImpl::stack_slot_size;
2133           __ movl(in_regs[i].first()->as_Register(), Address(rsp, offset));
2134           pinned_slot += VMRegImpl::slots_per_word;
2135         }
2136         // gen_pin_object handles save and restore
2137         // of any other clobbered registers
2138         ShenandoahBarrierSet::assembler()->gen_unpin_object(masm, thread, in_regs[i]);
2139       }
2140       restore_native_result(masm, ret_type, stack_slots);
2141     }
2142   }
2143 #endif
2144   // Switch thread to "native transition" state before reading the synchronization state.
2145   // This additional state is necessary because reading and testing the synchronization
2146   // state is not atomic w.r.t. GC, as this scenario demonstrates:
2147   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2148   //     VM thread changes sync state to synchronizing and suspends threads for GC.
2149   //     Thread A is resumed to finish this native method, but doesn't block here since it
 2150   //     didn't see any synchronization in progress, and escapes.
2151   __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
2152 
 2153   if (os::is_MP()) {
2154     if (UseMembar) {
2155       // Force this write out before the read below
2156       __ membar(Assembler::Membar_mask_bits(
2157            Assembler::LoadLoad | Assembler::LoadStore |
2158            Assembler::StoreLoad | Assembler::StoreStore));
2159     } else {
2160       // Write serialization page so VM thread can do a pseudo remote membar.
2161       // We use the current thread pointer to calculate a thread specific
2162       // offset to write to within the page. This minimizes bus traffic
2163       // due to cache line collision.
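
The serialization-page scheme works as the comment says: every thread writes to its own offset within one shared page, and when the VM thread needs a global barrier it write-protects that page, so any thread racing through this window traps and is thereby serialized. The continuation (elided here) is essentially a single store, sketched below with the offset computation simplified; serialize_memory is assumed to be the MacroAssembler helper of this era, not quoted from this page:

    // __ serialize_memory(thread, rcx);  // store to page + f(thread): a cheap
    //                                    // thread-local write the VM thread can
    //                                    // convert into a remote membar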

