Old version:

    } else if (in_regs[i].first()->is_XMMRegister()) {
      if (in_sig_bt[i] == T_FLOAT) {
        int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
        int offset = slot * VMRegImpl::stack_slot_size;
        assert(handle_index <= stack_slots, "overflow");
        if (map != NULL) {
          __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
        } else {
          __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
        }
      }
    } else if (in_regs[i].first()->is_stack()) {
      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
        int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
        map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
      }
    }
  }
}

// Check GC_locker::needs_gc and enter the runtime if it's true. This
// keeps a new JNI critical region from starting until a GC has been
// forced. Save down any oops in registers and describe them in an
// OopMap.
static void check_needs_gc_for_critical_native(MacroAssembler* masm,
                                               Register thread,
                                               int stack_slots,
                                               int total_c_args,
                                               int total_in_args,
                                               int arg_save_area,
                                               OopMapSet* oop_maps,
                                               VMRegPair* in_regs,
                                               BasicType* in_sig_bt) {
  __ block_comment("check GC_locker::needs_gc");
  Label cont;
  __ cmp8(ExternalAddress((address)GC_locker::needs_gc_address()), false);
  __ jcc(Assembler::equal, cont);

  // Save down any incoming oops and call into the runtime to halt for a GC
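For context, the stub being generated here guards JNI critical regions: a critical native touches array storage directly between GetPrimitiveArrayCritical and ReleasePrimitiveArrayCritical, and GC_locker tracks such regions. A minimal, purely illustrative example (hypothetical names) of the kind of region this check keeps from starting while a GC is pending:

#include <jni.h>

// Illustrative only, not part of the patch. Between the Get/Release pair the VM must not
// move the array, which is what GC_locker guarantees while the critical region is active.
static jint sum_critical(JNIEnv* env, jintArray arr) {
  jint  n    = env->GetArrayLength(arr);
  jint* body = (jint*) env->GetPrimitiveArrayCritical(arr, NULL);  // enters a critical region
  jint  s    = 0;
  for (jint i = 0; i < n; i++) {
    s += body[i];
  }
  env->ReleasePrimitiveArrayCritical(arr, body, JNI_ABORT);        // leaves the critical region
  return s;
}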
...

  } else {
    __ empty_FPU_stack();
  }
#endif /* COMPILER2 */

  // Compute the rbp offset for any slots used after the jni call

  int lock_slot_rbp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;

  // We use rdi as a thread pointer because it is callee save and
  // if we load it once it is usable through the entire wrapper
  const Register thread = rdi;

  // We use rsi as the oop handle for the receiver/klass
  // It is callee save so it survives the call to native

  const Register oop_handle_reg = rsi;

  __ get_thread(thread);

  if (is_critical_native) {
    check_needs_gc_for_critical_native(masm, thread, stack_slots, total_c_args, total_in_args,
                                       oop_handle_offset, oop_maps, in_regs, in_sig_bt);
  }

  //
  // We immediately shuffle the arguments so that any vm call we have to
  // make from here on out (sync slow path, jvmti, etc.) we will have
  // captured the oops from our caller and have a valid oopMap for
  // them.

  // -----------------
  // The Grand Shuffle
  //
  // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
  // and, if static, the class mirror instead of a receiver. This pretty much
  // guarantees that register layout will not match (and x86 doesn't use reg
  // parms though amd64 does). Since the native abi doesn't use register args
  // and the java convention does, we don't have to worry about collisions.
  // All of our moves are reg->stack or stack->stack.
  // We ignore the extra arguments during the shuffle and handle them at the
  // last moment. The shuffle is described by the two calling convention
  // vectors we have in our possession. We simply walk the java vector to
  // get the source locations and the c vector to get the destinations.

  int c_arg = is_critical_native ? 0 : (method->is_static() ? 2 : 1 );

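The extra arguments mentioned in the comment above are fixed by the JNI calling convention, which is why c_arg starts at 2 for static methods, 1 for instance methods, and 0 for critical natives (they receive neither a JNIEnv* nor a class/receiver argument). A hypothetical illustration:

#include <jni.h>

// For a hypothetical class C { native int m(int x); static native int s(int x); }
// the C entry points receive these leading arguments:
extern "C" JNIEXPORT jint JNICALL Java_C_m(JNIEnv* env, jobject recv, jint x) { return x; } // 1 extra arg: JNIEnv*
extern "C" JNIEXPORT jint JNICALL Java_C_s(JNIEnv* env, jclass  cls,  jint x) { return x; } // 2 extra args: JNIEnv*, jclass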
  // Record rsp-based slot for receiver on stack for non-static methods
  int receiver_offset = -1;

  // This is a trick. We double the stack slots so we can claim
  // the oops in the caller's frame. Since we are sure to have
  // more args than the caller, doubling is enough to make
  // sure we can capture all the incoming oop args from the
  // caller.
  //
  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);

  // Mark location of rbp
  // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());

  // We know that we only have args in at most two integer registers (rcx, rdx). So rax and rbx
  // are free to use as temporaries if we have to do stack to stack moves.
  // All inbound args are referenced based on rbp, and all outbound args via rsp.

  for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
    switch (in_sig_bt[i]) {
      case T_ARRAY:
        if (is_critical_native) {
          unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
          c_arg++;
          break;
        }
      case T_OBJECT:
        assert(!is_critical_native, "no oop arguments");
        object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
                    ((i == 0) && (!is_static)),
                    &receiver_offset);
        break;
      case T_VOID:
        break;

      case T_FLOAT:
        float_move(masm, in_regs[i], out_regs[c_arg]);
        break;

      case T_DOUBLE:
        assert( i + 1 < total_in_args &&
                in_sig_bt[i + 1] == T_VOID &&
                out_sig_bt[c_arg+1] == T_VOID, "bad arg list");

...

  // and continue to do SP relative addressing but we instead switch to FP
  // relative addressing.

  // Unpack native results.
  switch (ret_type) {
  case T_BOOLEAN: __ c2bool(rax); break;
  case T_CHAR   : __ andptr(rax, 0xFFFF); break;
  case T_BYTE   : __ sign_extend_byte (rax); break;
  case T_SHORT  : __ sign_extend_short(rax); break;
  case T_INT    : /* nothing to do */ break;
  case T_DOUBLE :
  case T_FLOAT  :
    // Result is in st0; we'll save as needed
    break;
  case T_ARRAY: // Really a handle
  case T_OBJECT: // Really a handle
    break; // can't de-handlize until after safepoint check
  case T_VOID: break;
  case T_LONG: break;
  default : ShouldNotReachHere();
  }

  // Switch thread to "native transition" state before reading the synchronization state.
  // This additional state is necessary because reading and testing the synchronization
  // state is not atomic w.r.t. GC, as this scenario demonstrates:
  // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
  // VM thread changes sync state to synchronizing and suspends threads for GC.
  // Thread A is resumed to finish this native method, but doesn't block here since it
  // didn't see any synchronization in progress, and escapes.
  __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);

  if(os::is_MP()) {
    if (UseMembar) {
      // Force this write out before the read below
      __ membar(Assembler::Membar_mask_bits(
                  Assembler::LoadLoad | Assembler::LoadStore |
                  Assembler::StoreLoad | Assembler::StoreStore));
    } else {
      // Write serialization page so VM thread can do a pseudo remote membar.
      // We use the current thread pointer to calculate a thread specific
...

New version:

    } else if (in_regs[i].first()->is_XMMRegister()) {
      if (in_sig_bt[i] == T_FLOAT) {
        int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
        int offset = slot * VMRegImpl::stack_slot_size;
        assert(handle_index <= stack_slots, "overflow");
        if (map != NULL) {
          __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
        } else {
          __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
        }
      }
    } else if (in_regs[i].first()->is_stack()) {
      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
        int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
        map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
      }
    }
  }
}

// Registers that need to be saved for a runtime call
static Register caller_saved_registers[] = {
  rcx, rdx, rsi, rdi
};

// Save caller-saved registers except r1 and r2
static void save_registers_except(MacroAssembler* masm, Register r1, Register r2) {
  int reg_len = (int)(sizeof(caller_saved_registers) / sizeof(Register));
  for (int index = 0; index < reg_len; index ++) {
    Register this_reg = caller_saved_registers[index];
    if (this_reg != r1 && this_reg != r2) {
      __ push(this_reg);
    }
  }
}

// Restore caller-saved registers except r1 and r2
static void restore_registers_except(MacroAssembler* masm, Register r1, Register r2) {
  int reg_len = (int)(sizeof(caller_saved_registers) / sizeof(Register));
  for (int index = reg_len - 1; index >= 0; index --) {
    Register this_reg = caller_saved_registers[index];
    if (this_reg != r1 && this_reg != r2) {
      __ pop(this_reg);
    }
  }
}

// Pin object, return pinned object or null in rax
static void gen_pin_object(MacroAssembler* masm,
                           Register thread, VMRegPair reg) {
  __ block_comment("gen_pin_object {");

  Label is_null;
  Register tmp_reg = rax;
  VMRegPair tmp(tmp_reg->as_VMReg());
  if (reg.first()->is_stack()) {
    // Load the arg up from the stack
    simple_move32(masm, reg, tmp);
    reg = tmp;
  } else {
    __ movl(tmp_reg, reg.first()->as_Register());
  }
  __ testptr(reg.first()->as_Register(), reg.first()->as_Register());
  __ jccb(Assembler::equal, is_null);

  // Save registers that may be used by runtime call
  Register arg = reg.first()->is_Register() ? reg.first()->as_Register() : noreg;
  save_registers_except(masm, arg, thread);

  __ call_VM_leaf(
    CAST_FROM_FN_PTR(address, SharedRuntime::pin_object),
    thread, reg.first()->as_Register());

  // Restore saved registers
  restore_registers_except(masm, arg, thread);

  __ bind(is_null);
  __ block_comment("} gen_pin_object");
}
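The call_VM_leaf above targets SharedRuntime::pin_object (and gen_unpin_object below targets its unpin counterpart); both live in the shared runtime, not in this file. A hedged sketch of their assumed shape, under the assumption that they simply forward to the heap and return the possibly updated, now pinned reference in the usual return register:

// Hedged sketch only, not a verbatim copy of the runtime entries (see sharedRuntime.cpp).
// Assumes the heap exposes pin_object/unpin_object as sketched further below.
JRT_LEAF(oopDesc*, SharedRuntime::pin_object(JavaThread* thread, oopDesc* obj))
  oop o(obj);
  o = Universe::heap()->pin_object(thread, o);     // collector-specific pinning
  return o;                                        // ends up in rax for the stub above
JRT_END

JRT_LEAF(void, SharedRuntime::unpin_object(JavaThread* thread, oopDesc* obj))
  Universe::heap()->unpin_object(thread, oop(obj));
JRT_END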

// Unpin object
static void gen_unpin_object(MacroAssembler* masm,
                             Register thread, VMRegPair reg) {
  __ block_comment("gen_unpin_object {");
  Label is_null;

  // temp register
  __ push(rax);
  Register tmp_reg = rax;
  VMRegPair tmp(tmp_reg->as_VMReg());

  simple_move32(masm, reg, tmp);

  __ testptr(rax, rax);
  __ jccb(Assembler::equal, is_null);

  // Save registers that may be used by runtime call
  Register arg = reg.first()->is_Register() ? reg.first()->as_Register() : noreg;
  save_registers_except(masm, arg, thread);

  __ call_VM_leaf(
    CAST_FROM_FN_PTR(address, SharedRuntime::unpin_object),
    thread, rax);

  // Restore saved registers
  restore_registers_except(masm, arg, thread);
  __ bind(is_null);
  __ pop(rax);
  __ block_comment("} gen_unpin_object");
}

// Check GC_locker::needs_gc and enter the runtime if it's true. This
// keeps a new JNI critical region from starting until a GC has been
// forced. Save down any oops in registers and describe them in an
// OopMap.
static void check_needs_gc_for_critical_native(MacroAssembler* masm,
                                               Register thread,
                                               int stack_slots,
                                               int total_c_args,
                                               int total_in_args,
                                               int arg_save_area,
                                               OopMapSet* oop_maps,
                                               VMRegPair* in_regs,
                                               BasicType* in_sig_bt) {
  __ block_comment("check GC_locker::needs_gc");
  Label cont;
  __ cmp8(ExternalAddress((address)GC_locker::needs_gc_address()), false);
  __ jcc(Assembler::equal, cont);

  // Save down any incoming oops and call into the runtime to halt for a GC

...

  } else {
    __ empty_FPU_stack();
  }
#endif /* COMPILER2 */

  // Compute the rbp offset for any slots used after the jni call

  int lock_slot_rbp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;

  // We use rdi as a thread pointer because it is callee save and
  // if we load it once it is usable through the entire wrapper
  const Register thread = rdi;

  // We use rsi as the oop handle for the receiver/klass
  // It is callee save so it survives the call to native

  const Register oop_handle_reg = rsi;

  __ get_thread(thread);

  if (is_critical_native && !Universe::heap()->supports_object_pinning()) {
    check_needs_gc_for_critical_native(masm, thread, stack_slots, total_c_args, total_in_args,
                                       oop_handle_offset, oop_maps, in_regs, in_sig_bt);
  }
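Universe::heap()->supports_object_pinning() selects between the two strategies: a collector that can pin individual objects lets the wrapper pin its array arguments (see the T_ARRAY case below) instead of routing critical natives through the GC_locker check. A hedged sketch of the assumed heap hooks; the authoritative declarations are in CollectedHeap and its subclasses:

// Hedged sketch, not the authoritative declarations: a collector that supports pinning
// overrides these, while others keep the defaults and fall back to the GC_locker path.
class CollectedHeap /* ... */ {
public:
  virtual bool supports_object_pinning() const { return false; }   // assumed default: no pinning
  virtual oop  pin_object(JavaThread* thread, oop obj) {
    ShouldNotReachHere();                                          // only reached if pinning is claimed
    return NULL;
  }
  virtual void unpin_object(JavaThread* thread, oop obj) {
    ShouldNotReachHere();
  }
  // ...
};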

  //
  // We immediately shuffle the arguments so that any vm call we have to
  // make from here on out (sync slow path, jvmti, etc.) we will have
  // captured the oops from our caller and have a valid oopMap for
  // them.

  // -----------------
  // The Grand Shuffle
  //
  // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
  // and, if static, the class mirror instead of a receiver. This pretty much
  // guarantees that register layout will not match (and x86 doesn't use reg
  // parms though amd64 does). Since the native abi doesn't use register args
  // and the java convention does, we don't have to worry about collisions.
  // All of our moves are reg->stack or stack->stack.
  // We ignore the extra arguments during the shuffle and handle them at the
  // last moment. The shuffle is described by the two calling convention
  // vectors we have in our possession. We simply walk the java vector to
  // get the source locations and the c vector to get the destinations.

  int c_arg = is_critical_native ? 0 : (method->is_static() ? 2 : 1 );

  // Record rsp-based slot for receiver on stack for non-static methods
  int receiver_offset = -1;

  // This is a trick. We double the stack slots so we can claim
  // the oops in the caller's frame. Since we are sure to have
  // more args than the caller, doubling is enough to make
  // sure we can capture all the incoming oop args from the
  // caller.
  //
  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);

  // Inbound arguments that need to be pinned for critical natives
  GrowableArray<int> pinned_args(total_in_args);
  // Current stack slot for storing register-based array argument
  int pinned_slot = oop_handle_offset;

  // Mark location of rbp
  // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());

  // We know that we only have args in at most two integer registers (rcx, rdx). So rax and rbx
  // are free to use as temporaries if we have to do stack to stack moves.
  // All inbound args are referenced based on rbp, and all outbound args via rsp.

  for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
    switch (in_sig_bt[i]) {
      case T_ARRAY:
        if (is_critical_native) {
          VMRegPair in_arg = in_regs[i];
          if (Universe::heap()->supports_object_pinning()) {
            // gen_pin_object handles save and restore
            // of any clobbered registers
            gen_pin_object(masm, thread, in_arg);
            pinned_args.append(i);

            // rax has pinned array
            VMRegPair result_reg(rax->as_VMReg());
            if (!in_arg.first()->is_stack()) {
              assert(pinned_slot <= stack_slots, "overflow");
              simple_move32(masm, result_reg, VMRegImpl::stack2reg(pinned_slot));
              pinned_slot += VMRegImpl::slots_per_word;
            } else {
              // Write back the pinned value; it will be used to unpin this argument
              __ movptr(Address(rbp, reg2offset_in(in_arg.first())), result_reg.first()->as_Register());
            }
            // We have the array in a register, use it
            in_arg = result_reg;
          }

          unpack_array_argument(masm, in_arg, in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
          c_arg++;
          break;
        }
      case T_OBJECT:
        assert(!is_critical_native, "no oop arguments");
        object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
                    ((i == 0) && (!is_static)),
                    &receiver_offset);
        break;
      case T_VOID:
        break;

      case T_FLOAT:
        float_move(masm, in_regs[i], out_regs[c_arg]);
        break;

      case T_DOUBLE:
        assert( i + 1 < total_in_args &&
                in_sig_bt[i + 1] == T_VOID &&
                out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
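For reference, unpack_array_argument hands the callee each array as a (length, element pointer) pair, which is the convention critical natives (the experimental -XX:+CriticalJNINatives feature) are written against. A hypothetical critical entry point for a method taking an int[] could look like this; all names are illustrative:

#include <jni.h>

// Hypothetical example: with -XX:+CriticalJNINatives a matching JavaCritical_ entry receives
// no JNIEnv* and no class/receiver, and each array parameter arrives as a
// (length, element pointer) pair, which is exactly what the T_ARRAY case above sets up.
extern "C" JNIEXPORT jint JNICALL
JavaCritical_com_example_Sums_sum(jint len, jint* elems) {
  jint s = 0;
  for (jint i = 0; i < len; i++) {
    s += elems[i];
  }
  return s;
}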
...

  // and continue to do SP relative addressing but we instead switch to FP
  // relative addressing.

  // Unpack native results.
  switch (ret_type) {
  case T_BOOLEAN: __ c2bool(rax); break;
  case T_CHAR   : __ andptr(rax, 0xFFFF); break;
  case T_BYTE   : __ sign_extend_byte (rax); break;
  case T_SHORT  : __ sign_extend_short(rax); break;
  case T_INT    : /* nothing to do */ break;
  case T_DOUBLE :
  case T_FLOAT  :
    // Result is in st0; we'll save as needed
    break;
  case T_ARRAY: // Really a handle
  case T_OBJECT: // Really a handle
    break; // can't de-handlize until after safepoint check
  case T_VOID: break;
  case T_LONG: break;
  default : ShouldNotReachHere();
  }

  // Unpin pinned arguments
  pinned_slot = oop_handle_offset;
  if (pinned_args.length() > 0) {
    // Save the return value; it may be overwritten otherwise.
    save_native_result(masm, ret_type, stack_slots);
    for (int index = 0; index < pinned_args.length(); index ++) {
      int i = pinned_args.at(index);
      assert(pinned_slot <= stack_slots, "overflow");
      if (!in_regs[i].first()->is_stack()) {
        int offset = pinned_slot * VMRegImpl::stack_slot_size;
        __ movl(in_regs[i].first()->as_Register(), Address(rsp, offset));
        pinned_slot += VMRegImpl::slots_per_word;
      }
      // gen_unpin_object handles save and restore
      // of any other clobbered registers
      gen_unpin_object(masm, thread, in_regs[i]);
    }
    restore_native_result(masm, ret_type, stack_slots);
  }

  // Switch thread to "native transition" state before reading the synchronization state.
  // This additional state is necessary because reading and testing the synchronization
  // state is not atomic w.r.t. GC, as this scenario demonstrates:
  // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
  // VM thread changes sync state to synchronizing and suspends threads for GC.
  // Thread A is resumed to finish this native method, but doesn't block here since it
  // didn't see any synchronization in progress, and escapes.
  __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);

  if(os::is_MP()) {
    if (UseMembar) {
      // Force this write out before the read below
      __ membar(Assembler::Membar_mask_bits(
                  Assembler::LoadLoad | Assembler::LoadStore |
                  Assembler::StoreLoad | Assembler::StoreStore));
    } else {
      // Write serialization page so VM thread can do a pseudo remote membar.
      // We use the current thread pointer to calculate a thread specific
...
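The transition above is a classic store-then-load publication pattern: the store of _thread_in_native_trans must become visible before the load of the VM's synchronization state, which needs a StoreLoad barrier (a real fence under -XX:+UseMembar, otherwise the serialization page trick). A minimal model of the required ordering, illustrative only and not HotSpot code:

#include <atomic>

// Illustrative model only. thread_state is written by the Java thread; sync_state is written
// by the VM thread when it starts a safepoint. Without the full fence, the store below could
// be reordered after the load and the thread could miss a pending safepoint request.
std::atomic<int> thread_state{0};
std::atomic<int> sync_state{0};

void transition_from_native() {
  thread_state.store(1, std::memory_order_relaxed);      // stand-in for _thread_in_native_trans
  std::atomic_thread_fence(std::memory_order_seq_cst);   // plays the role of the StoreLoad membar
  if (sync_state.load(std::memory_order_relaxed) != 0) { // safepoint/GC pending?
    // call into the VM and block until the safepoint completes
  }
}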