
src/cpu/x86/vm/sharedRuntime_x86_32.cpp

1281     } else if (in_regs[i].first()->is_XMMRegister()) {
1282       if (in_sig_bt[i] == T_FLOAT) {
1283         int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
1284         int offset = slot * VMRegImpl::stack_slot_size;
1285         assert(handle_index <= stack_slots, "overflow");
1286         if (map != NULL) {
1287           __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
1288         } else {
1289           __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
1290         }
1291       }
1292     } else if (in_regs[i].first()->is_stack()) {
1293       if (in_sig_bt[i] == T_ARRAY && map != NULL) {
1294         int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1295         map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1296       }
1297     }
1298   }
1299 }
1300 
1301 // Check GC_locker::needs_gc and enter the runtime if it's true.  This
1302 // keeps a new JNI critical region from starting until a GC has been
1303 // forced.  Save down any oops in registers and describe them in an
1304 // OopMap.
1305 static void check_needs_gc_for_critical_native(MacroAssembler* masm,
1306                                                Register thread,
1307                                                int stack_slots,
1308                                                int total_c_args,
1309                                                int total_in_args,
1310                                                int arg_save_area,
1311                                                OopMapSet* oop_maps,
1312                                                VMRegPair* in_regs,
1313                                                BasicType* in_sig_bt) {
1314   __ block_comment("check GC_locker::needs_gc");
1315   Label cont;
1316   __ cmp8(ExternalAddress((address)GC_locker::needs_gc_address()), false);
1317   __ jcc(Assembler::equal, cont);
1318 
1319   // Save down any incoming oops and call into the runtime to halt for a GC
1320 


1839   } else {
1840     __ empty_FPU_stack();
1841   }
1842 #endif /* COMPILER2 */
1843 
1844   // Compute the rbp, offset for any slots used after the jni call
1845 
1846   int lock_slot_rbp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
1847 
1848   // We use rdi as a thread pointer because it is callee save and
1849   // if we load it once it is usable thru the entire wrapper
1850   const Register thread = rdi;
1851 
1852   // We use rsi as the oop handle for the receiver/klass
1853   // It is callee save so it survives the call to native
1854 
1855   const Register oop_handle_reg = rsi;
1856 
1857   __ get_thread(thread);
1858 
1859   if (is_critical_native) {
1860     check_needs_gc_for_critical_native(masm, thread, stack_slots, total_c_args, total_in_args,
1861                                        oop_handle_offset, oop_maps, in_regs, in_sig_bt);
1862   }
1863 
1864   //
1865   // We immediately shuffle the arguments so that any vm call we have to
1866   // make from here on out (sync slow path, jvmti, etc.) we will have
1867   // captured the oops from our caller and have a valid oopMap for
1868   // them.
1869 
1870   // -----------------
1871   // The Grand Shuffle
1872   //
1873   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1874   // and, if static, the class mirror instead of a receiver.  This pretty much
1875   // guarantees that the register layout will not match (x86 doesn't use reg
1876   // parms for C calls, though amd64 does).  Since the native ABI doesn't use
1877   // register args and the Java convention does, we don't have to worry about
1878   // collisions.  All of our moves are reg->stack or stack->stack.
1879   // We ignore the extra arguments during the shuffle and handle them at the
1880   // last moment. The shuffle is described by the two calling convention
1881   // vectors we have in our possession. We simply walk the java vector to
1882   // get the source locations and the c vector to get the destinations.
1883 
1884   int c_arg = is_critical_native ? 0 : (method->is_static() ? 2 : 1 );
1885 
1886   // Record rsp-based slot for receiver on stack for non-static methods
1887   int receiver_offset = -1;
1888 
1889   // This is a trick. We double the stack slots so we can claim
1890   // the oops in the caller's frame. Since we are sure to have
1891   // more args than the caller, doubling is enough to make
1892   // sure we can capture all the incoming oop args from the
1893   // caller.
1894   //
1895   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1896 
1897   // Mark location of rbp,
1898   // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());
1899 
1900   // We know that we only have args in at most two integer registers (rcx, rdx), so rax and rbx
1901   // are free to use as temporaries if we have to do stack to stack moves.
1902   // All inbound args are referenced based on rbp, and all outbound args via rsp.
1903 
1904   for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
1905     switch (in_sig_bt[i]) {
1906       case T_ARRAY:
1907         if (is_critical_native) {
1908           unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
1909           c_arg++;
1910           break;
1911         }
1912       case T_OBJECT:
1913         assert(!is_critical_native, "no oop arguments");
1914         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1915                     ((i == 0) && (!is_static)),
1916                     &receiver_offset);
1917         break;
1918       case T_VOID:
1919         break;
1920 
1921       case T_FLOAT:
1922         float_move(masm, in_regs[i], out_regs[c_arg]);
1923           break;
1924 
1925       case T_DOUBLE:
1926         assert( i + 1 < total_in_args &&
1927                 in_sig_bt[i + 1] == T_VOID &&
1928                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");


2082   // and continue to do SP relative addressing but we instead switch to FP
2083   // relative addressing.
2084 
2085   // Unpack native results.
2086   switch (ret_type) {
2087   case T_BOOLEAN: __ c2bool(rax);            break;
2088   case T_CHAR   : __ andptr(rax, 0xFFFF);    break;
2089   case T_BYTE   : __ sign_extend_byte (rax); break;
2090   case T_SHORT  : __ sign_extend_short(rax); break;
2091   case T_INT    : /* nothing to do */        break;
2092   case T_DOUBLE :
2093   case T_FLOAT  :
2094     // Result is in st0 we'll save as needed
2095     break;
2096   case T_ARRAY:                 // Really a handle
2097   case T_OBJECT:                // Really a handle
2098       break; // can't de-handlize until after safepoint check
2099   case T_VOID: break;
2100   case T_LONG: break;
2101   default       : ShouldNotReachHere();
2102   }
2103 
2104   // Switch thread to "native transition" state before reading the synchronization state.
2105   // This additional state is necessary because reading and testing the synchronization
2106   // state is not atomic w.r.t. GC, as this scenario demonstrates:
2107   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2108   //     VM thread changes sync state to synchronizing and suspends threads for GC.
2109   //     Thread A is resumed to finish this native method, but doesn't block here since it
2110   //     didn't see any synchronization in progress, and escapes.
2111   __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
2112 
2113   if(os::is_MP()) {
2114     if (UseMembar) {
2115       // Force this write out before the read below
2116       __ membar(Assembler::Membar_mask_bits(
2117            Assembler::LoadLoad | Assembler::LoadStore |
2118            Assembler::StoreLoad | Assembler::StoreStore));
2119     } else {
2120       // Write serialization page so VM thread can do a pseudo remote membar.
2121       // We use the current thread pointer to calculate a thread specific




1281     } else if (in_regs[i].first()->is_XMMRegister()) {
1282       if (in_sig_bt[i] == T_FLOAT) {
1283         int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
1284         int offset = slot * VMRegImpl::stack_slot_size;
1285         assert(handle_index <= stack_slots, "overflow");
1286         if (map != NULL) {
1287           __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
1288         } else {
1289           __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
1290         }
1291       }
1292     } else if (in_regs[i].first()->is_stack()) {
1293       if (in_sig_bt[i] == T_ARRAY && map != NULL) {
1294         int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1295         map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1296       }
1297     }
1298   }
1299 }
1300 
1301 // Registers need to be saved for runtime call
1302 static Register caller_saved_registers[] = {
1303   rcx, rdx, rsi, rdi
1304 };
1305 
1306 // Save caller saved registers except r1 and r2
1307 static void save_registers_except(MacroAssembler* masm, Register r1, Register r2) {
1308   int reg_len = (int)(sizeof(caller_saved_registers) / sizeof(Register));
1309   for (int index = 0; index < reg_len; index ++) {
1310     Register this_reg = caller_saved_registers[index];
1311     if (this_reg != r1 && this_reg != r2) {
1312       __ push(this_reg);
1313     }
1314   }
1315 }
1316 
1317 // Restore caller saved registers except r1 and r2
1318 static void restore_registers_except(MacroAssembler* masm, Register r1, Register r2) {
1319   int reg_len = (int)(sizeof(caller_saved_registers) / sizeof(Register));
1320   for (int index = reg_len - 1; index >= 0; index --) {
1321     Register this_reg = caller_saved_registers[index];
1322     if (this_reg != r1 && this_reg != r2) {
1323       __ pop(this_reg);
1324     }
1325   }
1326 }
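
For reference, these two helpers are emitted as a matched pair around the VM leaf calls below; the two excluded registers are the ones the call site still needs (its argument register and the thread register). A minimal sketch of the emission pattern (runtime_entry is a placeholder, not a name from this file):

    save_registers_except(masm, arg, thread);
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, runtime_entry), thread, arg);
    restore_registers_except(masm, arg, thread);
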
1327 
1328 // Pin object, return pinned object or null in rax
1329 static void gen_pin_object(MacroAssembler* masm,
1330                            Register thread, VMRegPair reg) {
1331   __ block_comment("gen_pin_object {");
1332 
1333   Label is_null;
1334   Register tmp_reg = rax;
1335   VMRegPair tmp(tmp_reg->as_VMReg());
1336   if (reg.first()->is_stack()) {
1337     // Load the arg up from the stack
1338     simple_move32(masm, reg, tmp);
1339     reg = tmp;
1340   } else {
1341     __ movl(tmp_reg, reg.first()->as_Register());
1342   }
1343   __ testptr(reg.first()->as_Register(), reg.first()->as_Register());
1344   __ jccb(Assembler::equal, is_null);
1345 
1346   // Save registers that may be used by runtime call
1347   Register arg = reg.first()->is_Register() ? reg.first()->as_Register() : noreg;
1348   save_registers_except(masm, arg, thread);
1349 
1350   __ call_VM_leaf(
1351     CAST_FROM_FN_PTR(address, SharedRuntime::pin_object),
1352     thread, reg.first()->as_Register());
1353 
1354   // Restore saved registers
1355   restore_registers_except(masm, arg, thread);
1356 
1357   __ bind(is_null);
1358   __ block_comment("} gen_pin_object");
1359 }
1360 
1361 // Unpin object
1362 static void gen_unpin_object(MacroAssembler* masm,
1363                              Register thread, VMRegPair reg) {
1364   __ block_comment("gen_unpin_object {");
1365   Label is_null;
1366 
1367   // temp register
1368   __ push(rax);
1369   Register tmp_reg = rax;
1370   VMRegPair tmp(tmp_reg->as_VMReg());
1371 
1372   simple_move32(masm, reg, tmp);
1373 
1374   __ testptr(rax, rax);
1375   __ jccb(Assembler::equal, is_null);
1376 
1377   // Save registers that may be used by runtime call
1378   Register arg = reg.first()->is_Register() ? reg.first()->as_Register() : noreg;
1379   save_registers_except(masm, arg, thread);
1380 
1381   __ call_VM_leaf(
1382     CAST_FROM_FN_PTR(address, SharedRuntime::unpin_object),
1383     thread, rax);
1384 
1385   // Restore saved registers
1386   restore_registers_except(masm, arg, thread);
1387   __ bind(is_null);
1388   __ pop(rax);
1389   __ block_comment("} gen_unpin_object");
1390 }
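
Taken together, for each non-null array argument of a critical native the emitted code is roughly equivalent to the following runtime behaviour (a simplified C++ sketch, not the generated assembly; the pin_object/unpin_object signatures are inferred from the call_VM_leaf sites above):

    oop obj = incoming_array_argument;          // from a register or the caller's stack slot
    if (obj != NULL) {
      // Pin the array so the GC cannot move it while the critical native runs.
      // The (possibly updated) reference comes back in rax and is what gets
      // unpacked into the (length, body) pair passed to the native.
      obj = SharedRuntime::pin_object(thread, obj);
    }
    // ... call the critical native ...
    if (obj != NULL) {
      SharedRuntime::unpin_object(thread, obj); // release the pin after the call
    }
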
1391 
1392 // Check GC_locker::needs_gc and enter the runtime if it's true.  This
1393 // keeps a new JNI critical region from starting until a GC has been
1394 // forced.  Save down any oops in registers and describe them in an
1395 // OopMap.
1396 static void check_needs_gc_for_critical_native(MacroAssembler* masm,
1397                                                Register thread,
1398                                                int stack_slots,
1399                                                int total_c_args,
1400                                                int total_in_args,
1401                                                int arg_save_area,
1402                                                OopMapSet* oop_maps,
1403                                                VMRegPair* in_regs,
1404                                                BasicType* in_sig_bt) {
1405   __ block_comment("check GC_locker::needs_gc");
1406   Label cont;
1407   __ cmp8(ExternalAddress((address)GC_locker::needs_gc_address()), false);
1408   __ jcc(Assembler::equal, cont);
1409 
1410   // Save down any incoming oops and call into the runtime to halt for a GC
1411 
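In outline, check_needs_gc_for_critical_native emits the equivalent of the following (a sketch; the hunk above is truncated before the runtime call):

    if (GC_locker::needs_gc()) {
      // A GC is waiting for critical regions to drain: spill any oop arguments
      // into the arg_save_area, describe them in an OopMap, and call into the
      // runtime to block until the GC has run before entering a new critical
      // region.
    }
    // otherwise fall through (label 'cont') and proceed to the native call
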


1930   } else {
1931     __ empty_FPU_stack();
1932   }
1933 #endif /* COMPILER2 */
1934 
1935   // Compute the rbp, offset for any slots used after the jni call
1936 
1937   int lock_slot_rbp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
1938 
1939   // We use rdi as a thread pointer because it is callee save and
1940   // if we load it once it is usable thru the entire wrapper
1941   const Register thread = rdi;
1942 
1943   // We use rsi as the oop handle for the receiver/klass
1944   // It is callee save so it survives the call to native
1945 
1946   const Register oop_handle_reg = rsi;
1947 
1948   __ get_thread(thread);
1949 
1950   if (is_critical_native && !Universe::heap()->supports_object_pinning()) {
1951     check_needs_gc_for_critical_native(masm, thread, stack_slots, total_c_args, total_in_args,
1952                                        oop_handle_offset, oop_maps, in_regs, in_sig_bt);
1953   }
1954 
1955   //
1956   // We immediately shuffle the arguments so that any vm call we have to
1957   // make from here on out (sync slow path, jvmti, etc.) we will have
1958   // captured the oops from our caller and have a valid oopMap for
1959   // them.
1960 
1961   // -----------------
1962   // The Grand Shuffle
1963   //
1964   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1965   // and, if static, the class mirror instead of a receiver.  This pretty much
1966   // guarantees that the register layout will not match (x86 doesn't use reg
1967   // parms for C calls, though amd64 does).  Since the native ABI doesn't use
1968   // register args and the Java convention does, we don't have to worry about
1969   // collisions.  All of our moves are reg->stack or stack->stack.
1970   // We ignore the extra arguments during the shuffle and handle them at the
1971   // last moment. The shuffle is described by the two calling convention
1972   // vectors we have in our possession. We simply walk the java vector to
1973   // get the source locations and the c vector to get the destinations.
1974 
1975   int c_arg = is_critical_native ? 0 : (method->is_static() ? 2 : 1 );
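
As a concrete illustration of the extra-argument accounting (a hypothetical example, not from this file): for a Java declaration static native int sum(int[] data), the regular JNI entry point carries two extra leading C arguments, while a critical native carries none and receives each array as a (length, body) pair, which is also why the T_ARRAY case below consumes two outgoing argument positions:

    // Regular JNI entry point: c_arg starts at 2 (JNIEnv*, jclass).
    jint JNICALL Java_p_C_sum(JNIEnv* env, jclass clazz, jintArray data);
    // Critical native entry point: c_arg starts at 0, the array expands to two args.
    jint JNICALL JavaCritical_p_C_sum(jint length, jint* data);
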
1976 
1977   // Record rsp-based slot for receiver on stack for non-static methods
1978   int receiver_offset = -1;
1979 
1980   // This is a trick. We double the stack slots so we can claim
1981   // the oops in the caller's frame. Since we are sure to have
1982   // more args than the caller, doubling is enough to make
1983   // sure we can capture all the incoming oop args from the
1984   // caller.
1985   //
1986   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
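
A worked example of the doubling (illustrative numbers only): with stack_slots == 64, an incoming oop that lives in the caller's frame at out-preserve-adjusted slot 3 is recorded in this doubled map as stack slot 3 + 64 = 67, which is exactly what the map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots)) call at line 1295 above does for stack-resident array arguments.
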
1987 
1988   // Inbound arguments that need to be pinned for critical natives
1989   GrowableArray<int> pinned_args(total_in_args);
1990   // Current stack slot for storing register based array argument
1991   int pinned_slot = oop_handle_offset;
1992 
1993   // Mark location of rbp,
1994   // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());
1995 
1996   // We know that we only have args in at most two integer registers (rcx, rdx), so rax and rbx
1997   // are free to use as temporaries if we have to do stack to stack moves.
1998   // All inbound args are referenced based on rbp, and all outbound args via rsp.
1999 
2000   for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
2001     switch (in_sig_bt[i]) {
2002       case T_ARRAY:
2003         if (is_critical_native) {
2004           VMRegPair in_arg = in_regs[i];
2005           if (Universe::heap()->supports_object_pinning()) {
2006             // gen_pin_object handles save and restore
2007             // of any clobbered registers
2008             gen_pin_object(masm, thread, in_arg);
2009             pinned_args.append(i);
2010 
2011             // rax has pinned array
2012             VMRegPair result_reg(rax->as_VMReg());
2013             if (!in_arg.first()->is_stack()) {
2014               assert(pinned_slot <= stack_slots, "overflow");
2015               simple_move32(masm, result_reg, VMRegImpl::stack2reg(pinned_slot));
2016               pinned_slot += VMRegImpl::slots_per_word;
2017             } else {
2018               // Write back pinned value, it will be used to unpin this argument
2019               __ movptr(Address(rbp, reg2offset_in(in_arg.first())), result_reg.first()->as_Register());
2020             }
2021             // We have the array in register, use it
2022             in_arg = result_reg;
2023           }
2024 
2025           unpack_array_argument(masm, in_arg, in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
2026           c_arg++;
2027           break;
2028         }
2029       case T_OBJECT:
2030         assert(!is_critical_native, "no oop arguments");
2031         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
2032                     ((i == 0) && (!is_static)),
2033                     &receiver_offset);
2034         break;
2035       case T_VOID:
2036         break;
2037 
2038       case T_FLOAT:
2039         float_move(masm, in_regs[i], out_regs[c_arg]);
2040           break;
2041 
2042       case T_DOUBLE:
2043         assert( i + 1 < total_in_args &&
2044                 in_sig_bt[i + 1] == T_VOID &&
2045                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
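
The assert relies on HotSpot's convention that longs and doubles occupy two signature slots, with the second half tagged T_VOID in both the Java and C type arrays. For example (hypothetical method, receiver and JNIEnv*/class entries omitted):

    // in_sig_bt for 'static native void m(int a, double b, long c)', argument part only:
    //   { T_INT, T_DOUBLE, T_VOID, T_LONG, T_VOID }
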


2199   // and continue to do SP relative addressing but we instead switch to FP
2200   // relative addressing.
2201 
2202   // Unpack native results.
2203   switch (ret_type) {
2204   case T_BOOLEAN: __ c2bool(rax);            break;
2205   case T_CHAR   : __ andptr(rax, 0xFFFF);    break;
2206   case T_BYTE   : __ sign_extend_byte (rax); break;
2207   case T_SHORT  : __ sign_extend_short(rax); break;
2208   case T_INT    : /* nothing to do */        break;
2209   case T_DOUBLE :
2210   case T_FLOAT  :
2211     // Result is in st0 we'll save as needed
2212     break;
2213   case T_ARRAY:                 // Really a handle
2214   case T_OBJECT:                // Really a handle
2215       break; // can't de-handlize until after safepoint check
2216   case T_VOID: break;
2217   case T_LONG: break;
2218   default       : ShouldNotReachHere();
2219   }
2220 
2221   // unpin pinned arguments
2222   pinned_slot = oop_handle_offset;
2223   if (pinned_args.length() > 0) {
2224     // save return value that may be overwritten otherwise.
2225     save_native_result(masm, ret_type, stack_slots);
2226     for (int index = 0; index < pinned_args.length(); index ++) {
2227       int i = pinned_args.at(index);
2228       assert(pinned_slot <= stack_slots, "overflow");
2229       if (!in_regs[i].first()->is_stack()) {
2230         int offset = pinned_slot * VMRegImpl::stack_slot_size;
2231         __ movl(in_regs[i].first()->as_Register(), Address(rsp, offset));
2232         pinned_slot += VMRegImpl::slots_per_word;
2233       }
2234       // gen_unpin_object handles save and restore
2235       // of any other clobbered registers
2236       gen_unpin_object(masm, thread, in_regs[i]);
2237     }
2238     restore_native_result(masm, ret_type, stack_slots);
2239   }
2240 
2241   // Switch thread to "native transition" state before reading the synchronization state.
2242   // This additional state is necessary because reading and testing the synchronization
2243   // state is not atomic w.r.t. GC, as this scenario demonstrates:
2244   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2245   //     VM thread changes sync state to synchronizing and suspends threads for GC.
2246   //     Thread A is resumed to finish this native method, but doesn't block here since it
2247   //     didn't see any synchronization in progress, and escapes.
2248   __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
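
What must not happen is the store of _thread_in_native_trans being reordered with (or remaining invisible before) the subsequent read of the synchronization state; the membar or serialization-page write below provides that store-before-load ordering. Conceptually (a sketch; the state check itself comes later and is not shown in this hunk):

    thread->set_thread_state(_thread_in_native_trans);  // the movl above
    // StoreLoad barrier here, so the VM thread sees the new state before
    // this thread reads the safepoint/sync state.
    // ... then load and test the synchronization state; block if a safepoint
    // or suspend is in progress.
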
2249 
2250   if(os::is_MP()) {
2251     if (UseMembar) {
2252       // Force this write out before the read below
2253       __ membar(Assembler::Membar_mask_bits(
2254            Assembler::LoadLoad | Assembler::LoadStore |
2255            Assembler::StoreLoad | Assembler::StoreStore));
2256     } else {
2257       // Write serialization page so VM thread can do a pseudo remote membar.
2258       // We use the current thread pointer to calculate a thread specific

