< prev index next >

src/hotspot/cpu/x86/x86_32.ad

Print this page

 1736       // External c_calling_convention expects the FPU stack to be 'clean'.
 1737       // Compiled code leaves it dirty.  Do cleanup now.
 1738       masm.empty_FPU_stack();
 1739     }
 1740     if (sizeof_FFree_Float_Stack_All == -1) {
 1741       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1742     } else {
 1743       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1744     }
 1745   %}
 1746 
 1747   enc_class Verify_FPU_For_Leaf %{
          // Optional check: when the VerifyFPU flag is set, emit a verify_FPU
          // call carrying the message below; otherwise nothing is emitted.
          // NOTE(review): the meaning of the -3 argument is defined by
          // MacroAssembler::verify_FPU, which is not visible here.
 1748     if( VerifyFPU ) {
 1749       MacroAssembler masm(&cbuf);
 1750       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1751     }
 1752   %}
 1753 
 1754   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1755     // This is the instruction starting address for relocation info.

 1756     cbuf.set_insts_mark();
 1757     $$$emit8$primary;
 1758     // CALL directly to the runtime
 1759     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1760                 runtime_call_Relocation::spec(), RELOC_IMM32 );

 1761 
 1762     if (UseSSE >= 2) {
 1763       MacroAssembler _masm(&cbuf);
 1764       BasicType rt = tf()->return_type();
 1765 
 1766       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1767         // A C runtime call where the return value is unused.  In SSE2+
 1768         // mode the result needs to be removed from the FPU stack.  It's
 1769         // likely that this function call could be removed by the
 1770         // optimizer if the C function is a pure function.
 1771         __ ffree(0);
 1772       } else if (rt == T_FLOAT) {
 1773         __ lea(rsp, Address(rsp, -4));
 1774         __ fstp_s(Address(rsp, 0));
 1775         __ movflt(xmm0, Address(rsp, 0));
 1776         __ lea(rsp, Address(rsp,  4));
 1777       } else if (rt == T_DOUBLE) {
 1778         __ lea(rsp, Address(rsp, -8));
 1779         __ fstp_d(Address(rsp, 0));
 1780         __ movdbl(xmm0, Address(rsp, 0));

 1792     }
 1793     // Clear upper bits of YMM registers when current compiled code uses
 1794     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1795     MacroAssembler _masm(&cbuf);
 1796     __ vzeroupper();
 1797     debug_only(int off1 = cbuf.insts_size());
 1798     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1799   %}
 1800 
 1801   enc_class post_call_FPU %{
 1802     // If the current compilation runs in 24-bit FP mode, reload the FPU
          // control word (FLDCW) from the 24-bit control word constant.
          // In any other mode this encoding emits nothing.
 1803     if (Compile::current()->in_24_bit_fp_mode()) {
 1804       MacroAssembler masm(&cbuf);
 1805       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1806     }
 1807   %}
 1808 
 1809   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1810     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1811     // who we intended to call.
          // The 32-bit displacement is PC-relative: target minus the address
          // just past the 4-byte displacement field (insts_end() + 4).

          // Mark the start of the call instruction for relocation purposes,
          // then emit the primary opcode byte supplied by the using instruct.
 1812     cbuf.set_insts_mark();
 1813     $$$emit8$primary;
 1814
 1815     if (!_method) {
            // No Java method attached to this node: plain runtime-call relocation.
 1816       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1817                      runtime_call_Relocation::spec(),
 1818                      RELOC_IMM32);

 1819     } else {
            // Method is known: pick an opt-virtual or static call relocation
            // (per _optimized_virtual), each carrying the resolved method index.
 1820       int method_index = resolved_method_index(cbuf);
 1821       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1822                                                   : static_call_Relocation::spec(method_index);
 1823       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1824                      rspec, RELOC_DISP32);

 1825       // Emit stubs for static call.
 1826       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
 1827       if (stub == NULL) {
              // Stub could not be emitted: record the failure and stop encoding.
 1828         ciEnv::current()->record_failure("CodeCache is full");
 1829         return;
 1830       }
 1831     }
 1832   %}
 1833 
 1834   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
          // Delegates the whole call sequence to MacroAssembler::ic_call,
          // passing the call target and the resolved method index.
 1835     MacroAssembler _masm(&cbuf);
 1836     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));

 1837   %}
 1838 
 1839   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
          // Indirect call through the from_compiled entry stored in the
          // Method whose address is in EAX.
 1840     int disp = in_bytes(Method::from_compiled_offset());
          // The offset is emitted below as a single signed byte, so it must fit.
 1841     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1842
 1843     // CALL *[EAX+in_bytes(Method::from_compiled_offset())]

 1844     cbuf.set_insts_mark();
 1845     $$$emit8$primary;
 1846     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte: mod=01 ([EAX+disp8]), reg=$secondary
 1847     emit_d8(cbuf, disp);             // Displacement

 1848   %}
 1850 
 1851 //   Following encoding is no longer used, but may be restored if calling
 1852 //   convention changes significantly.
 1853 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1854 //
 1855 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1856 //     // int ic_reg     = Matcher::inline_cache_reg();
 1857 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1858 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1859 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1860 //
 1861 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1862 //     // // so we load it immediately before the call
 1863 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1864 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1865 //
 1866 //     // xor rbp,ebp
 1867 //     emit_opcode(cbuf, 0x33);
 1868 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);

 2770     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2771     emit_d32( cbuf, (int)$src$$constant );
 2772     // MUL   EDX:EAX,EDX
 2773     emit_opcode( cbuf, 0xF7 );
 2774     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
 2775     // ADD    EDX,ESI
 2776     emit_opcode( cbuf, 0x03 );
 2777     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2778   %}
 2779 
 2780   enc_class long_div( eRegL src1, eRegL src2 ) %{
          // 64-bit divide via a runtime call: push both long operands as four
          // 32-bit words, CALL SharedRuntime::ldiv, then drop the 16 bytes of
          // pushed arguments.  0x50+reg is the one-byte PUSH r32 opcode;
          // HIGH_FROM_LOW selects the register holding the high half.
 2781     // PUSH src1.hi
 2782     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2783     // PUSH src1.lo
 2784     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2785     // PUSH src2.hi
 2786     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2787     // PUSH src2.lo
 2788     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2789     // CALL directly to the runtime

 2790     cbuf.set_insts_mark();
 2791     emit_opcode(cbuf,0xE8);       // Call into runtime (rel32 follows)
 2792     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );

 2793     // Restore stack: pop the four argument words pushed above
 2794     emit_opcode(cbuf, 0x83); // ADD ESP, imm8
 2795     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2796     emit_d8(cbuf, 4*4);
 2797   %}
 2798 
 2799   enc_class long_mod( eRegL src1, eRegL src2 ) %{
          // 64-bit remainder via a runtime call: push both long operands as
          // four 32-bit words, CALL SharedRuntime::lrem, then drop the 16 bytes
          // of pushed arguments.  0x50+reg is the one-byte PUSH r32 opcode;
          // HIGH_FROM_LOW selects the register holding the high half.
 2800     // PUSH src1.hi
 2801     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2802     // PUSH src1.lo
 2803     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2804     // PUSH src2.hi
 2805     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2806     // PUSH src2.lo
 2807     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2808     // CALL directly to the runtime

 2809     cbuf.set_insts_mark();
 2810     emit_opcode(cbuf,0xE8);       // Call into runtime (rel32 follows)
 2811     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );

 2812     // Restore stack: pop the four argument words pushed above
 2813     emit_opcode(cbuf, 0x83); // ADD ESP, imm8
 2814     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2815     emit_d8(cbuf, 4*4);
 2816   %}
 2817 
 2818   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
          // Sets the flags from (src.lo | src.hi), computed in tmp, so the
          // 64-bit value can be tested against zero.  tmp is overwritten.
 2819     // MOV   $tmp,$src.lo
 2820     emit_opcode(cbuf, 0x8B);
 2821     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2822     // OR    $tmp,$src.hi
 2823     emit_opcode(cbuf, 0x0B);
 2824     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
 2825   %}
 2826 
 2827   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2828     // CMP    $src1.lo,$src2.lo
 2829     emit_opcode( cbuf, 0x3B );
 2830     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2831     // JNE,s  skip

 2859     emit_opcode( cbuf, 0x1B );
 2860     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
 2861   %}
 2862 
 2863  // Sniff, sniff... smells like Gnu Superoptimizer
 2864   enc_class neg_long( eRegL dst ) %{
          // In-place negation of the 64-bit register pair: negate both halves,
          // then subtract the borrow produced by the low-half NEG from the
          // high half.  The order of the three instructions matters.
 2865     emit_opcode(cbuf,0xF7);    // NEG hi
 2866     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2867     emit_opcode(cbuf,0xF7);    // NEG lo
 2868     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2869     emit_opcode(cbuf,0x83);    // SBB hi,0
 2870     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2871     emit_d8    (cbuf,0 );
 2872   %}
 2873 
 2874   enc_class enc_pop_rdx() %{
          // Emits the single byte 0x5A: POP EDX.
 2875     emit_opcode(cbuf,0x5A);
 2876   %}
 2877 
 2878   enc_class enc_rethrow() %{
          // Emits a JMP rel32 to OptoRuntime::rethrow_stub(), recorded with a
          // runtime-call relocation.  The displacement is relative to the
          // address just past the 4-byte field (insts_end() + 4).

 2879     cbuf.set_insts_mark();
 2880     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2881     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2882                    runtime_call_Relocation::spec(), RELOC_IMM32 );

 2883   %}
 2884 
 2885 
 2886   // Convert a double to an int.  Java semantics require we do complex
 2887   // manglations in the corner cases.  So we set the rounding mode to
 2888   // 'zero', store the darned double down as an int, and reset the
 2889   // rounding mode to 'nearest'.  The hardware stores a flag value
 2890   // (0x80000000) in the corner cases; we test for it and call the
          // d2i wrapper on the slow path.
 2891   enc_class DPR2I_encoding( regDPR src ) %{
 2892     // Flip to round-to-zero mode.  We attempted to allow invalid-op
 2893     // exceptions here, so that a NAN or other corner-case value will
 2894     // thrown an exception (but normal values get converted at full speed).
 2895     // However, I2C adapters and other float-stack manglers leave pending
 2896     // invalid-op exceptions hanging.  We would have to clear them before
 2897     // enabling them and that is more expensive than just testing for the
 2898     // invalid value Intel stores down in the corner cases.
 2899     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2900     emit_opcode(cbuf,0x2D);
 2901     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2902     // Allocate a word

 2908     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2909     emit_opcode(cbuf,0x1C);
 2910     emit_d8(cbuf,0x24);
 2911     // Restore the rounding mode; mask the exception
 2912     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2913     emit_opcode(cbuf,0x2D);
 2914     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2915         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2916         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2917 
 2918     // Load the converted int; adjust CPU stack
 2919     emit_opcode(cbuf,0x58);       // POP EAX
 2920     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2921     emit_d32   (cbuf,0x80000000); //         0x80000000
 2922     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2923     emit_d8    (cbuf,0x07);       // Size of slow_call
 2924     // Push src onto stack slow-path
 2925     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2926     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2927     // CALL directly to the runtime

 2928     cbuf.set_insts_mark();
 2929     emit_opcode(cbuf,0xE8);       // Call into runtime
 2930     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );

 2931     // Carry on here...
 2932   %}
 2933 
 2934   enc_class DPR2L_encoding( regDPR src ) %{
          // Convert the double on top of the FPU stack to a long in EDX:EAX.
          // Fast path: FISTP under truncating rounding.  If the stored result
          // is 0x80000000:00000000, reload src and call the d2l wrapper stub.
          // The two short-jump distances below are hand-computed byte counts
          // and must be kept in sync with the bytes emitted after them.
 2935     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2936     emit_opcode(cbuf,0x2D);
 2937     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2938     // Allocate two words (8 bytes) for the long result
 2939     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2940     emit_opcode(cbuf,0xEC);
 2941     emit_d8(cbuf,0x08);
 2942     // Encoding assumes a double has been pushed into FPR0.
 2943     // Store down the double as a long, popping the FPU stack
 2944     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2945     emit_opcode(cbuf,0x3C);
 2946     emit_d8(cbuf,0x24);
 2947     // Restore the rounding mode; mask the exception
 2948     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2949     emit_opcode(cbuf,0x2D);
 2950     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2951         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2952         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2953
 2954     // Load the converted long; adjust CPU stack
 2955     emit_opcode(cbuf,0x58);       // POP EAX
 2956     emit_opcode(cbuf,0x5A);       // POP EDX
 2957     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2958     emit_d8    (cbuf,0xFA);       // R/M byte: CMP, operand EDX
 2959     emit_d32   (cbuf,0x80000000); //         0x80000000
 2960     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2961     emit_d8    (cbuf,0x07+4);     // Size of slow_call plus the TEST/JNE pair below
 2962     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2963     emit_opcode(cbuf,0xC0);       // R/M byte: EAX,EAX
 2964     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2965     emit_d8    (cbuf,0x07);       // Size of slow_call (2-byte FLD + 5-byte CALL)
 2966     // Push src onto stack slow-path
 2967     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2968     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2969     // CALL directly to the runtime

 2970     cbuf.set_insts_mark();
 2971     emit_opcode(cbuf,0xE8);       // Call into runtime
 2972     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );

 2973     // Carry on here...
 2974   %}
 2975 
 2976   enc_class FMul_ST_reg( eRegFPR src1 ) %{
          // Emits the two-byte FMUL of the FPU stack top by register src1.
 2977     // Operand was loaded from memory into fp ST (stack top)
 2978     // FMUL   ST,$src  /* D8 C8+i */
 2979     emit_opcode(cbuf, 0xD8);
 2980     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 2981   %}
 2982 
 2983   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
          // Emits the two-byte non-popping FADD of register src2 into the
          // FPU stack top.
 2984     // FADD   ST,src2  /* D8 C0+i */
 2985     emit_opcode(cbuf, 0xD8);
 2986     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 2987     //could use FADDP  src2,fpST  /* DE C0+i */
 2988   %}
 2989 
 2990   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 2991     // FADDP  src2,ST  /* DE C0+i */
 2992     emit_opcode(cbuf, 0xDE);

10788   effect( KILL tmp, KILL cr );
10789   format %{ "CVTTSD2SI $dst, $src\n\t"
10790             "CMP    $dst,0x80000000\n\t"
10791             "JNE,s  fast\n\t"
10792             "SUB    ESP, 8\n\t"
10793             "MOVSD  [ESP], $src\n\t"
10794             "FLD_D  [ESP]\n\t"
10795             "ADD    ESP, 8\n\t"
10796             "CALL   d2i_wrapper\n"
10797       "fast:" %}
10798   ins_encode %{
10799     Label fast;
10800     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10801     __ cmpl($dst$$Register, 0x80000000);
10802     __ jccb(Assembler::notEqual, fast);
10803     __ subptr(rsp, 8);
10804     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10805     __ fld_d(Address(rsp, 0));
10806     __ addptr(rsp, 8);
10807     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));

10808     __ bind(fast);
10809   %}
10810   ins_pipe( pipe_slow );
10811 %}
10812 
10813 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10814   predicate(UseSSE<=1);
10815   match(Set dst (ConvD2L src));
10816   effect( KILL cr );
10817   format %{ "FLD    $src\t# Convert double to long\n\t"
10818             "FLDCW  trunc mode\n\t"
10819             "SUB    ESP,8\n\t"
10820             "FISTp  [ESP + #0]\n\t"
10821             "FLDCW  std/24-bit mode\n\t"
10822             "POP    EAX\n\t"
10823             "POP    EDX\n\t"
10824             "CMP    EDX,0x80000000\n\t"
10825             "JNE,s  fast\n\t"
10826             "TEST   EAX,EAX\n\t"
10827             "JNE,s  fast\n\t"

10863     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10864     __ fistp_d(Address(rsp, 0));
10865     // Restore the rounding mode, mask the exception
10866     if (Compile::current()->in_24_bit_fp_mode()) {
10867       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10868     } else {
10869       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10870     }
10871     // Load the converted long, adjust CPU stack
10872     __ pop(rax);
10873     __ pop(rdx);
10874     __ cmpl(rdx, 0x80000000);
10875     __ jccb(Assembler::notEqual, fast);
10876     __ testl(rax, rax);
10877     __ jccb(Assembler::notEqual, fast);
10878     __ subptr(rsp, 8);
10879     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10880     __ fld_d(Address(rsp, 0));
10881     __ addptr(rsp, 8);
10882     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));

10883     __ bind(fast);
10884   %}
10885   ins_pipe( pipe_slow );
10886 %}
10887 
10888 // Convert a double to an int.  Java semantics require we do complex
10889 // manglations in the corner cases.  So we set the rounding mode to
10890 // 'zero', store the darned double down as an int, and reset the
10891 // rounding mode to 'nearest'.  The hardware stores a flag value down
10892 // if we would overflow or converted a NAN; we check for this
10893 // and go the slow path if needed.
10894 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10895   predicate(UseSSE==0);
10896   match(Set dst (ConvF2I src));
10897   effect( KILL tmp, KILL cr );
10898   format %{ "FLD    $src\t# Convert float to int \n\t"
10899             "FLDCW  trunc mode\n\t"
10900             "SUB    ESP,4\n\t"
10901             "FISTp  [ESP + #0]\n\t"
10902             "FLDCW  std/24-bit mode\n\t"

10918   effect( KILL tmp, KILL cr );
10919   format %{ "CVTTSS2SI $dst, $src\n\t"
10920             "CMP    $dst,0x80000000\n\t"
10921             "JNE,s  fast\n\t"
10922             "SUB    ESP, 4\n\t"
10923             "MOVSS  [ESP], $src\n\t"
10924             "FLD    [ESP]\n\t"
10925             "ADD    ESP, 4\n\t"
10926             "CALL   d2i_wrapper\n"
10927       "fast:" %}
10928   ins_encode %{
10929     Label fast;
10930     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10931     __ cmpl($dst$$Register, 0x80000000);
10932     __ jccb(Assembler::notEqual, fast);
10933     __ subptr(rsp, 4);
10934     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10935     __ fld_s(Address(rsp, 0));
10936     __ addptr(rsp, 4);
10937     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));

10938     __ bind(fast);
10939   %}
10940   ins_pipe( pipe_slow );
10941 %}
10942 
10943 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10944   predicate(UseSSE==0);
10945   match(Set dst (ConvF2L src));
10946   effect( KILL cr );
10947   format %{ "FLD    $src\t# Convert float to long\n\t"
10948             "FLDCW  trunc mode\n\t"
10949             "SUB    ESP,8\n\t"
10950             "FISTp  [ESP + #0]\n\t"
10951             "FLDCW  std/24-bit mode\n\t"
10952             "POP    EAX\n\t"
10953             "POP    EDX\n\t"
10954             "CMP    EDX,0x80000000\n\t"
10955             "JNE,s  fast\n\t"
10956             "TEST   EAX,EAX\n\t"
10957             "JNE,s  fast\n\t"

10994     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10995     __ fistp_d(Address(rsp, 0));
10996     // Restore the rounding mode, mask the exception
10997     if (Compile::current()->in_24_bit_fp_mode()) {
10998       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10999     } else {
11000       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
11001     }
11002     // Load the converted long, adjust CPU stack
11003     __ pop(rax);
11004     __ pop(rdx);
11005     __ cmpl(rdx, 0x80000000);
11006     __ jccb(Assembler::notEqual, fast);
11007     __ testl(rax, rax);
11008     __ jccb(Assembler::notEqual, fast);
11009     __ subptr(rsp, 4);
11010     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11011     __ fld_s(Address(rsp, 0));
11012     __ addptr(rsp, 4);
11013     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));

11014     __ bind(fast);
11015   %}
11016   ins_pipe( pipe_slow );
11017 %}
11018 
11019 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
        // Int-to-double conversion on the x87 FPU, selected when UseSSE<=1:
        // FILD loads the int from its stack slot, FSTP pops it into dst.
11020   predicate( UseSSE<=1 );
11021   match(Set dst (ConvI2D src));
11022   format %{ "FILD   $src\n\t"
11023             "FSTP   $dst" %}
11024   opcode(0xDB, 0x0);  /* DB /0 */
11025   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11026   ins_pipe( fpu_reg_mem );
11027 %}
11028 
11029 instruct convI2D_reg(regD dst, rRegI src) %{
11030   predicate( UseSSE>=2 && !UseXmmI2D );
11031   match(Set dst (ConvI2D src));
11032   format %{ "CVTSI2SD $dst,$src" %}
11033   ins_encode %{

 1736       // External c_calling_convention expects the FPU stack to be 'clean'.
 1737       // Compiled code leaves it dirty.  Do cleanup now.
 1738       masm.empty_FPU_stack();
 1739     }
 1740     if (sizeof_FFree_Float_Stack_All == -1) {
 1741       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1742     } else {
 1743       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1744     }
 1745   %}
 1746 
 1747   enc_class Verify_FPU_For_Leaf %{
          // Optional check: when the VerifyFPU flag is set, emit a verify_FPU
          // call carrying the message below; otherwise nothing is emitted.
          // NOTE(review): the meaning of the -3 argument is defined by
          // MacroAssembler::verify_FPU, which is not visible here.
 1748     if( VerifyFPU ) {
 1749       MacroAssembler masm(&cbuf);
 1750       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1751     }
 1752   %}
 1753 
 1754   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1755     // This is the instruction starting address for relocation info.
 1756     MacroAssembler _masm(&cbuf);
 1757     cbuf.set_insts_mark();
 1758     $$$emit8$primary;
 1759     // CALL directly to the runtime
 1760     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1761                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1762     __ post_call_nop();
 1763 
 1764     if (UseSSE >= 2) {
 1765       MacroAssembler _masm(&cbuf);
 1766       BasicType rt = tf()->return_type();
 1767 
 1768       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1769         // A C runtime call where the return value is unused.  In SSE2+
 1770         // mode the result needs to be removed from the FPU stack.  It's
 1771         // likely that this function call could be removed by the
 1772         // optimizer if the C function is a pure function.
 1773         __ ffree(0);
 1774       } else if (rt == T_FLOAT) {
 1775         __ lea(rsp, Address(rsp, -4));
 1776         __ fstp_s(Address(rsp, 0));
 1777         __ movflt(xmm0, Address(rsp, 0));
 1778         __ lea(rsp, Address(rsp,  4));
 1779       } else if (rt == T_DOUBLE) {
 1780         __ lea(rsp, Address(rsp, -8));
 1781         __ fstp_d(Address(rsp, 0));
 1782         __ movdbl(xmm0, Address(rsp, 0));

 1794     }
 1795     // Clear upper bits of YMM registers when current compiled code uses
 1796     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1797     MacroAssembler _masm(&cbuf);
 1798     __ vzeroupper();
 1799     debug_only(int off1 = cbuf.insts_size());
 1800     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1801   %}
 1802 
 1803   enc_class post_call_FPU %{
 1804     // If the current compilation runs in 24-bit FP mode, reload the FPU
          // control word (FLDCW) from the 24-bit control word constant.
          // In any other mode this encoding emits nothing.
 1805     if (Compile::current()->in_24_bit_fp_mode()) {
 1806       MacroAssembler masm(&cbuf);
 1807       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1808     }
 1809   %}
 1810 
 1811   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1812     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1813     // who we intended to call.
          // The 32-bit displacement is PC-relative: target minus the address
          // just past the 4-byte displacement field (insts_end() + 4).
          // In both branches post_call_nop() is invoked right after the call
          // bytes and, in the else branch, before the to-interpreter stub.
          // NOTE(review): post_call_nop() is defined in MacroAssembler, not
          // visible here; presumably it emits a marker nop after the call.
 1814     MacroAssembler _masm(&cbuf);
 1815     cbuf.set_insts_mark();
 1816     $$$emit8$primary;
 1817
 1818     if (!_method) {
            // No Java method attached to this node: plain runtime-call relocation.
 1819       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1820                      runtime_call_Relocation::spec(),
 1821                      RELOC_IMM32);
 1822       __ post_call_nop();
 1823     } else {
            // Method is known: pick an opt-virtual or static call relocation
            // (per _optimized_virtual), each carrying the resolved method index.
 1824       int method_index = resolved_method_index(cbuf);
 1825       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1826                                                   : static_call_Relocation::spec(method_index);
 1827       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1828                      rspec, RELOC_DISP32);
 1829       __ post_call_nop();
 1830       // Emit stubs for static call.
 1831       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
 1832       if (stub == NULL) {
              // Stub could not be emitted: record the failure and stop encoding.
 1833         ciEnv::current()->record_failure("CodeCache is full");
 1834         return;
 1835       }
 1836     }
 1837   %}
 1838 
 1839   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
          // Delegates the call sequence to MacroAssembler::ic_call, passing the
          // call target and the resolved method index, then invokes
          // post_call_nop().
          // NOTE(review): post_call_nop() is defined in MacroAssembler, not
          // visible here; presumably it emits a marker nop after the call.
 1840     MacroAssembler _masm(&cbuf);
 1841     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1842     __ post_call_nop();
 1843   %}
 1844 
 1845   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
          // Indirect call through the from_compiled entry stored in the
          // Method whose address is in EAX, followed by post_call_nop().
 1846     int disp = in_bytes(Method::from_compiled_offset());
          // The offset is emitted below as a single signed byte, so it must fit.
 1847     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1848
 1849     // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
 1850     MacroAssembler _masm(&cbuf);
 1851     cbuf.set_insts_mark();
 1852     $$$emit8$primary;
 1853     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte: mod=01 ([EAX+disp8]), reg=$secondary
 1854     emit_d8(cbuf, disp);             // Displacement
          // NOTE(review): post_call_nop() is defined in MacroAssembler, not
          // visible here; presumably it emits a marker nop after the call.
 1855     __ post_call_nop();
 1856   %}
 1857 
 1858 //   Following encoding is no longer used, but may be restored if calling
 1859 //   convention changes significantly.
 1860 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1861 //
 1862 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1863 //     // int ic_reg     = Matcher::inline_cache_reg();
 1864 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1865 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1866 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1867 //
 1868 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1869 //     // // so we load it immediately before the call
 1870 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1871 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1872 //
 1873 //     // xor rbp,ebp
 1874 //     emit_opcode(cbuf, 0x33);
 1875 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);

 2777     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2778     emit_d32( cbuf, (int)$src$$constant );
 2779     // MUL   EDX:EAX,EDX
 2780     emit_opcode( cbuf, 0xF7 );
 2781     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
 2782     // ADD    EDX,ESI
 2783     emit_opcode( cbuf, 0x03 );
 2784     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2785   %}
 2786 
 2787   enc_class long_div( eRegL src1, eRegL src2 ) %{
          // 64-bit divide via a runtime call: push both long operands as four
          // 32-bit words, CALL SharedRuntime::ldiv, invoke post_call_nop(),
          // then drop the 16 bytes of pushed arguments.  0x50+reg is the
          // one-byte PUSH r32 opcode; HIGH_FROM_LOW selects the register
          // holding the high half.
 2788     // PUSH src1.hi
 2789     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2790     // PUSH src1.lo
 2791     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2792     // PUSH src2.hi
 2793     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2794     // PUSH src2.lo
 2795     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2796     // CALL directly to the runtime
 2797     MacroAssembler _masm(&cbuf);
 2798     cbuf.set_insts_mark();
 2799     emit_opcode(cbuf,0xE8);       // Call into runtime (rel32 follows)
 2800     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
          // NOTE(review): post_call_nop() is defined in MacroAssembler, not
          // visible here; presumably it emits a marker nop after the call.
 2801     __ post_call_nop();
 2802     // Restore stack: pop the four argument words pushed above
 2803     emit_opcode(cbuf, 0x83); // ADD ESP, imm8
 2804     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2805     emit_d8(cbuf, 4*4);
 2806   %}
 2807 
 2808   enc_class long_mod( eRegL src1, eRegL src2 ) %{
          // 64-bit remainder via a runtime call: push both long operands as
          // four 32-bit words, CALL SharedRuntime::lrem, invoke
          // post_call_nop(), then drop the 16 bytes of pushed arguments.
          // 0x50+reg is the one-byte PUSH r32 opcode; HIGH_FROM_LOW selects
          // the register holding the high half.
 2809     // PUSH src1.hi
 2810     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2811     // PUSH src1.lo
 2812     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2813     // PUSH src2.hi
 2814     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2815     // PUSH src2.lo
 2816     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2817     // CALL directly to the runtime
 2818     MacroAssembler _masm(&cbuf);
 2819     cbuf.set_insts_mark();
 2820     emit_opcode(cbuf,0xE8);       // Call into runtime (rel32 follows)
 2821     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
          // NOTE(review): post_call_nop() is defined in MacroAssembler, not
          // visible here; presumably it emits a marker nop after the call.
 2822     __ post_call_nop();
 2823     // Restore stack: pop the four argument words pushed above
 2824     emit_opcode(cbuf, 0x83); // ADD ESP, imm8
 2825     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2826     emit_d8(cbuf, 4*4);
 2827   %}
 2828 
 2829   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
          // Sets the flags from (src.lo | src.hi), computed in tmp, so the
          // 64-bit value can be tested against zero.  tmp is overwritten.
 2830     // MOV   $tmp,$src.lo
 2831     emit_opcode(cbuf, 0x8B);
 2832     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2833     // OR    $tmp,$src.hi
 2834     emit_opcode(cbuf, 0x0B);
 2835     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
 2836   %}
 2837 
 2838   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2839     // CMP    $src1.lo,$src2.lo
 2840     emit_opcode( cbuf, 0x3B );
 2841     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2842     // JNE,s  skip

 2870     emit_opcode( cbuf, 0x1B );
 2871     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
 2872   %}
 2873 
 2874  // Sniff, sniff... smells like Gnu Superoptimizer
 2875   enc_class neg_long( eRegL dst ) %{
          // In-place negation of the 64-bit register pair: negate both halves,
          // then subtract the borrow produced by the low-half NEG from the
          // high half.  The order of the three instructions matters.
 2876     emit_opcode(cbuf,0xF7);    // NEG hi
 2877     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2878     emit_opcode(cbuf,0xF7);    // NEG lo
 2879     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2880     emit_opcode(cbuf,0x83);    // SBB hi,0
 2881     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2882     emit_d8    (cbuf,0 );
 2883   %}
 2884 
 2885   enc_class enc_pop_rdx() %{
          // Emits the single byte 0x5A: POP EDX.
 2886     emit_opcode(cbuf,0x5A);
 2887   %}
 2888 
 2889   enc_class enc_rethrow() %{
          // Emits a JMP rel32 to OptoRuntime::rethrow_stub(), recorded with a
          // runtime-call relocation.  The displacement is relative to the
          // address just past the 4-byte field (insts_end() + 4).
 2890     MacroAssembler _masm(&cbuf);
 2891     cbuf.set_insts_mark();
 2892     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2893     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2894                    runtime_call_Relocation::spec(), RELOC_IMM32 );
          // NOTE(review): this follows an unconditional JMP, so whatever
          // post_call_nop() emits is not reached by fall-through - confirm
          // that this is intended.
 2895     __ post_call_nop();
 2896   %}
 2897 
 2898 
 2899   // Convert a double to an int.  Java semantics require we do complex
 2900   // manglations in the corner cases.  So we set the rounding mode to
 2901   // 'zero', store the darned double down as an int, and reset the
 2902   // rounding mode to 'nearest'.  The hardware stores a flag value
 2903   // (0x80000000) in the corner cases; we test for it and call the
          // d2i wrapper on the slow path.
 2904   enc_class DPR2I_encoding( regDPR src ) %{
 2905     // Flip to round-to-zero mode.  We attempted to allow invalid-op
 2906     // exceptions here, so that a NAN or other corner-case value will
 2907     // thrown an exception (but normal values get converted at full speed).
 2908     // However, I2C adapters and other float-stack manglers leave pending
 2909     // invalid-op exceptions hanging.  We would have to clear them before
 2910     // enabling them and that is more expensive than just testing for the
 2911     // invalid value Intel stores down in the corner cases.
 2912     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2913     emit_opcode(cbuf,0x2D);
 2914     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2915     // Allocate a word

 2921     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2922     emit_opcode(cbuf,0x1C);
 2923     emit_d8(cbuf,0x24);
 2924     // Restore the rounding mode; mask the exception
 2925     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2926     emit_opcode(cbuf,0x2D);
 2927     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2928         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2929         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2930 
 2931     // Load the converted int; adjust CPU stack
 2932     emit_opcode(cbuf,0x58);       // POP EAX
 2933     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2934     emit_d32   (cbuf,0x80000000); //         0x80000000
 2935     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2936     emit_d8    (cbuf,0x07);       // Size of slow_call
 2937     // Push src onto stack slow-path
 2938     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2939     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2940     // CALL directly to the runtime
 2941     MacroAssembler _masm(&cbuf);
 2942     cbuf.set_insts_mark();
 2943     emit_opcode(cbuf,0xE8);       // Call into runtime
 2944     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2945     __ post_call_nop();
 2946     // Carry on here...
 2947   %}
 2948 
 2949   enc_class DPR2L_encoding( regDPR src ) %{    // x87 double -> long; slow path (d2l_wrapper) only when the stored result is 0x80000000:00000000
 2950     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2951     emit_opcode(cbuf,0x2D);            // ModRM: /5 with an absolute disp32 operand
 2952     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2953     // Allocate two words (8 bytes) for the long result
 2954     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2955     emit_opcode(cbuf,0xEC);
 2956     emit_d8(cbuf,0x08);
 2957     // Encoding assumes a double has been pushed into FPR0.
 2958     // Store down the double as a long, popping the FPU stack
 2959     emit_opcode(cbuf,0xDF);            // FISTP [ESP]  (DF /7: 64-bit integer store)
 2960     emit_opcode(cbuf,0x3C);            // ModRM: /7, SIB byte follows
 2961     emit_d8(cbuf,0x24);                // SIB: base = ESP, no index
 2962     // Restore the rounding mode; mask the exception
 2963     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2964     emit_opcode(cbuf,0x2D);
 2965     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2966         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2967         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2968 
 2969     // Load the converted long (low half first); adjust CPU stack
 2970     emit_opcode(cbuf,0x58);       // POP EAX
 2971     emit_opcode(cbuf,0x5A);       // POP EDX
 2972     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2973     emit_d8    (cbuf,0xFA);       // ModRM: /7 (CMP), rm = EDX
 2974     emit_d32   (cbuf,0x80000000); //         0x80000000
 2975     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2976     emit_d8    (cbuf,0x07+4);     // Size of slow_call plus the TEST (2 bytes) and second JNE (2 bytes)
 2977     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2978     emit_opcode(cbuf,0xC0);       // ModRM: EAX,EAX
 2979     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2980     emit_d8    (cbuf,0x07);       // Size of slow_call: FLD ST(i) (2 bytes) + CALL rel32 (5 bytes)
 2981     // Push src onto stack slow-path
 2982     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2983     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2984     // CALL directly to the runtime
 2985     MacroAssembler _masm(&cbuf);
 2986     cbuf.set_insts_mark();        // mark the start of the CALL for the relocation below
 2987     emit_opcode(cbuf,0xE8);       // Call into runtime
 2988     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2989     __ post_call_nop();           // not counted in the JNE sizes above: anything emitted here starts at the branch target
 2990     // Carry on here...
 2991   %}
 2992 
 2993   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2994     // Operand was loaded from memory into fp ST (stack top)
 2995     // FMUL   ST,$src1  /* D8 C8+i */
 2996     emit_opcode(cbuf, 0xD8);
 2997     emit_opcode(cbuf, 0xC8 + $src1$$reg);    // i is taken from the encoding of $src1
 2998   %}
 2999 
 3000   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
 3001     // FADD   ST,src2  /* D8 C0+i */  (non-popping form; result stays in ST)
 3002     emit_opcode(cbuf, 0xD8);
 3003     emit_opcode(cbuf, 0xC0 + $src2$$reg);    // i is taken from the encoding of $src2
 3004     // could use FADDP  src2,fpST  /* DE C0+i */
 3005   %}
 3006 
 3007   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 3008     // FADDP  src2,ST  /* DE C0+i */
 3009     emit_opcode(cbuf, 0xDE);

10805   effect( KILL tmp, KILL cr );
10806   format %{ "CVTTSD2SI $dst, $src\n\t"
10807             "CMP    $dst,0x80000000\n\t"
10808             "JNE,s  fast\n\t"
10809             "SUB    ESP, 8\n\t"
10810             "MOVSD  [ESP], $src\n\t"
10811             "FLD_D  [ESP]\n\t"
10812             "ADD    ESP, 8\n\t"
10813             "CALL   d2i_wrapper\n"
10814       "fast:" %}
10815   ins_encode %{
10816     Label fast;
10817     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10818     __ cmpl($dst$$Register, 0x80000000);
10819     __ jccb(Assembler::notEqual, fast);
10820     __ subptr(rsp, 8);
10821     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10822     __ fld_d(Address(rsp, 0));
10823     __ addptr(rsp, 8);
10824     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10825     __ post_call_nop();
10826     __ bind(fast);
10827   %}
10828   ins_pipe( pipe_slow );
10829 %}
10830 
10831 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10832   predicate(UseSSE<=1);
10833   match(Set dst (ConvD2L src));
10834   effect( KILL cr );
10835   format %{ "FLD    $src\t# Convert double to long\n\t"
10836             "FLDCW  trunc mode\n\t"
10837             "SUB    ESP,8\n\t"
10838             "FISTp  [ESP + #0]\n\t"
10839             "FLDCW  std/24-bit mode\n\t"
10840             "POP    EAX\n\t"
10841             "POP    EDX\n\t"
10842             "CMP    EDX,0x80000000\n\t"
10843             "JNE,s  fast\n\t"
10844             "TEST   EAX,EAX\n\t"
10845             "JNE,s  fast\n\t"

10881     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10882     __ fistp_d(Address(rsp, 0));
10883     // Restore the rounding mode, mask the exception
10884     if (Compile::current()->in_24_bit_fp_mode()) {
10885       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10886     } else {
10887       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10888     }
10889     // Load the converted long, adjust CPU stack
10890     __ pop(rax);
10891     __ pop(rdx);
10892     __ cmpl(rdx, 0x80000000);
10893     __ jccb(Assembler::notEqual, fast);
10894     __ testl(rax, rax);
10895     __ jccb(Assembler::notEqual, fast);
10896     __ subptr(rsp, 8);
10897     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10898     __ fld_d(Address(rsp, 0));
10899     __ addptr(rsp, 8);
10900     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10901     __ post_call_nop();
10902     __ bind(fast);
10903   %}
10904   ins_pipe( pipe_slow );
10905 %}
10906 
10907 // Convert a double to an int.  Java semantics require we do complex
10908 // manglations in the corner cases.  So we set the rounding mode to
10909 // 'zero', store the darned double down as an int, and reset the
10910 // rounding mode to 'nearest'.  The hardware stores a flag value down
10911 // if we would overflow or converted a NAN; we check for this and
10912 // go the slow path if needed.
10913 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10914   predicate(UseSSE==0);
10915   match(Set dst (ConvF2I src));
10916   effect( KILL tmp, KILL cr );
10917   format %{ "FLD    $src\t# Convert float to int \n\t"
10918             "FLDCW  trunc mode\n\t"
10919             "SUB    ESP,4\n\t"
10920             "FISTp  [ESP + #0]\n\t"
10921             "FLDCW  std/24-bit mode\n\t"

10937   effect( KILL tmp, KILL cr );
10938   format %{ "CVTTSS2SI $dst, $src\n\t"
10939             "CMP    $dst,0x80000000\n\t"
10940             "JNE,s  fast\n\t"
10941             "SUB    ESP, 4\n\t"
10942             "MOVSS  [ESP], $src\n\t"
10943             "FLD    [ESP]\n\t"
10944             "ADD    ESP, 4\n\t"
10945             "CALL   d2i_wrapper\n"
10946       "fast:" %}
10947   ins_encode %{
10948     Label fast;
10949     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10950     __ cmpl($dst$$Register, 0x80000000);
10951     __ jccb(Assembler::notEqual, fast);
10952     __ subptr(rsp, 4);
10953     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10954     __ fld_s(Address(rsp, 0));
10955     __ addptr(rsp, 4);
10956     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10957     __ post_call_nop();
10958     __ bind(fast);
10959   %}
10960   ins_pipe( pipe_slow );
10961 %}
10962 
10963 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10964   predicate(UseSSE==0);
10965   match(Set dst (ConvF2L src));
10966   effect( KILL cr );
10967   format %{ "FLD    $src\t# Convert float to long\n\t"
10968             "FLDCW  trunc mode\n\t"
10969             "SUB    ESP,8\n\t"
10970             "FISTp  [ESP + #0]\n\t"
10971             "FLDCW  std/24-bit mode\n\t"
10972             "POP    EAX\n\t"
10973             "POP    EDX\n\t"
10974             "CMP    EDX,0x80000000\n\t"
10975             "JNE,s  fast\n\t"
10976             "TEST   EAX,EAX\n\t"
10977             "JNE,s  fast\n\t"

11014     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
11015     __ fistp_d(Address(rsp, 0));
11016     // Restore the rounding mode, mask the exception
11017     if (Compile::current()->in_24_bit_fp_mode()) {
11018       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
11019     } else {
11020       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
11021     }
11022     // Load the converted long, adjust CPU stack
11023     __ pop(rax);
11024     __ pop(rdx);
11025     __ cmpl(rdx, 0x80000000);
11026     __ jccb(Assembler::notEqual, fast);
11027     __ testl(rax, rax);
11028     __ jccb(Assembler::notEqual, fast);
11029     __ subptr(rsp, 4);
11030     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11031     __ fld_s(Address(rsp, 0));
11032     __ addptr(rsp, 4);
11033     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11034     __ post_call_nop();
11035     __ bind(fast);
11036   %}
11037   ins_pipe( pipe_slow );
11038 %}
11039 
11040 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11041   predicate( UseSSE<=1 );  // only selected when UseSSE is 0 or 1
11042   match(Set dst (ConvI2D src));  // int -> double, with the int read from a stack slot
11043   format %{ "FILD   $src\n\t"
11044             "FSTP   $dst" %}
11045   opcode(0xDB, 0x0);  /* DB /0 : FILD m32int */
11046   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11047   ins_pipe( fpu_reg_mem );
11048 %}
11049 
11050 instruct convI2D_reg(regD dst, rRegI src) %{
11051   predicate( UseSSE>=2 && !UseXmmI2D );
11052   match(Set dst (ConvI2D src));
11053   format %{ "CVTSI2SD $dst,$src" %}
11054   ins_encode %{
< prev index next >