< prev index next >

src/hotspot/cpu/aarch64/macroAssembler_aarch64_trig.cpp

Print this page

        

*** 379,393 **** movw(jx, 2); // calculate jx as nx - 1, which is initially 2. Not a part of unrolled loop fsubd(v26, v26, v7); } block_comment("nx calculation with unrolled while(tx[nx-1]==zeroA) nx--;"); { ! fcmpd(v26, 0.0); // if NE then jx == 2. else it's 1 or 0 add(iqBase, sp, 480); // base of iq[] fmuld(v3, v26, v10); br(NE, NX_SET); ! fcmpd(v7, 0.0); // v7 == 0 => jx = 0. Else jx = 1 csetw(jx, NE); } bind(NX_SET); generate__kernel_rem_pio2(two_over_pi, pio2); // now we have y[0] = v4, y[1] = v5 and n = r2 --- 379,393 ---- movw(jx, 2); // calculate jx as nx - 1, which is initially 2. Not a part of unrolled loop fsubd(v26, v26, v7); } block_comment("nx calculation with unrolled while(tx[nx-1]==zeroA) nx--;"); { ! fcmpd(v26, 0.0d); // if NE then jx == 2. else it's 1 or 0 add(iqBase, sp, 480); // base of iq[] fmuld(v3, v26, v10); br(NE, NX_SET); ! fcmpd(v7, 0.0d); // v7 == 0 => jx = 0. Else jx = 1 csetw(jx, NE); } bind(NX_SET); generate__kernel_rem_pio2(two_over_pi, pio2); // now we have y[0] = v4, y[1] = v5 and n = r2
*** 694,704 **** // jx = nx - 1 lea(twoOverPiBase, ExternalAddress(two_over_pi)); cmpw(jv, zr); addw(tmp4, jx, 4); // tmp4 = m = jx + jk = jx + 4. jx is in {0,1,2} so m is in [4,5,6] cselw(jv, jv, zr, GE); ! fmovd(v26, 0.0); addw(tmp5, jv, 1); // jv+1 subsw(j, jv, jx); add(qBase, sp, 320); // base of q[] msubw(rscratch1, i, tmp5, rscratch1); // q0 = e0-24*(jv+1) // use double f[20], fq[20], q[20], iq[20] on stack, which is --- 694,704 ---- // jx = nx - 1 lea(twoOverPiBase, ExternalAddress(two_over_pi)); cmpw(jv, zr); addw(tmp4, jx, 4); // tmp4 = m = jx + jk = jx + 4. jx is in {0,1,2} so m is in [4,5,6] cselw(jv, jv, zr, GE); ! fmovd(v26, 0.0d); addw(tmp5, jv, 1); // jv+1 subsw(j, jv, jx); add(qBase, sp, 320); // base of q[] msubw(rscratch1, i, tmp5, rscratch1); // q0 = e0-24*(jv+1) // use double f[20], fq[20], q[20], iq[20] on stack, which is
*** 817,828 **** } movz(i, 0x3E70, 48); movw(jz, 4); fmovd(v17, i); // v17 = twon24 fmovd(v30, tmp5); // 2^q0 ! fmovd(v21, 0.125); ! fmovd(v20, 8.0); fmovd(v22, tmp4); // 2^-q0 block_comment("recompute loop"); { bind(RECOMPUTE); // for(i=0,j=jz,z=q[jz];j>0;i++,j--) { --- 817,828 ---- } movz(i, 0x3E70, 48); movw(jz, 4); fmovd(v17, i); // v17 = twon24 fmovd(v30, tmp5); // 2^q0 ! fmovd(v21, 0.125d); ! fmovd(v20, 8.0d); fmovd(v22, tmp4); // 2^-q0 block_comment("recompute loop"); { bind(RECOMPUTE); // for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
*** 875,885 **** b(Q0_ZERO_CMP_DONE); bind(Q0_ZERO_CMP_EQ); lsr(ih, tmp2, 23); // ih = iq[z-1] >> 23 b(Q0_ZERO_CMP_DONE); bind(Q0_ZERO_CMP_LT); ! fmovd(v4, 0.5); fcmpd(v18, v4); cselw(ih, zr, ih, LT); // if (z<0.5) ih = 0 } bind(Q0_ZERO_CMP_DONE); cmpw(ih, zr); --- 875,885 ---- b(Q0_ZERO_CMP_DONE); bind(Q0_ZERO_CMP_EQ); lsr(ih, tmp2, 23); // ih = iq[z-1] >> 23 b(Q0_ZERO_CMP_DONE); bind(Q0_ZERO_CMP_LT); ! fmovd(v4, 0.5d); fcmpd(v18, v4); cselw(ih, zr, ih, LT); // if (z<0.5) ih = 0 } bind(Q0_ZERO_CMP_DONE); cmpw(ih, zr);
*** 922,940 **** bind(IH_AFTER_SWITCH); cmpw(ih, 2); br(NE, IH_HANDLED); block_comment("if(ih==2) {"); { ! fmovd(v25, 1.0); fsubd(v18, v25, v18); // z = one - z; cbzw(rscratch2, IH_HANDLED); fsubd(v18, v18, v30); // z -= scalbnA(one,q0); } } bind(IH_HANDLED); // check if recomputation is needed ! fcmpd(v18, 0.0); br(NE, RECOMP_CHECK_DONE_NOT_ZERO); block_comment("if(z==zeroB) {"); { block_comment("for (i=jz-1;i>=jk;i--) j |= iq[i];"); { --- 922,940 ---- bind(IH_AFTER_SWITCH); cmpw(ih, 2); br(NE, IH_HANDLED); block_comment("if(ih==2) {"); { ! fmovd(v25, 1.0d); fsubd(v18, v25, v18); // z = one - z; cbzw(rscratch2, IH_HANDLED); fsubd(v18, v18, v30); // z -= scalbnA(one,q0); } } bind(IH_HANDLED); // check if recomputation is needed ! fcmpd(v18, 0.0d); br(NE, RECOMP_CHECK_DONE_NOT_ZERO); block_comment("if(z==zeroB) {"); { block_comment("for (i=jz-1;i>=jk;i--) j |= iq[i];"); {
*** 992,1002 **** } } } bind(RECOMP_CHECK_DONE); // chop off zero terms ! fcmpd(v18, 0.0); br(EQ, Z_IS_ZERO); block_comment("else block of if(z==0.0) {"); { bind(RECOMP_CHECK_DONE_NOT_ZERO); fmuld(v18, v18, v22); --- 992,1002 ---- } } } bind(RECOMP_CHECK_DONE); // chop off zero terms ! fcmpd(v18, 0.0d); br(EQ, Z_IS_ZERO); block_comment("else block of if(z==0.0) {"); { bind(RECOMP_CHECK_DONE_NOT_ZERO); fmuld(v18, v18, v22);
*** 1051,1061 **** block_comment("compute PIo2[0,...,jp]*q[jz,...,0]. for(i=jz;i>=0;i--) {...}"); { movw(i, jz); movw(tmp2, zr); // tmp2 will keep jz - i == 0 at start bind(COMP_FOR); // for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ! fmovd(v30, 0.0); add(tmp5, qBase, i, LSL, 3); // address of q[i+k] for k==0 movw(tmp3, 4); movw(tmp4, zr); // used as k cmpw(tmp2, 4); add(tmp1, qBase, i, LSL, 3); // used as q[i] address --- 1051,1061 ---- block_comment("compute PIo2[0,...,jp]*q[jz,...,0]. for(i=jz;i>=0;i--) {...}"); { movw(i, jz); movw(tmp2, zr); // tmp2 will keep jz - i == 0 at start bind(COMP_FOR); // for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ! fmovd(v30, 0.0d); add(tmp5, qBase, i, LSL, 3); // address of q[i+k] for k==0 movw(tmp3, 4); movw(tmp4, zr); // used as k cmpw(tmp2, 4); add(tmp1, qBase, i, LSL, 3); // used as q[i] address
*** 1079,1089 **** block_comment("switch(prec) {...}. case 2:"); { // compress fq into y[] // remember prec == 2 block_comment("for (i=jz;i>=0;i--) fw += fq[i];"); { ! fmovd(v4, 0.0); mov(i, jz); bind(FW_FOR1); ldrd(v1, Address(rscratch2, i, Address::lsl(3))); subsw(i, i, 1); faddd(v4, v4, v1); --- 1079,1089 ---- block_comment("switch(prec) {...}. case 2:"); { // compress fq into y[] // remember prec == 2 block_comment("for (i=jz;i>=0;i--) fw += fq[i];"); { ! fmovd(v4, 0.0d); mov(i, jz); bind(FW_FOR1); ldrd(v1, Address(rscratch2, i, Address::lsl(3))); subsw(i, i, 1); faddd(v4, v4, v1);
*** 1317,1337 **** ldpd(C5, C6, Address(rscratch2, 32)); // load C5, C6 fmuld(z, x, x); // z=x^2 ld1(C1, C2, C3, C4, T1D, Address(rscratch2)); // load C1..C3\4 block_comment("calculate r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))))"); { fmaddd(r, z, C6, C5); ! fmovd(half, 0.5); fmaddd(r, z, r, C4); fmuld(y, x, y); fmaddd(r, z, r, C3); mov(rscratch1, 0x3FD33333); fmaddd(r, z, r, C2); fmuld(x, z, z); // x = z^2 fmaddd(r, z, r, C1); // r = C1+z(C2+z(C4+z(C5+z*C6))) } // need to multiply r by z to have "final" r value ! fmovd(one, 1.0); cmp(ix, rscratch1); br(GT, IX_IS_LARGE); block_comment("if(ix < 0x3FD33333) return one - (0.5*z - (z*r - x*y))"); { // return 1.0 - (0.5*z - (z*r - x*y)) = 1.0 - (0.5*z + (x*y - z*r)) fmsubd(v0, x, r, y); --- 1317,1337 ---- ldpd(C5, C6, Address(rscratch2, 32)); // load C5, C6 fmuld(z, x, x); // z=x^2 ld1(C1, C2, C3, C4, T1D, Address(rscratch2)); // load C1..C3\4 block_comment("calculate r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))))"); { fmaddd(r, z, C6, C5); ! fmovd(half, 0.5d); fmaddd(r, z, r, C4); fmuld(y, x, y); fmaddd(r, z, r, C3); mov(rscratch1, 0x3FD33333); fmaddd(r, z, r, C2); fmuld(x, z, z); // x = z^2 fmaddd(r, z, r, C1); // r = C1+z(C2+z(C4+z(C5+z*C6))) } // need to multiply r by z to have "final" r value ! fmovd(one, 1.0d); cmp(ix, rscratch1); br(GT, IX_IS_LARGE); block_comment("if(ix < 0x3FD33333) return one - (0.5*z - (z*r - x*y))"); { // return 1.0 - (0.5*z - (z*r - x*y)) = 1.0 - (0.5*z + (x*y - z*r)) fmsubd(v0, x, r, y);
*** 1350,1360 **** fmovd(qx, rscratch2); } b(QX_SET); bind(SET_QX_CONST); block_comment("if(ix > 0x3fe90000) qx = 0.28125;"); { ! fmovd(qx, 0.28125); } bind(QX_SET); fnmsub(C6, x, r, y); // z*r - xy fnmsub(h, half, z, qx); // h = 0.5*z - qx fsubd(a, one, qx); // a = 1-qx --- 1350,1360 ---- fmovd(qx, rscratch2); } b(QX_SET); bind(SET_QX_CONST); block_comment("if(ix > 0x3fe90000) qx = 0.28125;"); { ! fmovd(qx, 0.28125d); } bind(QX_SET); fnmsub(C6, x, r, y); // z*r - xy fnmsub(h, half, z, qx); // h = 0.5*z - qx fsubd(a, one, qx); // a = 1-qx
*** 1441,1451 **** ret(lr); } block_comment("kernel_sin/kernel_cos: if(ix<0x3e400000) {<fast return>}"); { bind(TINY_X); if (isCos) { ! fmovd(v0, 1.0); } ret(lr); } bind(ARG_REDUCTION); /* argument reduction needed */ block_comment("n = __ieee754_rem_pio2(x,y);"); { --- 1441,1451 ---- ret(lr); } block_comment("kernel_sin/kernel_cos: if(ix<0x3e400000) {<fast return>}"); { bind(TINY_X); if (isCos) { ! fmovd(v0, 1.0d); } ret(lr); } bind(ARG_REDUCTION); /* argument reduction needed */ block_comment("n = __ieee754_rem_pio2(x,y);"); {
< prev index next >