< prev index next >

test/hotspot/gtest/aarch64/aarch64-asmtest.py

Print this page

1536                         ["fmov",   "__ fmovhid(r0, v1);",                                "fmov\tx0, v1.d[1]"],
1537                         ["ld1",    "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"],
1538                         # SVE instructions
1539                         ["cpy",     "__ sve_cpy(z0, __ S, p0, v1);",                      "mov\tz0.s, p0/m, s1"],
1540                         ["cpy",     "__ sve_cpy(z0, __ B, p0, 127, true);",               "mov\tz0.b, p0/m, 127"],
1541                         ["cpy",     "__ sve_cpy(z1, __ H, p0, -128, true);",              "mov\tz1.h, p0/m, -128"],
1542                         ["cpy",     "__ sve_cpy(z2, __ S, p0, 32512, true);",             "mov\tz2.s, p0/m, 32512"],
1543                         ["cpy",     "__ sve_cpy(z5, __ D, p0, -32768, false);",           "mov\tz5.d, p0/z, -32768"],
1544                         ["cpy",     "__ sve_cpy(z10, __ B, p0, -1, false);",              "mov\tz10.b, p0/z, -1"],
1545                         ["cpy",     "__ sve_cpy(z11, __ S, p0, -1, false);",              "mov\tz11.s, p0/z, -1"],
1546                         ["inc",     "__ sve_inc(r0, __ S);",                              "incw\tx0"],
1547                         ["dec",     "__ sve_dec(r1, __ H);",                              "dech\tx1"],
1548                         ["lsl",     "__ sve_lsl(z0, __ B, z1, 7);",                       "lsl\tz0.b, z1.b, #7"],
1549                         ["lsl",     "__ sve_lsl(z21, __ H, z1, 15);",                     "lsl\tz21.h, z1.h, #15"],
1550                         ["lsl",     "__ sve_lsl(z0, __ S, z1, 31);",                      "lsl\tz0.s, z1.s, #31"],
1551                         ["lsl",     "__ sve_lsl(z0, __ D, z1, 63);",                      "lsl\tz0.d, z1.d, #63"],
1552                         ["lsr",     "__ sve_lsr(z0, __ B, z1, 7);",                       "lsr\tz0.b, z1.b, #7"],
1553                         ["asr",     "__ sve_asr(z0, __ H, z11, 15);",                     "asr\tz0.h, z11.h, #15"],
1554                         ["lsr",     "__ sve_lsr(z30, __ S, z1, 31);",                     "lsr\tz30.s, z1.s, #31"],
1555                         ["asr",     "__ sve_asr(z0, __ D, z1, 63);",                      "asr\tz0.d, z1.d, #63"],

















1556                         ["addvl",   "__ sve_addvl(sp, r0, 31);",                          "addvl\tsp, x0, #31"],
1557                         ["addpl",   "__ sve_addpl(r1, sp, -32);",                         "addpl\tx1, sp, -32"],
1558                         ["cntp",    "__ sve_cntp(r8, __ B, p0, p1);",                     "cntp\tx8, p0, p1.b"],
1559                         ["dup",     "__ sve_dup(z0, __ B, 127);",                         "dup\tz0.b, 127"],
1560                         ["dup",     "__ sve_dup(z1, __ H, -128);",                        "dup\tz1.h, -128"],
1561                         ["dup",     "__ sve_dup(z2, __ S, 32512);",                       "dup\tz2.s, 32512"],
1562                         ["dup",     "__ sve_dup(z7, __ D, -32768);",                      "dup\tz7.d, -32768"],
1563                         ["dup",     "__ sve_dup(z10, __ B, -1);",                         "dup\tz10.b, -1"],
1564                         ["dup",     "__ sve_dup(z11, __ S, -1);",                         "dup\tz11.s, -1"],
1565                         ["ld1b",    "__ sve_ld1b(z0, __ B, p0, Address(sp));",            "ld1b\t{z0.b}, p0/z, [sp]"],
1566                         ["ld1b",    "__ sve_ld1b(z0, __ H, p1, Address(sp));",            "ld1b\t{z0.h}, p1/z, [sp]"],
1567                         ["ld1b",    "__ sve_ld1b(z0, __ S, p2, Address(sp, r8));",        "ld1b\t{z0.s}, p2/z, [sp, x8]"],
1568                         ["ld1b",    "__ sve_ld1b(z0, __ D, p3, Address(sp, 7));",         "ld1b\t{z0.d}, p3/z, [sp, #7, MUL VL]"],
1569                         ["ld1h",    "__ sve_ld1h(z10, __ H, p1, Address(sp, -8));",       "ld1h\t{z10.h}, p1/z, [sp, #-8, MUL VL]"],
1570                         ["ld1w",    "__ sve_ld1w(z20, __ S, p2, Address(r0, 7));",        "ld1w\t{z20.s}, p2/z, [x0, #7, MUL VL]"],
1571                         ["ld1b",    "__ sve_ld1b(z30, __ B, p3, Address(sp, r8));",       "ld1b\t{z30.b}, p3/z, [sp, x8]"],
1572                         ["ld1w",    "__ sve_ld1w(z0, __ S, p4, Address(sp, r28));",       "ld1w\t{z0.s}, p4/z, [sp, x28, LSL #2]"],
1573                         ["ld1d",    "__ sve_ld1d(z11, __ D, p5, Address(r0, r1));",       "ld1d\t{z11.d}, p5/z, [x0, x1, LSL #3]"],
1574                         ["st1b",    "__ sve_st1b(z22, __ B, p6, Address(sp));",           "st1b\t{z22.b}, p6, [sp]"],
1575                         ["st1b",    "__ sve_st1b(z31, __ B, p7, Address(sp, -8));",       "st1b\t{z31.b}, p7, [sp, #-8, MUL VL]"],

1630                         ["scvtf",   "__ sve_scvtf(z6, __ H, p3, z1, __ D);",              "scvtf\tz6.h, p3/m, z1.d"],
1631                         ["scvtf",   "__ sve_scvtf(z6, __ H, p3, z1, __ H);",              "scvtf\tz6.h, p3/m, z1.h"],
1632                         ["fcvt",    "__ sve_fcvt(z5, __ D, p3, z4, __ S);",               "fcvt\tz5.d, p3/m, z4.s"],
1633                         ["fcvt",    "__ sve_fcvt(z1, __ S, p3, z0, __ D);",               "fcvt\tz1.s, p3/m, z0.d"],
1634                         ["fcvtzs",  "__ sve_fcvtzs(z19, __ D, p2, z1, __ D);",            "fcvtzs\tz19.d, p2/m, z1.d"],
1635                         ["fcvtzs",  "__ sve_fcvtzs(z9, __ S, p1, z8, __ S);",             "fcvtzs\tz9.s, p1/m, z8.s"],
1636                         ["fcvtzs",  "__ sve_fcvtzs(z1, __ S, p2, z0, __ D);",             "fcvtzs\tz1.s, p2/m, z0.d"],
1637                         ["fcvtzs",  "__ sve_fcvtzs(z1, __ D, p3, z0, __ S);",             "fcvtzs\tz1.d, p3/m, z0.s"],
1638                         ["fcvtzs",  "__ sve_fcvtzs(z1, __ S, p4, z18, __ H);",            "fcvtzs\tz1.s, p4/m, z18.h"],
1639                         ["lasta",   "__ sve_lasta(r0, __ B, p0, z15);",                   "lasta\tw0, p0, z15.b"],
1640                         ["lastb",   "__ sve_lastb(r1, __ B, p1, z16);",                   "lastb\tw1, p1, z16.b"],
1641                         ["lasta",   "__ sve_lasta(v0, __ B, p0, z15);",                   "lasta\tb0, p0, z15.b"],
1642                         ["lastb",   "__ sve_lastb(v1, __ B, p1, z16);",                   "lastb\tb1, p1, z16.b"],
1643                         ["index",   "__ sve_index(z6, __ S, 1, 1);",                      "index\tz6.s, #1, #1"],
1644                         ["cpy",     "__ sve_cpy(z7, __ H, p3, r5);",                      "cpy\tz7.h, p3/m, w5"],
1645                         ["tbl",     "__ sve_tbl(z16, __ S, z17, z18);",                   "tbl\tz16.s, {z17.s}, z18.s"],
1646                         ["ld1w",    "__ sve_ld1w_gather(z15, p0, r5, z16);",              "ld1w\t{z15.s}, p0/z, [x5, z16.s, uxtw #2]"],
1647                         ["ld1d",    "__ sve_ld1d_gather(z15, p0, r5, z16);",              "ld1d\t{z15.d}, p0/z, [x5, z16.d, uxtw #3]"],
1648                         ["st1w",    "__ sve_st1w_scatter(z15, p0, r5, z16);",             "st1w\t{z15.s}, p0, [x5, z16.s, uxtw #2]"],
1649                         ["st1d",    "__ sve_st1d_scatter(z15, p0, r5, z16);",             "st1d\t{z15.d}, p0, [x5, z16.d, uxtw #3]"],























1650 ])
1651 
# FloatImmediateOp section: for every literal in the tuple below, print the
# C++ assembler call `__ fmovd(v0, <imm>);` (left-justified to 50 columns)
# followed by the expected assembly as a `//` comment, and append that same
# assembly text (`fmov d0, #<imm>`, tab-indented) to outfile.
# outfile is defined outside this excerpt.
# NOTE(review): the loop variable `float` rebinds the builtin name at module
# scope -- confirm nothing later in the script needs the builtin float().
1652 print "\n// FloatImmediateOp"
1653 for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125",
1654               "0.1328125", "0.25", "0.265625", "0.5", "0.53125", "1.0", "1.0625",
1655               "-2.0", "-2.125", "-4.0", "-4.25", "-8.0", "-8.5", "-16.0", "-17.0",
1656               "-0.125", "-0.1328125", "-0.25", "-0.265625", "-0.5", "-0.53125", "-1.0", "-1.0625"):
1657     astr = "fmov d0, #" + float
1658     cstr = "__ fmovd(v0, " + float + ");"
1659     print "    %-50s //\t%s" % (cstr, astr)
1660     outfile.write("\t" + astr + "\n")
1661 
# Call generate() with LSEOp once per (size, suffix) pair: sizes "x" and "w",
# suffixes "", "a", "al" and "l" -- eight calls in total.
# Each table row is [first name, second name, size, suffix]. The two names
# are equal except for ldbic/ldclr and ldorr/ldset -- presumably the
# assembler method name versus the architectural mnemonic; TODO confirm
# against the LSEOp class, which is defined outside this excerpt.
1662 # ARMv8.1A
1663 for size in ("x", "w"):
1664     for suffix in ("", "a", "al", "l"):
1665         generate(LSEOp, [["swp", "swp", size, suffix],
1666                          ["ldadd", "ldadd", size, suffix],
1667                          ["ldbic", "ldclr", size, suffix],
1668                          ["ldeor", "ldeor", size, suffix],
1669                          ["ldorr", "ldset", size, suffix],
1670                          ["ldsmin", "ldsmin", size, suffix],
1671                          ["ldsmax", "ldsmax", size, suffix],
1672                          ["ldumin", "ldumin", size, suffix],
1673                          ["ldumax", "ldumax", size, suffix]]);
1674 
# Pass the four mnemonics below to generate() with the SHA3SIMDOp class.
# Both generate() and SHA3SIMDOp are defined outside this excerpt.
1675 # ARMv8.2A
1676 generate(SHA3SIMDOp, ["bcax", "eor3", "rax1", "xar"])
1677 
# Pass the four SHA512 mnemonics below to generate() with the SHA512SIMDOp
# class (both defined outside this excerpt).
1678 generate(SHA512SIMDOp, ["sha512h", "sha512h2", "sha512su0", "sha512su1"])
1679 
# Table of SVE vector instructions handed to generate() with SVEVectorOp.
# Each row is [mnemonic, operand-shape tag, optional extra fields...]. The
# tags used here are "ZZZ", "ZPZ" and "ZPZZ"; some rows add "m" and some of
# those also add "dn".
# NOTE(review): the meaning of the tags and of "m"/"dn" is decided by
# SVEVectorOp, which is not visible here -- presumably "m" selects merging
# predication and "dn" the destructive form; confirm before relying on it.
1680 generate(SVEVectorOp, [["add", "ZZZ"],
1681                        ["sub", "ZZZ"],
1682                        ["fadd", "ZZZ"],
1683                        ["fmul", "ZZZ"],
1684                        ["fsub", "ZZZ"],
1685                        ["abs", "ZPZ", "m"],
1686                        ["add", "ZPZ", "m", "dn"],

1687                        ["asr", "ZPZ", "m", "dn"],
1688                        ["cnt", "ZPZ", "m"],

1689                        ["lsl", "ZPZ", "m", "dn"],
1690                        ["lsr", "ZPZ", "m", "dn"],
1691                        ["mul", "ZPZ", "m", "dn"],
1692                        ["neg", "ZPZ", "m"],
1693                        ["not", "ZPZ", "m"],

1694                        ["smax", "ZPZ", "m", "dn"],
1695                        ["smin", "ZPZ", "m", "dn"],
1696                        ["sub", "ZPZ", "m", "dn"],
1697                        ["fabs", "ZPZ", "m"],
1698                        ["fadd", "ZPZ", "m", "dn"],
1699                        ["fdiv", "ZPZ", "m", "dn"],
1700                        ["fmax", "ZPZ", "m", "dn"],
1701                        ["fmin", "ZPZ", "m", "dn"],
1702                        ["fmul", "ZPZ", "m", "dn"],
1703                        ["fneg", "ZPZ", "m"],
1704                        ["frintm", "ZPZ", "m"],
1705                        ["frintn", "ZPZ", "m"],
1706                        ["frintp", "ZPZ", "m"],
1707                        ["fsqrt", "ZPZ", "m"],
1708                        ["fsub", "ZPZ", "m", "dn"],

1709                        ["fmla", "ZPZZ", "m"],
1710                        ["fmls", "ZPZZ", "m"],
1711                        ["fnmla", "ZPZZ", "m"],
1712                        ["fnmls", "ZPZZ", "m"],
1713                        ["mla", "ZPZZ", "m"],
1714                        ["mls", "ZPZZ", "m"],
1715                        ["and", "ZZZ"],
1716                        ["eor", "ZZZ"],
1717                        ["orr", "ZZZ"],
1718                        ["bic", "ZZZ"],
1719                        ["uzp1", "ZZZ"],
1720                        ["uzp2", "ZZZ"],
1721                       ])
1722 
# SVE reduction instructions handed to generate() with SVEReductionOp.
# Each row is [mnemonic, integer]; the integer is 2 for fminv, fmaxv and
# fadda and 0 for every other row.
# NOTE(review): how SVEReductionOp interprets that integer is not visible
# in this excerpt -- confirm against the class definition.
1723 generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0],
1724                           ["fminv", 2], ["fmaxv", 2], ["fadda", 2], ["uaddv", 0]])
1725 
# Emit the `forth` label into both outputs: a `__ bind(forth);` line on
# stdout and a matching `forth:` label line in outfile (defined outside
# this excerpt).
1726 print "\n    __ bind(forth);"
1727 outfile.write("forth:\n")
1728 

1536                         ["fmov",   "__ fmovhid(r0, v1);",                                "fmov\tx0, v1.d[1]"],
1537                         ["ld1",    "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"],
1538                         # SVE instructions
1539                         ["cpy",     "__ sve_cpy(z0, __ S, p0, v1);",                      "mov\tz0.s, p0/m, s1"],
1540                         ["cpy",     "__ sve_cpy(z0, __ B, p0, 127, true);",               "mov\tz0.b, p0/m, 127"],
1541                         ["cpy",     "__ sve_cpy(z1, __ H, p0, -128, true);",              "mov\tz1.h, p0/m, -128"],
1542                         ["cpy",     "__ sve_cpy(z2, __ S, p0, 32512, true);",             "mov\tz2.s, p0/m, 32512"],
1543                         ["cpy",     "__ sve_cpy(z5, __ D, p0, -32768, false);",           "mov\tz5.d, p0/z, -32768"],
1544                         ["cpy",     "__ sve_cpy(z10, __ B, p0, -1, false);",              "mov\tz10.b, p0/z, -1"],
1545                         ["cpy",     "__ sve_cpy(z11, __ S, p0, -1, false);",              "mov\tz11.s, p0/z, -1"],
1546                         ["inc",     "__ sve_inc(r0, __ S);",                              "incw\tx0"],
1547                         ["dec",     "__ sve_dec(r1, __ H);",                              "dech\tx1"],
1548                         ["lsl",     "__ sve_lsl(z0, __ B, z1, 7);",                       "lsl\tz0.b, z1.b, #7"],
1549                         ["lsl",     "__ sve_lsl(z21, __ H, z1, 15);",                     "lsl\tz21.h, z1.h, #15"],
1550                         ["lsl",     "__ sve_lsl(z0, __ S, z1, 31);",                      "lsl\tz0.s, z1.s, #31"],
1551                         ["lsl",     "__ sve_lsl(z0, __ D, z1, 63);",                      "lsl\tz0.d, z1.d, #63"],
1552                         ["lsr",     "__ sve_lsr(z0, __ B, z1, 7);",                       "lsr\tz0.b, z1.b, #7"],
1553                         ["asr",     "__ sve_asr(z0, __ H, z11, 15);",                     "asr\tz0.h, z11.h, #15"],
1554                         ["lsr",     "__ sve_lsr(z30, __ S, z1, 31);",                     "lsr\tz30.s, z1.s, #31"],
1555                         ["asr",     "__ sve_asr(z0, __ D, z1, 63);",                      "asr\tz0.d, z1.d, #63"],
1556                         ["lsl",     "__ sve_lsl(z0, __ B, p0, 0);",                       "lsl\tz0.b, p0/m, z0.b, #0"],
1557                         ["lsl",     "__ sve_lsl(z0, __ B, p0, 5);",                       "lsl\tz0.b, p0/m, z0.b, #5"],
1558                         ["lsl",     "__ sve_lsl(z1, __ H, p1, 15);",                      "lsl\tz1.h, p1/m, z1.h, #15"],
1559                         ["lsl",     "__ sve_lsl(z2, __ S, p2, 31);",                      "lsl\tz2.s, p2/m, z2.s, #31"],
1560                         ["lsl",     "__ sve_lsl(z3, __ D, p3, 63);",                      "lsl\tz3.d, p3/m, z3.d, #63"],
1561                         ["lsr",     "__ sve_lsr(z0, __ B, p0, 1);",                       "lsr\tz0.b, p0/m, z0.b, #1"],
1562                         ["lsr",     "__ sve_lsr(z0, __ B, p0, 8);",                       "lsr\tz0.b, p0/m, z0.b, #8"],
1563                         ["lsr",     "__ sve_lsr(z1, __ H, p1, 15);",                      "lsr\tz1.h, p1/m, z1.h, #15"],
1564                         ["lsr",     "__ sve_lsr(z2, __ S, p2, 7);",                       "lsr\tz2.s, p2/m, z2.s, #7"],
1565                         ["lsr",     "__ sve_lsr(z2, __ S, p2, 31);",                      "lsr\tz2.s, p2/m, z2.s, #31"],
1566                         ["lsr",     "__ sve_lsr(z3, __ D, p3, 63);",                      "lsr\tz3.d, p3/m, z3.d, #63"],
1567                         ["asr",     "__ sve_asr(z0, __ B, p0, 1);",                       "asr\tz0.b, p0/m, z0.b, #1"],
1568                         ["asr",     "__ sve_asr(z0, __ B, p0, 7);",                       "asr\tz0.b, p0/m, z0.b, #7"],
1569                         ["asr",     "__ sve_asr(z1, __ H, p1, 5);",                       "asr\tz1.h, p1/m, z1.h, #5"],
1570                         ["asr",     "__ sve_asr(z1, __ H, p1, 15);",                      "asr\tz1.h, p1/m, z1.h, #15"],
1571                         ["asr",     "__ sve_asr(z2, __ S, p2, 31);",                      "asr\tz2.s, p2/m, z2.s, #31"],
1572                         ["asr",     "__ sve_asr(z3, __ D, p3, 63);",                      "asr\tz3.d, p3/m, z3.d, #63"],
1573                         ["addvl",   "__ sve_addvl(sp, r0, 31);",                          "addvl\tsp, x0, #31"],
1574                         ["addpl",   "__ sve_addpl(r1, sp, -32);",                         "addpl\tx1, sp, -32"],
1575                         ["cntp",    "__ sve_cntp(r8, __ B, p0, p1);",                     "cntp\tx8, p0, p1.b"],
1576                         ["dup",     "__ sve_dup(z0, __ B, 127);",                         "dup\tz0.b, 127"],
1577                         ["dup",     "__ sve_dup(z1, __ H, -128);",                        "dup\tz1.h, -128"],
1578                         ["dup",     "__ sve_dup(z2, __ S, 32512);",                       "dup\tz2.s, 32512"],
1579                         ["dup",     "__ sve_dup(z7, __ D, -32768);",                      "dup\tz7.d, -32768"],
1580                         ["dup",     "__ sve_dup(z10, __ B, -1);",                         "dup\tz10.b, -1"],
1581                         ["dup",     "__ sve_dup(z11, __ S, -1);",                         "dup\tz11.s, -1"],
1582                         ["ld1b",    "__ sve_ld1b(z0, __ B, p0, Address(sp));",            "ld1b\t{z0.b}, p0/z, [sp]"],
1583                         ["ld1b",    "__ sve_ld1b(z0, __ H, p1, Address(sp));",            "ld1b\t{z0.h}, p1/z, [sp]"],
1584                         ["ld1b",    "__ sve_ld1b(z0, __ S, p2, Address(sp, r8));",        "ld1b\t{z0.s}, p2/z, [sp, x8]"],
1585                         ["ld1b",    "__ sve_ld1b(z0, __ D, p3, Address(sp, 7));",         "ld1b\t{z0.d}, p3/z, [sp, #7, MUL VL]"],
1586                         ["ld1h",    "__ sve_ld1h(z10, __ H, p1, Address(sp, -8));",       "ld1h\t{z10.h}, p1/z, [sp, #-8, MUL VL]"],
1587                         ["ld1w",    "__ sve_ld1w(z20, __ S, p2, Address(r0, 7));",        "ld1w\t{z20.s}, p2/z, [x0, #7, MUL VL]"],
1588                         ["ld1b",    "__ sve_ld1b(z30, __ B, p3, Address(sp, r8));",       "ld1b\t{z30.b}, p3/z, [sp, x8]"],
1589                         ["ld1w",    "__ sve_ld1w(z0, __ S, p4, Address(sp, r28));",       "ld1w\t{z0.s}, p4/z, [sp, x28, LSL #2]"],
1590                         ["ld1d",    "__ sve_ld1d(z11, __ D, p5, Address(r0, r1));",       "ld1d\t{z11.d}, p5/z, [x0, x1, LSL #3]"],
1591                         ["st1b",    "__ sve_st1b(z22, __ B, p6, Address(sp));",           "st1b\t{z22.b}, p6, [sp]"],
1592                         ["st1b",    "__ sve_st1b(z31, __ B, p7, Address(sp, -8));",       "st1b\t{z31.b}, p7, [sp, #-8, MUL VL]"],

1647                         ["scvtf",   "__ sve_scvtf(z6, __ H, p3, z1, __ D);",              "scvtf\tz6.h, p3/m, z1.d"],
1648                         ["scvtf",   "__ sve_scvtf(z6, __ H, p3, z1, __ H);",              "scvtf\tz6.h, p3/m, z1.h"],
1649                         ["fcvt",    "__ sve_fcvt(z5, __ D, p3, z4, __ S);",               "fcvt\tz5.d, p3/m, z4.s"],
1650                         ["fcvt",    "__ sve_fcvt(z1, __ S, p3, z0, __ D);",               "fcvt\tz1.s, p3/m, z0.d"],
1651                         ["fcvtzs",  "__ sve_fcvtzs(z19, __ D, p2, z1, __ D);",            "fcvtzs\tz19.d, p2/m, z1.d"],
1652                         ["fcvtzs",  "__ sve_fcvtzs(z9, __ S, p1, z8, __ S);",             "fcvtzs\tz9.s, p1/m, z8.s"],
1653                         ["fcvtzs",  "__ sve_fcvtzs(z1, __ S, p2, z0, __ D);",             "fcvtzs\tz1.s, p2/m, z0.d"],
1654                         ["fcvtzs",  "__ sve_fcvtzs(z1, __ D, p3, z0, __ S);",             "fcvtzs\tz1.d, p3/m, z0.s"],
1655                         ["fcvtzs",  "__ sve_fcvtzs(z1, __ S, p4, z18, __ H);",            "fcvtzs\tz1.s, p4/m, z18.h"],
1656                         ["lasta",   "__ sve_lasta(r0, __ B, p0, z15);",                   "lasta\tw0, p0, z15.b"],
1657                         ["lastb",   "__ sve_lastb(r1, __ B, p1, z16);",                   "lastb\tw1, p1, z16.b"],
1658                         ["lasta",   "__ sve_lasta(v0, __ B, p0, z15);",                   "lasta\tb0, p0, z15.b"],
1659                         ["lastb",   "__ sve_lastb(v1, __ B, p1, z16);",                   "lastb\tb1, p1, z16.b"],
1660                         ["index",   "__ sve_index(z6, __ S, 1, 1);",                      "index\tz6.s, #1, #1"],
1661                         ["cpy",     "__ sve_cpy(z7, __ H, p3, r5);",                      "cpy\tz7.h, p3/m, w5"],
1662                         ["tbl",     "__ sve_tbl(z16, __ S, z17, z18);",                   "tbl\tz16.s, {z17.s}, z18.s"],
1663                         ["ld1w",    "__ sve_ld1w_gather(z15, p0, r5, z16);",              "ld1w\t{z15.s}, p0/z, [x5, z16.s, uxtw #2]"],
1664                         ["ld1d",    "__ sve_ld1d_gather(z15, p0, r5, z16);",              "ld1d\t{z15.d}, p0/z, [x5, z16.d, uxtw #3]"],
1665                         ["st1w",    "__ sve_st1w_scatter(z15, p0, r5, z16);",             "st1w\t{z15.s}, p0, [x5, z16.s, uxtw #2]"],
1666                         ["st1d",    "__ sve_st1d_scatter(z15, p0, r5, z16);",             "st1d\t{z15.d}, p0, [x5, z16.d, uxtw #3]"],
1667                         ["and",     "__ sve_and(p0, p1, p2, p3);",                        "and\tp0.b, p1/z, p2.b, p3.b"],
1668                         ["ands",    "__ sve_ands(p4, p5, p6, p0);",                       "ands\tp4.b, p5/z, p6.b, p0.b"],
1669                         ["eor",     "__ sve_eor(p0, p1, p2, p3);",                        "eor\tp0.b, p1/z, p2.b, p3.b"],
1670                         ["eors",    "__ sve_eors(p5, p6, p0, p1);",                       "eors\tp5.b, p6/z, p0.b, p1.b"],
1671                         ["orr",     "__ sve_orr(p0, p1, p2, p3);",                        "orr\tp0.b, p1/z, p2.b, p3.b"],
1672                         ["orrs",    "__ sve_orrs(p9, p1, p4, p5);",                       "orrs\tp9.b, p1/z, p4.b, p5.b"],
1673                         ["bic",     "__ sve_bic(p10, p7, p9, p11);",                      "bic\tp10.b, p7/z, p9.b, p11.b"],
1674                         ["ptest",   "__ sve_ptest(p7, p1);",                              "ptest\tp7, p1.b"],
1675                         ["ptrue",   "__ sve_ptrue(p1, __ B);",                            "ptrue\tp1.b"],
1676                         ["ptrue",   "__ sve_ptrue(p2, __ H);",                            "ptrue\tp2.h"],
1677                         ["ptrue",   "__ sve_ptrue(p3, __ S);",                            "ptrue\tp3.s"],
1678                         ["ptrue",   "__ sve_ptrue(p4, __ D);",                            "ptrue\tp4.d"],
1679                         ["pfalse",  "__ sve_pfalse(p7);",                                 "pfalse\tp7.b"],
1680                         ["uzp1",    "__ sve_uzp1(p0, __ B, p0, p1);",                     "uzp1\tp0.b, p0.b, p1.b"],
1681                         ["uzp1",    "__ sve_uzp1(p0, __ H, p0, p1);",                     "uzp1\tp0.h, p0.h, p1.h"],
1682                         ["uzp1",    "__ sve_uzp1(p0, __ S, p0, p1);",                     "uzp1\tp0.s, p0.s, p1.s"],
1683                         ["uzp1",    "__ sve_uzp1(p0, __ D, p0, p1);",                     "uzp1\tp0.d, p0.d, p1.d"],
1684                         ["uzp2",    "__ sve_uzp2(p0, __ B, p0, p1);",                     "uzp2\tp0.b, p0.b, p1.b"],
1685                         ["uzp2",    "__ sve_uzp2(p0, __ H, p0, p1);",                     "uzp2\tp0.h, p0.h, p1.h"],
1686                         ["uzp2",    "__ sve_uzp2(p0, __ S, p0, p1);",                     "uzp2\tp0.s, p0.s, p1.s"],
1687                         ["uzp2",    "__ sve_uzp2(p0, __ D, p0, p1);",                     "uzp2\tp0.d, p0.d, p1.d"],
1688                         ["punpklo", "__ sve_punpklo(p1, p0);",                            "punpklo\tp1.h, p0.b"],
1689                         ["punpkhi", "__ sve_punpkhi(p1, p0);",                            "punpkhi\tp1.h, p0.b"],
1690 ])
1691 
# FloatImmediateOp section: for every literal in the tuple below, print the
# C++ assembler call `__ fmovd(v0, <imm>);` (left-justified to 50 columns)
# followed by the expected assembly as a `//` comment, and append that same
# assembly text (`fmov d0, #<imm>`, tab-indented) to outfile.
# outfile is defined outside this excerpt.
# NOTE(review): the loop variable `float` rebinds the builtin name at module
# scope -- confirm nothing later in the script needs the builtin float().
1692 print "\n// FloatImmediateOp"
1693 for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125",
1694               "0.1328125", "0.25", "0.265625", "0.5", "0.53125", "1.0", "1.0625",
1695               "-2.0", "-2.125", "-4.0", "-4.25", "-8.0", "-8.5", "-16.0", "-17.0",
1696               "-0.125", "-0.1328125", "-0.25", "-0.265625", "-0.5", "-0.53125", "-1.0", "-1.0625"):
1697     astr = "fmov d0, #" + float
1698     cstr = "__ fmovd(v0, " + float + ");"
1699     print "    %-50s //\t%s" % (cstr, astr)
1700     outfile.write("\t" + astr + "\n")
1701 
# Call generate() with LSEOp once per (size, suffix) pair: sizes "x" and "w",
# suffixes "", "a", "al" and "l" -- eight calls in total.
# Each table row is [first name, second name, size, suffix]. The two names
# are equal except for ldbic/ldclr and ldorr/ldset -- presumably the
# assembler method name versus the architectural mnemonic; TODO confirm
# against the LSEOp class, which is defined outside this excerpt.
1702 # ARMv8.1A
1703 for size in ("x", "w"):
1704     for suffix in ("", "a", "al", "l"):
1705         generate(LSEOp, [["swp", "swp", size, suffix],
1706                          ["ldadd", "ldadd", size, suffix],
1707                          ["ldbic", "ldclr", size, suffix],
1708                          ["ldeor", "ldeor", size, suffix],
1709                          ["ldorr", "ldset", size, suffix],
1710                          ["ldsmin", "ldsmin", size, suffix],
1711                          ["ldsmax", "ldsmax", size, suffix],
1712                          ["ldumin", "ldumin", size, suffix],
1713                          ["ldumax", "ldumax", size, suffix]]);
1714 
# Pass the four mnemonics below to generate() with the SHA3SIMDOp class.
# Both generate() and SHA3SIMDOp are defined outside this excerpt.
1715 # ARMv8.2A
1716 generate(SHA3SIMDOp, ["bcax", "eor3", "rax1", "xar"])
1717 
# Pass the four SHA512 mnemonics below to generate() with the SHA512SIMDOp
# class (both defined outside this excerpt).
1718 generate(SHA512SIMDOp, ["sha512h", "sha512h2", "sha512su0", "sha512su1"])
1719 
# Table of SVE vector instructions handed to generate() with SVEVectorOp.
# Each row is [mnemonic, operand-shape tag, optional extra fields...]. The
# tags used here are "ZZZ", "ZPZ" and "ZPZZ"; some rows add "m" and some of
# those also add "dn".
# NOTE(review): the meaning of the tags and of "m"/"dn" is decided by
# SVEVectorOp, which is not visible here -- presumably "m" selects merging
# predication and "dn" the destructive form; confirm before relying on it.
1720 generate(SVEVectorOp, [["add", "ZZZ"],
1721                        ["sub", "ZZZ"],
1722                        ["fadd", "ZZZ"],
1723                        ["fmul", "ZZZ"],
1724                        ["fsub", "ZZZ"],
1725                        ["abs", "ZPZ", "m"],
1726                        ["add", "ZPZ", "m", "dn"],
1727                        ["and", "ZPZ", "m", "dn"],
1728                        ["asr", "ZPZ", "m", "dn"],
1729                        ["cnt", "ZPZ", "m"],
1730                        ["eor", "ZPZ", "m", "dn"],
1731                        ["lsl", "ZPZ", "m", "dn"],
1732                        ["lsr", "ZPZ", "m", "dn"],
1733                        ["mul", "ZPZ", "m", "dn"],
1734                        ["neg", "ZPZ", "m"],
1735                        ["not", "ZPZ", "m"],
1736                        ["orr", "ZPZ", "m", "dn"],
1737                        ["smax", "ZPZ", "m", "dn"],
1738                        ["smin", "ZPZ", "m", "dn"],
1739                        ["sub", "ZPZ", "m", "dn"],
1740                        ["fabs", "ZPZ", "m"],
1741                        ["fadd", "ZPZ", "m", "dn"],
1742                        ["fdiv", "ZPZ", "m", "dn"],
1743                        ["fmax", "ZPZ", "m", "dn"],
1744                        ["fmin", "ZPZ", "m", "dn"],
1745                        ["fmul", "ZPZ", "m", "dn"],
1746                        ["fneg", "ZPZ", "m"],
1747                        ["frintm", "ZPZ", "m"],
1748                        ["frintn", "ZPZ", "m"],
1749                        ["frintp", "ZPZ", "m"],
1750                        ["fsqrt", "ZPZ", "m"],
1751                        ["fsub", "ZPZ", "m", "dn"],
1752                        ["fmad", "ZPZZ", "m"],
1753                        ["fmla", "ZPZZ", "m"],
1754                        ["fmls", "ZPZZ", "m"],
1755                        ["fnmla", "ZPZZ", "m"],
1756                        ["fnmls", "ZPZZ", "m"],
1757                        ["mla", "ZPZZ", "m"],
1758                        ["mls", "ZPZZ", "m"],
1759                        ["and", "ZZZ"],
1760                        ["eor", "ZZZ"],
1761                        ["orr", "ZZZ"],
1762                        ["bic", "ZZZ"],
1763                        ["uzp1", "ZZZ"],
1764                        ["uzp2", "ZZZ"],
1765                       ])
1766 
# SVE reduction instructions handed to generate() with SVEReductionOp.
# Each row is [mnemonic, integer]; the integer is 2 for fminv, fmaxv and
# fadda and 0 for every other row.
# NOTE(review): how SVEReductionOp interprets that integer is not visible
# in this excerpt -- confirm against the class definition.
1767 generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0],
1768                           ["fminv", 2], ["fmaxv", 2], ["fadda", 2], ["uaddv", 0]])
1769 
# Emit the `forth` label into both outputs: a `__ bind(forth);` line on
# stdout and a matching `forth:` label line in outfile (defined outside
# this excerpt).
1770 print "\n    __ bind(forth);"
1771 outfile.write("forth:\n")
1772 
< prev index next >