1752 ["sunpklo", "__ sve_sunpklo(z10, __ H, z11);", "sunpklo\tz10.h, z11.b"],
1753 ["scvtf", "__ sve_scvtf(z1, __ D, p0, z0, __ S);", "scvtf\tz1.d, p0/m, z0.s"],
1754 ["scvtf", "__ sve_scvtf(z3, __ D, p1, z2, __ D);", "scvtf\tz3.d, p1/m, z2.d"],
1755 ["scvtf", "__ sve_scvtf(z6, __ S, p2, z1, __ D);", "scvtf\tz6.s, p2/m, z1.d"],
1756 ["scvtf", "__ sve_scvtf(z6, __ S, p3, z1, __ S);", "scvtf\tz6.s, p3/m, z1.s"],
1757 ["scvtf", "__ sve_scvtf(z6, __ H, p3, z1, __ S);", "scvtf\tz6.h, p3/m, z1.s"],
1758 ["scvtf", "__ sve_scvtf(z6, __ H, p3, z1, __ D);", "scvtf\tz6.h, p3/m, z1.d"],
1759 ["scvtf", "__ sve_scvtf(z6, __ H, p3, z1, __ H);", "scvtf\tz6.h, p3/m, z1.h"],
1760 ["fcvt", "__ sve_fcvt(z5, __ D, p3, z4, __ S);", "fcvt\tz5.d, p3/m, z4.s"],
1761 ["fcvt", "__ sve_fcvt(z1, __ S, p3, z0, __ D);", "fcvt\tz1.s, p3/m, z0.d"],
1762 ["fcvtzs", "__ sve_fcvtzs(z19, __ D, p2, z1, __ D);", "fcvtzs\tz19.d, p2/m, z1.d"],
1763 ["fcvtzs", "__ sve_fcvtzs(z9, __ S, p1, z8, __ S);", "fcvtzs\tz9.s, p1/m, z8.s"],
1764 ["fcvtzs", "__ sve_fcvtzs(z1, __ S, p2, z0, __ D);", "fcvtzs\tz1.s, p2/m, z0.d"],
1765 ["fcvtzs", "__ sve_fcvtzs(z1, __ D, p3, z0, __ S);", "fcvtzs\tz1.d, p3/m, z0.s"],
1766 ["fcvtzs", "__ sve_fcvtzs(z1, __ S, p4, z18, __ H);", "fcvtzs\tz1.s, p4/m, z18.h"],
1767 ["lasta", "__ sve_lasta(r0, __ B, p0, z15);", "lasta\tw0, p0, z15.b"],
1768 ["lastb", "__ sve_lastb(r1, __ B, p1, z16);", "lastb\tw1, p1, z16.b"],
1769 ["lasta", "__ sve_lasta(v0, __ B, p0, z15);", "lasta\tb0, p0, z15.b"],
1770 ["lastb", "__ sve_lastb(v1, __ B, p1, z16);", "lastb\tb1, p1, z16.b"],
1771 ["index", "__ sve_index(z6, __ S, 1, 1);", "index\tz6.s, #1, #1"],
1772 ["cpy", "__ sve_cpy(z7, __ H, p3, r5);", "cpy\tz7.h, p3/m, w5"],
1773 ["tbl", "__ sve_tbl(z16, __ S, z17, z18);", "tbl\tz16.s, {z17.s}, z18.s"],
1774 ["ld1w", "__ sve_ld1w_gather(z15, p0, r5, z16);", "ld1w\t{z15.s}, p0/z, [x5, z16.s, uxtw #2]"],
1775 ["ld1d", "__ sve_ld1d_gather(z15, p0, r5, z16);", "ld1d\t{z15.d}, p0/z, [x5, z16.d, uxtw #3]"],
1776 ["st1w", "__ sve_st1w_scatter(z15, p0, r5, z16);", "st1w\t{z15.s}, p0, [x5, z16.s, uxtw #2]"],
1777 ["st1d", "__ sve_st1d_scatter(z15, p0, r5, z16);", "st1d\t{z15.d}, p0, [x5, z16.d, uxtw #3]"],
1778 ["and", "__ sve_and(p0, p1, p2, p3);", "and\tp0.b, p1/z, p2.b, p3.b"],
1779 ["ands", "__ sve_ands(p4, p5, p6, p0);", "ands\tp4.b, p5/z, p6.b, p0.b"],
1780 ["eor", "__ sve_eor(p0, p1, p2, p3);", "eor\tp0.b, p1/z, p2.b, p3.b"],
1781 ["eors", "__ sve_eors(p5, p6, p0, p1);", "eors\tp5.b, p6/z, p0.b, p1.b"],
1782 ["orr", "__ sve_orr(p0, p1, p2, p3);", "orr\tp0.b, p1/z, p2.b, p3.b"],
1783 ["orrs", "__ sve_orrs(p9, p1, p4, p5);", "orrs\tp9.b, p1/z, p4.b, p5.b"],
1784 ["bic", "__ sve_bic(p10, p7, p9, p11);", "bic\tp10.b, p7/z, p9.b, p11.b"],
1785 ["ptest", "__ sve_ptest(p7, p1);", "ptest\tp7, p1.b"],
1786 ["ptrue", "__ sve_ptrue(p1, __ B);", "ptrue\tp1.b"],
1787 ["ptrue", "__ sve_ptrue(p1, __ B, 0b00001);", "ptrue\tp1.b, vl1"],
1788 ["ptrue", "__ sve_ptrue(p1, __ B, 0b00101);", "ptrue\tp1.b, vl5"],
1789 ["ptrue", "__ sve_ptrue(p1, __ B, 0b01001);", "ptrue\tp1.b, vl16"],
1790 ["ptrue", "__ sve_ptrue(p1, __ B, 0b01101);", "ptrue\tp1.b, vl256"],
1791 ["ptrue", "__ sve_ptrue(p2, __ H);", "ptrue\tp2.h"],
1794 ["ptrue", "__ sve_ptrue(p2, __ H, 0b01010);", "ptrue\tp2.h, vl32"],
1795 ["ptrue", "__ sve_ptrue(p3, __ S);", "ptrue\tp3.s"],
1796 ["ptrue", "__ sve_ptrue(p3, __ S, 0b00011);", "ptrue\tp3.s, vl3"],
1797 ["ptrue", "__ sve_ptrue(p3, __ S, 0b00111);", "ptrue\tp3.s, vl7"],
1798 ["ptrue", "__ sve_ptrue(p3, __ S, 0b01011);", "ptrue\tp3.s, vl64"],
1799 ["ptrue", "__ sve_ptrue(p4, __ D);", "ptrue\tp4.d"],
1800 ["ptrue", "__ sve_ptrue(p4, __ D, 0b00100);", "ptrue\tp4.d, vl4"],
1801 ["ptrue", "__ sve_ptrue(p4, __ D, 0b01000);", "ptrue\tp4.d, vl8"],
1802 ["ptrue", "__ sve_ptrue(p4, __ D, 0b01100);", "ptrue\tp4.d, vl128"],
1803 ["pfalse", "__ sve_pfalse(p7);", "pfalse\tp7.b"],
1804 ["uzp1", "__ sve_uzp1(p0, __ B, p0, p1);", "uzp1\tp0.b, p0.b, p1.b"],
1805 ["uzp1", "__ sve_uzp1(p0, __ H, p0, p1);", "uzp1\tp0.h, p0.h, p1.h"],
1806 ["uzp1", "__ sve_uzp1(p0, __ S, p0, p1);", "uzp1\tp0.s, p0.s, p1.s"],
1807 ["uzp1", "__ sve_uzp1(p0, __ D, p0, p1);", "uzp1\tp0.d, p0.d, p1.d"],
1808 ["uzp2", "__ sve_uzp2(p0, __ B, p0, p1);", "uzp2\tp0.b, p0.b, p1.b"],
1809 ["uzp2", "__ sve_uzp2(p0, __ H, p0, p1);", "uzp2\tp0.h, p0.h, p1.h"],
1810 ["uzp2", "__ sve_uzp2(p0, __ S, p0, p1);", "uzp2\tp0.s, p0.s, p1.s"],
1811 ["uzp2", "__ sve_uzp2(p0, __ D, p0, p1);", "uzp2\tp0.d, p0.d, p1.d"],
1812 ["punpklo", "__ sve_punpklo(p1, p0);", "punpklo\tp1.h, p0.b"],
1813 ["punpkhi", "__ sve_punpkhi(p1, p0);", "punpkhi\tp1.h, p0.b"],
1814 ["ext", "__ sve_ext(z17, z16, 63);", "ext\tz17.b, z17.b, z16.b, #63"],
1815 ])
1816
1817 print "\n// FloatImmediateOp"
1818 for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125",
1819 "0.1328125", "0.25", "0.265625", "0.5", "0.53125", "1.0", "1.0625",
1820 "-2.0", "-2.125", "-4.0", "-4.25", "-8.0", "-8.5", "-16.0", "-17.0",
1821 "-0.125", "-0.1328125", "-0.25", "-0.265625", "-0.5", "-0.53125", "-1.0", "-1.0625"):
1822 astr = "fmov d0, #" + float
1823 cstr = "__ fmovd(v0, " + float + ");"
1824 print " %-50s //\t%s" % (cstr, astr)
1825 outfile.write("\t" + astr + "\n")
1826
# Calls generate(LSEOp, ...) once for every combination of `size`
# ("x", "w") and `suffix` ("", "a", "al", "l"). Each table row is
# [name, name, size, suffix]; the two names differ for some rows
# (e.g. "ldbic"/"ldclr"), presumably the C++ assembler method name and the
# assembly mnemonic -- TODO confirm against the LSEOp class.
# NOTE(review): the embedded line numbering jumps from 1834 to 1838 below, so
# rows of this table appear to be elided from this view -- confirm against
# the full file before editing the list.
1827 # ARMv8.1A
1828 for size in ("x", "w"):
1829 for suffix in ("", "a", "al", "l"):
1830 generate(LSEOp, [["swp", "swp", size, suffix],
1831 ["ldadd", "ldadd", size, suffix],
1832 ["ldbic", "ldclr", size, suffix],
1833 ["ldeor", "ldeor", size, suffix],
1834 ["ldorr", "ldset", size, suffix],
1838 ["ldumax", "ldumax", size, suffix]]);
1839
# ARMv8.2A
# The SHA3 and SHA512 SIMD groups use the same call shape, so they are driven
# from one table of (operation class, mnemonics) pairs, in the original order.
for op_class, mnemonics in (
        (SHA3SIMDOp, ["bcax", "eor3", "rax1", "xar"]),
        (SHA512SIMDOp, ["sha512h", "sha512h2", "sha512su0", "sha512su1"])):
    generate(op_class, mnemonics)

# Generate the SVE binary-immediate group six times over; a fresh list is
# handed to generate() on every round, exactly as before.
sve_imm_ops = ("add", "sub", "and", "eor", "orr")
for i in range(6):
    generate(SVEBinaryImmOp, list(sve_imm_ops))
1847
# Generates the SVE vector-operation tests. Each row is
# [mnemonic, operand-pattern, optional extra flags...]. The pattern strings
# used here are "ZZZ", "ZPZ" and "ZPZZ"; presumably Z stands for a vector
# register and P for a governing predicate -- TODO confirm against SVEVectorOp.
# The optional "m" and "dn" entries presumably select merging predication and
# the destructive (destination == first source) form -- TODO confirm.
# NOTE(review): the embedded line numbering jumps from 1885 to 1887 below, so
# at least one row appears to be elided from this view -- confirm against the
# full file before editing the list.
1848 generate(SVEVectorOp, [["add", "ZZZ"],
1849 ["sub", "ZZZ"],
1850 ["fadd", "ZZZ"],
1851 ["fmul", "ZZZ"],
1852 ["fsub", "ZZZ"],
1853 ["abs", "ZPZ", "m"],
1854 ["add", "ZPZ", "m", "dn"],
1855 ["and", "ZPZ", "m", "dn"],
1856 ["asr", "ZPZ", "m", "dn"],
1857 ["bic", "ZPZ", "m", "dn"],
1858 ["cnt", "ZPZ", "m"],
1859 ["eor", "ZPZ", "m", "dn"],
1860 ["lsl", "ZPZ", "m", "dn"],
1861 ["lsr", "ZPZ", "m", "dn"],
1862 ["mul", "ZPZ", "m", "dn"],
1863 ["neg", "ZPZ", "m"],
1864 ["not", "ZPZ", "m"],
1865 ["orr", "ZPZ", "m", "dn"],
1866 ["smax", "ZPZ", "m", "dn"],
1867 ["smin", "ZPZ", "m", "dn"],
1868 ["sub", "ZPZ", "m", "dn"],
1869 ["fabs", "ZPZ", "m"],
1870 ["fadd", "ZPZ", "m", "dn"],
1871 ["fdiv", "ZPZ", "m", "dn"],
1872 ["fmax", "ZPZ", "m", "dn"],
1873 ["fmin", "ZPZ", "m", "dn"],
1874 ["fmul", "ZPZ", "m", "dn"],
1875 ["fneg", "ZPZ", "m"],
1876 ["frintm", "ZPZ", "m"],
1877 ["frintn", "ZPZ", "m"],
1878 ["frintp", "ZPZ", "m"],
1879 ["fsqrt", "ZPZ", "m"],
1880 ["fsub", "ZPZ", "m", "dn"],
1881 ["fmad", "ZPZZ", "m"],
1882 ["fmla", "ZPZZ", "m"],
1883 ["fmls", "ZPZZ", "m"],
1884 ["fmsb", "ZPZZ", "m"],
1885 ["fnmad", "ZPZZ", "m"],
1887 ["fnmla", "ZPZZ", "m"],
1888 ["fnmls", "ZPZZ", "m"],
1889 ["mla", "ZPZZ", "m"],
1890 ["mls", "ZPZZ", "m"],
1891 ["and", "ZZZ"],
1892 ["eor", "ZZZ"],
1893 ["orr", "ZZZ"],
1894 ["bic", "ZZZ"],
1895 ["uzp1", "ZZZ"],
1896 ["uzp2", "ZZZ"],
1897 ])
1898
# Generates the SVE reduction tests. Each row is [mnemonic, integer]; the
# integer is 0 for the integer reductions and 2 for fminv/fmaxv/fadda.
# NOTE(review): its exact meaning is defined by SVEReductionOp, which is not
# visible here -- confirm before changing the values.
1899 generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0],
1900 ["fminv", 2], ["fmaxv", 2], ["fadda", 2], ["uaddv", 0]])
1901
1902 print "\n __ bind(forth);"
1903 outfile.write("forth:\n")
1904
1905 outfile.close()
1906
1907 # compile for sve with 8.3 and sha3 because of SHA3 crypto extension.
1908 subprocess.check_call([AARCH64_AS, "-march=armv8.3-a+sha3+sve", "aarch64ops.s", "-o", "aarch64ops.o"])
1909
1910 print
1911 print "/*"
1912 print "*/"
1913
1914 subprocess.check_call([AARCH64_OBJCOPY, "-O", "binary", "-j", ".text", "aarch64ops.o", "aarch64ops.bin"])
1915
1916 infile = open("aarch64ops.bin", "r")
1917 bytes = bytearray(infile.read())
1918
1919 print
1920 print " static const unsigned int insns[] ="
1921 print " {"
1922
1923 i = 0
1924 while i < len(bytes):
1925 print " 0x%02x%02x%02x%02x," % (bytes[i+3], bytes[i+2], bytes[i+1], bytes[i]),
1926 i += 4
1927 if i%16 == 0:
1928 print
|
1752 ["sunpklo", "__ sve_sunpklo(z10, __ H, z11);", "sunpklo\tz10.h, z11.b"],
1753 ["scvtf", "__ sve_scvtf(z1, __ D, p0, z0, __ S);", "scvtf\tz1.d, p0/m, z0.s"],
1754 ["scvtf", "__ sve_scvtf(z3, __ D, p1, z2, __ D);", "scvtf\tz3.d, p1/m, z2.d"],
1755 ["scvtf", "__ sve_scvtf(z6, __ S, p2, z1, __ D);", "scvtf\tz6.s, p2/m, z1.d"],
1756 ["scvtf", "__ sve_scvtf(z6, __ S, p3, z1, __ S);", "scvtf\tz6.s, p3/m, z1.s"],
1757 ["scvtf", "__ sve_scvtf(z6, __ H, p3, z1, __ S);", "scvtf\tz6.h, p3/m, z1.s"],
1758 ["scvtf", "__ sve_scvtf(z6, __ H, p3, z1, __ D);", "scvtf\tz6.h, p3/m, z1.d"],
1759 ["scvtf", "__ sve_scvtf(z6, __ H, p3, z1, __ H);", "scvtf\tz6.h, p3/m, z1.h"],
1760 ["fcvt", "__ sve_fcvt(z5, __ D, p3, z4, __ S);", "fcvt\tz5.d, p3/m, z4.s"],
1761 ["fcvt", "__ sve_fcvt(z1, __ S, p3, z0, __ D);", "fcvt\tz1.s, p3/m, z0.d"],
1762 ["fcvtzs", "__ sve_fcvtzs(z19, __ D, p2, z1, __ D);", "fcvtzs\tz19.d, p2/m, z1.d"],
1763 ["fcvtzs", "__ sve_fcvtzs(z9, __ S, p1, z8, __ S);", "fcvtzs\tz9.s, p1/m, z8.s"],
1764 ["fcvtzs", "__ sve_fcvtzs(z1, __ S, p2, z0, __ D);", "fcvtzs\tz1.s, p2/m, z0.d"],
1765 ["fcvtzs", "__ sve_fcvtzs(z1, __ D, p3, z0, __ S);", "fcvtzs\tz1.d, p3/m, z0.s"],
1766 ["fcvtzs", "__ sve_fcvtzs(z1, __ S, p4, z18, __ H);", "fcvtzs\tz1.s, p4/m, z18.h"],
1767 ["lasta", "__ sve_lasta(r0, __ B, p0, z15);", "lasta\tw0, p0, z15.b"],
1768 ["lastb", "__ sve_lastb(r1, __ B, p1, z16);", "lastb\tw1, p1, z16.b"],
1769 ["lasta", "__ sve_lasta(v0, __ B, p0, z15);", "lasta\tb0, p0, z15.b"],
1770 ["lastb", "__ sve_lastb(v1, __ B, p1, z16);", "lastb\tb1, p1, z16.b"],
1771 ["index", "__ sve_index(z6, __ S, 1, 1);", "index\tz6.s, #1, #1"],
1772 ["index", "__ sve_index(z6, __ B, r5, 2);", "index\tz6.b, w5, #2"],
1773 ["index", "__ sve_index(z6, __ H, r5, 3);", "index\tz6.h, w5, #3"],
1774 ["index", "__ sve_index(z6, __ S, r5, 4);", "index\tz6.s, w5, #4"],
1775 ["index", "__ sve_index(z7, __ D, r5, 5);", "index\tz7.d, x5, #5"],
1776 ["cpy", "__ sve_cpy(z7, __ H, p3, r5);", "cpy\tz7.h, p3/m, w5"],
1777 ["tbl", "__ sve_tbl(z16, __ S, z17, z18);", "tbl\tz16.s, {z17.s}, z18.s"],
1778 ["ld1w", "__ sve_ld1w_gather(z15, p0, r5, z16);", "ld1w\t{z15.s}, p0/z, [x5, z16.s, uxtw #2]"],
1779 ["ld1d", "__ sve_ld1d_gather(z15, p0, r5, z16);", "ld1d\t{z15.d}, p0/z, [x5, z16.d, uxtw #3]"],
1780 ["st1w", "__ sve_st1w_scatter(z15, p0, r5, z16);", "st1w\t{z15.s}, p0, [x5, z16.s, uxtw #2]"],
1781 ["st1d", "__ sve_st1d_scatter(z15, p0, r5, z16);", "st1d\t{z15.d}, p0, [x5, z16.d, uxtw #3]"],
1782 ["and", "__ sve_and(p0, p1, p2, p3);", "and\tp0.b, p1/z, p2.b, p3.b"],
1783 ["ands", "__ sve_ands(p4, p5, p6, p0);", "ands\tp4.b, p5/z, p6.b, p0.b"],
1784 ["eor", "__ sve_eor(p0, p1, p2, p3);", "eor\tp0.b, p1/z, p2.b, p3.b"],
1785 ["eors", "__ sve_eors(p5, p6, p0, p1);", "eors\tp5.b, p6/z, p0.b, p1.b"],
1786 ["orr", "__ sve_orr(p0, p1, p2, p3);", "orr\tp0.b, p1/z, p2.b, p3.b"],
1787 ["orrs", "__ sve_orrs(p9, p1, p4, p5);", "orrs\tp9.b, p1/z, p4.b, p5.b"],
1788 ["bic", "__ sve_bic(p10, p7, p9, p11);", "bic\tp10.b, p7/z, p9.b, p11.b"],
1789 ["ptest", "__ sve_ptest(p7, p1);", "ptest\tp7, p1.b"],
1790 ["ptrue", "__ sve_ptrue(p1, __ B);", "ptrue\tp1.b"],
1791 ["ptrue", "__ sve_ptrue(p1, __ B, 0b00001);", "ptrue\tp1.b, vl1"],
1792 ["ptrue", "__ sve_ptrue(p1, __ B, 0b00101);", "ptrue\tp1.b, vl5"],
1793 ["ptrue", "__ sve_ptrue(p1, __ B, 0b01001);", "ptrue\tp1.b, vl16"],
1794 ["ptrue", "__ sve_ptrue(p1, __ B, 0b01101);", "ptrue\tp1.b, vl256"],
1795 ["ptrue", "__ sve_ptrue(p2, __ H);", "ptrue\tp2.h"],
1798 ["ptrue", "__ sve_ptrue(p2, __ H, 0b01010);", "ptrue\tp2.h, vl32"],
1799 ["ptrue", "__ sve_ptrue(p3, __ S);", "ptrue\tp3.s"],
1800 ["ptrue", "__ sve_ptrue(p3, __ S, 0b00011);", "ptrue\tp3.s, vl3"],
1801 ["ptrue", "__ sve_ptrue(p3, __ S, 0b00111);", "ptrue\tp3.s, vl7"],
1802 ["ptrue", "__ sve_ptrue(p3, __ S, 0b01011);", "ptrue\tp3.s, vl64"],
1803 ["ptrue", "__ sve_ptrue(p4, __ D);", "ptrue\tp4.d"],
1804 ["ptrue", "__ sve_ptrue(p4, __ D, 0b00100);", "ptrue\tp4.d, vl4"],
1805 ["ptrue", "__ sve_ptrue(p4, __ D, 0b01000);", "ptrue\tp4.d, vl8"],
1806 ["ptrue", "__ sve_ptrue(p4, __ D, 0b01100);", "ptrue\tp4.d, vl128"],
1807 ["pfalse", "__ sve_pfalse(p7);", "pfalse\tp7.b"],
1808 ["uzp1", "__ sve_uzp1(p0, __ B, p0, p1);", "uzp1\tp0.b, p0.b, p1.b"],
1809 ["uzp1", "__ sve_uzp1(p0, __ H, p0, p1);", "uzp1\tp0.h, p0.h, p1.h"],
1810 ["uzp1", "__ sve_uzp1(p0, __ S, p0, p1);", "uzp1\tp0.s, p0.s, p1.s"],
1811 ["uzp1", "__ sve_uzp1(p0, __ D, p0, p1);", "uzp1\tp0.d, p0.d, p1.d"],
1812 ["uzp2", "__ sve_uzp2(p0, __ B, p0, p1);", "uzp2\tp0.b, p0.b, p1.b"],
1813 ["uzp2", "__ sve_uzp2(p0, __ H, p0, p1);", "uzp2\tp0.h, p0.h, p1.h"],
1814 ["uzp2", "__ sve_uzp2(p0, __ S, p0, p1);", "uzp2\tp0.s, p0.s, p1.s"],
1815 ["uzp2", "__ sve_uzp2(p0, __ D, p0, p1);", "uzp2\tp0.d, p0.d, p1.d"],
1816 ["punpklo", "__ sve_punpklo(p1, p0);", "punpklo\tp1.h, p0.b"],
1817 ["punpkhi", "__ sve_punpkhi(p1, p0);", "punpkhi\tp1.h, p0.b"],
1818 ["compact", "__ sve_compact(z16, __ S, z16, p1);", "compact\tz16.s, p1, z16.s"],
1819 ["compact", "__ sve_compact(z16, __ D, z16, p1);", "compact\tz16.d, p1, z16.d"],
1820 ["ext", "__ sve_ext(z17, z16, 63);", "ext\tz17.b, z17.b, z16.b, #63"],
1821 # SVE2 instructions
1822 ["histcnt", "__ sve_histcnt(z16, __ S, p0, z16, z16);", "histcnt\tz16.s, p0/z, z16.s, z16.s"],
1823 ["histcnt", "__ sve_histcnt(z17, __ D, p0, z17, z17);", "histcnt\tz17.d, p0/z, z17.d, z17.d"],
1824 ])
1825
1826 print "\n// FloatImmediateOp"
1827 for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125",
1828 "0.1328125", "0.25", "0.265625", "0.5", "0.53125", "1.0", "1.0625",
1829 "-2.0", "-2.125", "-4.0", "-4.25", "-8.0", "-8.5", "-16.0", "-17.0",
1830 "-0.125", "-0.1328125", "-0.25", "-0.265625", "-0.5", "-0.53125", "-1.0", "-1.0625"):
1831 astr = "fmov d0, #" + float
1832 cstr = "__ fmovd(v0, " + float + ");"
1833 print " %-50s //\t%s" % (cstr, astr)
1834 outfile.write("\t" + astr + "\n")
1835
# Calls generate(LSEOp, ...) once for every combination of `size`
# ("x", "w") and `suffix` ("", "a", "al", "l"). Each table row is
# [name, name, size, suffix]; the two names differ for some rows
# (e.g. "ldbic"/"ldclr"), presumably the C++ assembler method name and the
# assembly mnemonic -- TODO confirm against the LSEOp class.
# NOTE(review): the embedded line numbering jumps from 1843 to 1847 below, so
# rows of this table appear to be elided from this view -- confirm against
# the full file before editing the list.
1836 # ARMv8.1A
1837 for size in ("x", "w"):
1838 for suffix in ("", "a", "al", "l"):
1839 generate(LSEOp, [["swp", "swp", size, suffix],
1840 ["ldadd", "ldadd", size, suffix],
1841 ["ldbic", "ldclr", size, suffix],
1842 ["ldeor", "ldeor", size, suffix],
1843 ["ldorr", "ldset", size, suffix],
1847 ["ldumax", "ldumax", size, suffix]]);
1848
# ARMv8.2A
# The SHA3 and SHA512 SIMD groups use the same call shape, so they are driven
# from one table of (operation class, mnemonics) pairs, in the original order.
for op_class, mnemonics in (
        (SHA3SIMDOp, ["bcax", "eor3", "rax1", "xar"]),
        (SHA512SIMDOp, ["sha512h", "sha512h2", "sha512su0", "sha512su1"])):
    generate(op_class, mnemonics)

# Generate the SVE binary-immediate group six times over; a fresh list is
# handed to generate() on every round, exactly as before.
sve_imm_ops = ("add", "sub", "and", "eor", "orr")
for i in range(6):
    generate(SVEBinaryImmOp, list(sve_imm_ops))
1856
# Generates the SVE vector-operation tests. Each row is
# [mnemonic, operand-pattern, optional extra flags...]. The pattern strings
# used here are "ZZZ", "ZPZ" and "ZPZZ"; presumably Z stands for a vector
# register and P for a governing predicate -- TODO confirm against SVEVectorOp.
# The optional "m" and "dn" entries presumably select merging predication and
# the destructive (destination == first source) form -- TODO confirm.
# NOTE(review): the embedded line numbering jumps from 1897 to 1899 below, so
# at least one row appears to be elided from this view -- confirm against the
# full file before editing the list.
1857 generate(SVEVectorOp, [["add", "ZZZ"],
1858 ["sub", "ZZZ"],
1859 ["fadd", "ZZZ"],
1860 ["fmul", "ZZZ"],
1861 ["fsub", "ZZZ"],
1862 ["abs", "ZPZ", "m"],
1863 ["add", "ZPZ", "m", "dn"],
1864 ["and", "ZPZ", "m", "dn"],
1865 ["asr", "ZPZ", "m", "dn"],
1866 ["bic", "ZPZ", "m", "dn"],
1867 ["clz", "ZPZ", "m"],
1868 ["cnt", "ZPZ", "m"],
1869 ["eor", "ZPZ", "m", "dn"],
1870 ["lsl", "ZPZ", "m", "dn"],
1871 ["lsr", "ZPZ", "m", "dn"],
1872 ["mul", "ZPZ", "m", "dn"],
1873 ["neg", "ZPZ", "m"],
1874 ["not", "ZPZ", "m"],
1875 ["orr", "ZPZ", "m", "dn"],
1876 ["rbit", "ZPZ", "m"],
1877 ["revb", "ZPZ", "m"],
1878 ["smax", "ZPZ", "m", "dn"],
1879 ["smin", "ZPZ", "m", "dn"],
1880 ["sub", "ZPZ", "m", "dn"],
1881 ["fabs", "ZPZ", "m"],
1882 ["fadd", "ZPZ", "m", "dn"],
1883 ["fdiv", "ZPZ", "m", "dn"],
1884 ["fmax", "ZPZ", "m", "dn"],
1885 ["fmin", "ZPZ", "m", "dn"],
1886 ["fmul", "ZPZ", "m", "dn"],
1887 ["fneg", "ZPZ", "m"],
1888 ["frintm", "ZPZ", "m"],
1889 ["frintn", "ZPZ", "m"],
1890 ["frintp", "ZPZ", "m"],
1891 ["fsqrt", "ZPZ", "m"],
1892 ["fsub", "ZPZ", "m", "dn"],
1893 ["fmad", "ZPZZ", "m"],
1894 ["fmla", "ZPZZ", "m"],
1895 ["fmls", "ZPZZ", "m"],
1896 ["fmsb", "ZPZZ", "m"],
1897 ["fnmad", "ZPZZ", "m"],
1899 ["fnmla", "ZPZZ", "m"],
1900 ["fnmls", "ZPZZ", "m"],
1901 ["mla", "ZPZZ", "m"],
1902 ["mls", "ZPZZ", "m"],
1903 ["and", "ZZZ"],
1904 ["eor", "ZZZ"],
1905 ["orr", "ZZZ"],
1906 ["bic", "ZZZ"],
1907 ["uzp1", "ZZZ"],
1908 ["uzp2", "ZZZ"],
1909 ])
1910
# Generates the SVE reduction tests. Each row is [mnemonic, integer]; the
# integer is 0 for the integer reductions and 2 for fminv/fmaxv/fadda.
# NOTE(review): its exact meaning is defined by SVEReductionOp, which is not
# visible here -- confirm before changing the values.
1911 generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0],
1912 ["fminv", 2], ["fmaxv", 2], ["fadda", 2], ["uaddv", 0]])
1913
1914 print "\n __ bind(forth);"
1915 outfile.write("forth:\n")
1916
1917 outfile.close()
1918
1919 # compile for sve with armv9-a+sha3 because of SHA3 crypto extension and SVE2 instructions.
1920 # armv9-a enables sve and sve2 by default.
1921 subprocess.check_call([AARCH64_AS, "-march=armv9-a+sha3", "aarch64ops.s", "-o", "aarch64ops.o"])
1922
1923 print
1924 print "/*"
1925 print "*/"
1926
1927 subprocess.check_call([AARCH64_OBJCOPY, "-O", "binary", "-j", ".text", "aarch64ops.o", "aarch64ops.bin"])
1928
1929 infile = open("aarch64ops.bin", "r")
1930 bytes = bytearray(infile.read())
1931
1932 print
1933 print " static const unsigned int insns[] ="
1934 print " {"
1935
1936 i = 0
1937 while i < len(bytes):
1938 print " 0x%02x%02x%02x%02x," % (bytes[i+3], bytes[i+2], bytes[i+1], bytes[i]),
1939 i += 4
1940 if i%16 == 0:
1941 print
|