//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// The Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, i.e. 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
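// For illustration only (not a definition used by this file): a hypothetical
// save-on-call integer register with encoding 10 would be declared as
//   reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
// following the (register save type, C convention save type, ideal register
// type, encoding, VMReg) layout described above.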
//
// Linux ABI:   No registers are preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 are preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3,
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 214 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 215 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 216 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 217 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 218 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 219 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 220 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 221 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 222 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 223 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 224 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 225 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 226 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 227 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 228 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 229 230 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 231 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 232 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 233 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 234 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 235 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 236 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 237 reg_def XMM9h( SOC, SOC, Op_RegF, 9, 
xmm9->as_VMReg()->next(7)); 238 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 239 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 240 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 241 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 242 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 243 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 244 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 245 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 246 247 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 248 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 249 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 250 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 251 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 252 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 253 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 254 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 255 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 256 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 257 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 258 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 259 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 260 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 261 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 262 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 263 264 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 265 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 266 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 267 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 268 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 269 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 270 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 271 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 272 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 273 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 274 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 275 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 276 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 277 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 278 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 279 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 280 281 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 282 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 283 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 284 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 285 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 286 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 287 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 288 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 289 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 290 reg_def XMM12j( 
SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 291 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 292 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 293 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 294 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 295 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 296 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 297 298 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 299 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 300 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 301 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 302 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 303 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 304 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 305 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 306 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 307 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 308 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 309 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 310 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 311 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 312 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 313 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 314 315 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 316 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 317 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 318 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 319 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 320 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 321 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 322 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 323 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 324 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 325 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 326 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 327 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 328 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 329 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 330 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 331 332 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 333 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 334 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 335 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 336 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 337 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 338 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 339 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 340 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 341 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 342 reg_def XMM15k( SOC, SOC, Op_RegF, 15, 
xmm15->as_VMReg()->next(10)); 343 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 344 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 345 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 346 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 347 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 348 349 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 350 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 351 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 352 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 353 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 354 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 355 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 356 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 357 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 358 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 359 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 360 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 361 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 362 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 363 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 364 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 365 366 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 367 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 368 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 369 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 370 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 371 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 372 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 373 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 374 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 375 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 376 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 377 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 378 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 379 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 380 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 381 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 382 383 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 384 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 385 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 386 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 387 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 388 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 389 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 390 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 391 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 392 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 393 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 394 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 395 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 396 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 397 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 398 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 399 400 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 401 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 402 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 403 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 404 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 405 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 406 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 407 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 408 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 409 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 410 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 411 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 412 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 413 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 414 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 415 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 416 417 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 418 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 419 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 420 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 421 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 422 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 423 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 424 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 425 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 426 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 427 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 428 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 429 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 430 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 431 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 432 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 433 434 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 435 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 436 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 437 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 438 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 439 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 440 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 441 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 442 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 443 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 444 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 445 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 446 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 447 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 448 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 449 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 450 451 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 452 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 453 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 454 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 455 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 456 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 457 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 458 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 459 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 460 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 461 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 462 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 463 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 464 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 465 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 466 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 467 468 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 469 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 470 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 471 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 472 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 473 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 474 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 475 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 476 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 477 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 478 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 479 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 480 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 481 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 482 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 483 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 484 485 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 486 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 487 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 488 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 489 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 490 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 491 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 492 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 493 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 494 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 495 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 496 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 497 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 498 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 499 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 500 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 501 502 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 503 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 504 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 505 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 506 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 507 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 508 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 509 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 510 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 511 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 512 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 513 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 514 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 515 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 516 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 517 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 518 519 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 520 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 521 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 522 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 523 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 524 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 525 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 526 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 527 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 528 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 529 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 530 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 531 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 532 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 533 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 534 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 535 536 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 537 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 538 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 539 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 540 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 541 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 542 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 543 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 544 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 545 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 546 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 547 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 548 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 549 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 550 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 551 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 552 553 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 554 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 555 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 556 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 557 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 558 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 559 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 560 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 561 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 562 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 563 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 564 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 565 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 566 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 567 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 568 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 569 570 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 571 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 572 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 573 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 574 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 575 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 576 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 577 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 578 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 579 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 580 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 581 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 582 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 583 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 584 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 585 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 586 587 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 588 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 589 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 590 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 591 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 592 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 593 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 594 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 595 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 596 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 597 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 598 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 599 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 600 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 601 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 602 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(15)); 603 604 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 605 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 606 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 607 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 608 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 609 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 610 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 611 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 612 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 613 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 614 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 615 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 616 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 617 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 618 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 619 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 620 621 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 622 623 // AVX3 Mask Registers. 624 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg()); 625 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next()); 626 627 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg()); 628 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next()); 629 630 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg()); 631 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next()); 632 633 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg()); 634 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next()); 635 636 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg()); 637 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next()); 638 639 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg()); 640 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next()); 641 642 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg()); 643 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next()); 644 645 646 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 647 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 648 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 649 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 650 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 651 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 652 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 653 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 654 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 655 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 656 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 657 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, 
XMM11n, XMM11o, XMM11p, 658 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 659 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 660 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 661 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p, 662 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 663 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 664 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 665 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 666 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 667 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 668 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 669 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 670 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 671 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 672 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 673 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 674 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 675 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 676 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 677 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); 678 679 alloc_class chunk2(K7, K7_H, 680 K6, K6_H, 681 K5, K5_H, 682 K4, K4_H, 683 K3, K3_H, 684 K2, K2_H, 685 K1, K1_H); 686 687 reg_class vectmask_reg(K1, K1_H, 688 K2, K2_H, 689 K3, K3_H, 690 K4, K4_H, 691 K5, K5_H, 692 K6, K6_H, 693 K7, K7_H); 694 695 reg_class vectmask_reg_K1(K1, K1_H); 696 reg_class vectmask_reg_K2(K2, K2_H); 697 reg_class vectmask_reg_K3(K3, K3_H); 698 reg_class vectmask_reg_K4(K4, K4_H); 699 reg_class vectmask_reg_K5(K5, K5_H); 700 reg_class vectmask_reg_K6(K6, K6_H); 701 reg_class vectmask_reg_K7(K7, K7_H); 702 703 // flags allocation class should be last. 
alloc_class chunk3(RFLAGS);


// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7,
                           XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15);

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7,
                         XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31);

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0,  XMM0b,
                            XMM1,  XMM1b,
                            XMM2,  XMM2b,
                            XMM3,  XMM3b,
                            XMM4,  XMM4b,
                            XMM5,  XMM5b,
                            XMM6,  XMM6b,
                            XMM7,  XMM7b,
                            XMM8,  XMM8b,
                            XMM9,  XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b);

// Class for evex double registers
reg_class double_reg_evex(XMM0,  XMM0b,
                          XMM1,  XMM1b,
                          XMM2,  XMM2b,
                          XMM3,  XMM3b,
                          XMM4,  XMM4b,
                          XMM5,  XMM5b,
                          XMM6,  XMM6b,
                          XMM7,  XMM7b,
                          XMM8,  XMM8b,
                          XMM9,  XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b);

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7,
                             XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15);

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7,
                           XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31);

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0,  XMM0b,
                             XMM1,  XMM1b,
                             XMM2,  XMM2b,
                             XMM3,  XMM3b,
                             XMM4,  XMM4b,
                             XMM5,  XMM5b,
                             XMM6,  XMM6b,
                             XMM7,  XMM7b,
                             XMM8,  XMM8b,
                             XMM9,  XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b);
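// Note: each *_legacy/*_evex register class pair in this block is combined by
// a reg_class_dynamic declaration, which selects the EVEX variant at runtime
// when its predicate holds, e.g. (as declared later in this file):
//   reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy,
//                                 %{ VM_Version::supports_evex() %} );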
892 893 // Class for all 64bit vector registers 894 reg_class vectord_reg_evex(XMM0, XMM0b, 895 XMM1, XMM1b, 896 XMM2, XMM2b, 897 XMM3, XMM3b, 898 XMM4, XMM4b, 899 XMM5, XMM5b, 900 XMM6, XMM6b, 901 XMM7, XMM7b, 902 XMM8, XMM8b, 903 XMM9, XMM9b, 904 XMM10, XMM10b, 905 XMM11, XMM11b, 906 XMM12, XMM12b, 907 XMM13, XMM13b, 908 XMM14, XMM14b, 909 XMM15, XMM15b, 910 XMM16, XMM16b, 911 XMM17, XMM17b, 912 XMM18, XMM18b, 913 XMM19, XMM19b, 914 XMM20, XMM20b, 915 XMM21, XMM21b, 916 XMM22, XMM22b, 917 XMM23, XMM23b, 918 XMM24, XMM24b, 919 XMM25, XMM25b, 920 XMM26, XMM26b, 921 XMM27, XMM27b, 922 XMM28, XMM28b, 923 XMM29, XMM29b, 924 XMM30, XMM30b, 925 XMM31, XMM31b); 926 927 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 928 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 929 930 // Class for all 128bit vector registers 931 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 932 XMM1, XMM1b, XMM1c, XMM1d, 933 XMM2, XMM2b, XMM2c, XMM2d, 934 XMM3, XMM3b, XMM3c, XMM3d, 935 XMM4, XMM4b, XMM4c, XMM4d, 936 XMM5, XMM5b, XMM5c, XMM5d, 937 XMM6, XMM6b, XMM6c, XMM6d, 938 XMM7, XMM7b, XMM7c, XMM7d, 939 XMM8, XMM8b, XMM8c, XMM8d, 940 XMM9, XMM9b, XMM9c, XMM9d, 941 XMM10, XMM10b, XMM10c, XMM10d, 942 XMM11, XMM11b, XMM11c, XMM11d, 943 XMM12, XMM12b, XMM12c, XMM12d, 944 XMM13, XMM13b, XMM13c, XMM13d, 945 XMM14, XMM14b, XMM14c, XMM14d, 946 XMM15, XMM15b, XMM15c, XMM15d); 947 948 // Class for all 128bit vector registers 949 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 950 XMM1, XMM1b, XMM1c, XMM1d, 951 XMM2, XMM2b, XMM2c, XMM2d, 952 XMM3, XMM3b, XMM3c, XMM3d, 953 XMM4, XMM4b, XMM4c, XMM4d, 954 XMM5, XMM5b, XMM5c, XMM5d, 955 XMM6, XMM6b, XMM6c, XMM6d, 956 XMM7, XMM7b, XMM7c, XMM7d, 957 XMM8, XMM8b, XMM8c, XMM8d, 958 XMM9, XMM9b, XMM9c, XMM9d, 959 XMM10, XMM10b, XMM10c, XMM10d, 960 XMM11, XMM11b, XMM11c, XMM11d, 961 XMM12, XMM12b, XMM12c, XMM12d, 962 XMM13, XMM13b, XMM13c, XMM13d, 963 XMM14, XMM14b, XMM14c, XMM14d, 964 XMM15, XMM15b, XMM15c, XMM15d, 965 XMM16, XMM16b, XMM16c, XMM16d, 966 XMM17, XMM17b, XMM17c, XMM17d, 967 XMM18, XMM18b, XMM18c, XMM18d, 968 XMM19, XMM19b, XMM19c, XMM19d, 969 XMM20, XMM20b, XMM20c, XMM20d, 970 XMM21, XMM21b, XMM21c, XMM21d, 971 XMM22, XMM22b, XMM22c, XMM22d, 972 XMM23, XMM23b, XMM23c, XMM23d, 973 XMM24, XMM24b, XMM24c, XMM24d, 974 XMM25, XMM25b, XMM25c, XMM25d, 975 XMM26, XMM26b, XMM26c, XMM26d, 976 XMM27, XMM27b, XMM27c, XMM27d, 977 XMM28, XMM28b, XMM28c, XMM28d, 978 XMM29, XMM29b, XMM29c, XMM29d, 979 XMM30, XMM30b, XMM30c, XMM30d, 980 XMM31, XMM31b, XMM31c, XMM31d); 981 982 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 983 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 984 985 // Class for all 256bit vector registers 986 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 987 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 988 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 989 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 990 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 991 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 992 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 993 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, 994 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 995 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, 
XMM9h, 996 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 997 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 998 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 999 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1000 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1001 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); 1002 1003 // Class for all 256bit vector registers 1004 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1005 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1006 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1007 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1008 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1009 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1010 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1011 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, 1012 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1013 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1014 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1015 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1016 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1017 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1018 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1019 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1020 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1021 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1022 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1023 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1024 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1025 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1026 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1027 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1028 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1029 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1030 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1031 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1032 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1033 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1034 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1035 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h); 1036 1037 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1038 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1039 1040 // Class for all 512bit vector registers 1041 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1042 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1043 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1044 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1045 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1046 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, 
XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1047 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1048 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 1049 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1050 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1051 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1052 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1053 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1054 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1055 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1056 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p, 1057 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1058 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1059 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1060 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1061 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1062 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1063 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1064 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1065 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1066 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1067 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1068 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1069 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1070 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); 1073 1074 // Class for restricted 512bit vector registers 1075 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, 
XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1076 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1077 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1078 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1079 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1080 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1081 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1082 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 1083 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1084 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1085 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1086 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1087 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1088 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1089 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1090 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1091 1092 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1093 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1094 1095 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1096 %} 1097 1098 1099 //----------SOURCE BLOCK------------------------------------------------------- 1100 // This is a block of C++ code which provides values, functions, and 1101 // definitions necessary in the rest of the architecture description 1102 1103 source_hpp %{ 1104 // Header information of the source block. 1105 // Method declarations/definitions which are used outside 1106 // the ad-scope can conveniently be defined here. 1107 // 1108 // To keep related declarations/definitions/uses close together, 1109 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1110 1111 #include "runtime/vm_version.hpp" 1112 1113 class NativeJump; 1114 1115 class CallStubImpl { 1116 1117 //-------------------------------------------------------------- 1118 //---< Used for optimization in Compile::shorten_branches >--- 1119 //-------------------------------------------------------------- 1120 1121 public: 1122 // Size of call trampoline stub. 
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(C2_MacroAssembler *masm);
  static int emit_deopt_handler(C2_MacroAssembler* masm);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

  static uint size_deopt_handler() {
    // three 5 byte instructions plus one move for unreachable address.
    return 15+3;
  }
};

inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch(bytes) {
    case  4: // fall-through
    case  8: // fall-through
    case 16: return Assembler::AVX_128bit;
    case 32: return Assembler::AVX_256bit;
    case 64: return Assembler::AVX_512bit;

    default: {
      ShouldNotReachHere();
      return Assembler::AVX_NoVec;
    }
  }
}

static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
  return vector_length_encoding(Matcher::vector_length_in_bytes(n));
}

static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return vector_length_encoding(def);
}

static inline bool is_vector_popcount_predicate(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}

static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
  return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
         (VM_Version::supports_avx512vl() || vlen_bytes == 64);
}

class Node::PD {
public:
  enum NodeFlags {
    Flag_intel_jcc_erratum    = Node::_last_flag << 1,
    Flag_sets_carry_flag      = Node::_last_flag << 2,
    Flag_sets_parity_flag     = Node::_last_flag << 3,
    Flag_sets_zero_flag       = Node::_last_flag << 4,
    Flag_sets_overflow_flag   = Node::_last_flag << 5,
    Flag_sets_sign_flag       = Node::_last_flag << 6,
    Flag_clears_carry_flag    = Node::_last_flag << 7,
    Flag_clears_parity_flag   = Node::_last_flag << 8,
    Flag_clears_zero_flag     = Node::_last_flag << 9,
    Flag_clears_overflow_flag = Node::_last_flag << 10,
    Flag_clears_sign_flag     = Node::_last_flag << 11,
    _last_flag                = Flag_clears_sign_flag
  };
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"
#include "c2_intelJccErratum_x86.hpp"

void PhaseOutput::pd_perform_mach_node_analysis() {
  if (VM_Version::has_intel_jcc_erratum()) {
    int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
    _buf_sizes._code += extra_padding;
  }
}

int MachNode::pd_alignment_required() const {
  if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
    // Conservatively add worst case
padding. We assume that relocInfo::addr_unit() is 1 on x86. 1225 return IntelJccErratum::largest_jcc_size() + 1; 1226 } else { 1227 return 1; 1228 } 1229 } 1230 1231 int MachNode::compute_padding(int current_offset) const { 1232 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1233 Compile* C = Compile::current(); 1234 PhaseOutput* output = C->output(); 1235 Block* block = output->block(); 1236 int index = output->index(); 1237 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1238 } else { 1239 return 0; 1240 } 1241 } 1242 1243 // Emit exception handler code. 1244 // Stuff framesize into a register and call a VM stub routine. 1245 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) { 1246 1247 // Note that the code buffer's insts_mark is always relative to insts. 1248 // That's why we must use the macroassembler to generate a handler. 1249 address base = __ start_a_stub(size_exception_handler()); 1250 if (base == nullptr) { 1251 ciEnv::current()->record_failure("CodeCache is full"); 1252 return 0; // CodeBuffer::expand failed 1253 } 1254 int offset = __ offset(); 1255 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1256 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1257 __ end_a_stub(); 1258 return offset; 1259 } 1260 1261 // Emit deopt handler code. 1262 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { 1263 1264 // Note that the code buffer's insts_mark is always relative to insts. 1265 // That's why we must use the macroassembler to generate a handler. 1266 address base = __ start_a_stub(size_deopt_handler()); 1267 if (base == nullptr) { 1268 ciEnv::current()->record_failure("CodeCache is full"); 1269 return 0; // CodeBuffer::expand failed 1270 } 1271 int offset = __ offset(); 1272 1273 address the_pc = (address) __ pc(); 1274 Label next; 1275 // push a "the_pc" on the stack without destroying any registers 1276 // as they all may be live. 1277 1278 // push address of "next" 1279 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1280 __ bind(next); 1281 // adjust it so it matches "the_pc" 1282 __ subptr(Address(rsp, 0), __ offset() - offset); 1283 1284 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1285 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1286 __ end_a_stub(); 1287 return offset; 1288 } 1289 1290 static Assembler::Width widthForType(BasicType bt) { 1291 if (bt == T_BYTE) { 1292 return Assembler::B; 1293 } else if (bt == T_SHORT) { 1294 return Assembler::W; 1295 } else if (bt == T_INT) { 1296 return Assembler::D; 1297 } else { 1298 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1299 return Assembler::Q; 1300 } 1301 } 1302 1303 //============================================================================= 1304 1305 // Float masks come from different places depending on platform. 
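// (Illustrative note, not a normative list: these accessors are expected to resolve to small
//  aligned constant blocks emitted by the stub generator; e.g. float_signmask() to 0x7FFFFFFF
//  words that andps can use to clear the sign bit for AbsF, float_signflip() to 0x80000000
//  words that xorps can use for NegF, and the vector_* entries below to the analogous
//  lane-wise masks and shuffle tables used by the vector match rules.)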
1306 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1307 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1308 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1309 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1310 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1311 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1312 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1313 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1314 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1315 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1316 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1317 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1318 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1319 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1320 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1321 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1322 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1323 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1324 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1325 1326 //============================================================================= 1327 bool Matcher::match_rule_supported(int opcode) { 1328 if (!has_match_rule(opcode)) { 1329 return false; // no match rule present 1330 } 1331 switch (opcode) { 1332 case Op_AbsVL: 1333 case Op_StoreVectorScatter: 1334 if (UseAVX < 3) { 1335 return false; 1336 } 1337 break; 1338 case Op_PopCountI: 1339 case Op_PopCountL: 1340 if (!UsePopCountInstruction) { 1341 return false; 1342 } 1343 break; 1344 case Op_PopCountVI: 1345 if (UseAVX < 2) { 1346 return false; 1347 } 1348 break; 1349 case Op_CompressV: 1350 case Op_ExpandV: 1351 case Op_PopCountVL: 1352 if (UseAVX < 2) { 1353 return false; 1354 } 1355 break; 1356 case Op_MulVI: 1357 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1358 return false; 1359 } 1360 break; 1361 case Op_MulVL: 1362 if (UseSSE < 4) { // only with SSE4_1 or AVX 1363 return false; 1364 } 1365 break; 1366 case Op_MulReductionVL: 1367 if (VM_Version::supports_avx512dq() == false) { 1368 return false; 1369 } 1370 break; 1371 case Op_AbsVB: 1372 case Op_AbsVS: 1373 case Op_AbsVI: 1374 case Op_AddReductionVI: 1375 case Op_AndReductionV: 1376 case Op_OrReductionV: 1377 case Op_XorReductionV: 1378 if (UseSSE < 3) { // requires at least SSSE3 1379 return false; 1380 } 1381 break; 1382 case Op_MaxHF: 1383 case Op_MinHF: 1384 if (!VM_Version::supports_avx512vlbw()) { 1385 return false; 1386 } // fallthrough 1387 case Op_AddHF: 1388 case Op_DivHF: 1389 case Op_FmaHF: 1390 case Op_MulHF: 1391 case Op_ReinterpretS2HF: 1392 case Op_ReinterpretHF2S: 1393 case Op_SubHF: 1394 case Op_SqrtHF: 1395 if (!VM_Version::supports_avx512_fp16()) { 1396 return 
false; 1397 } 1398 break; 1399 case Op_VectorLoadShuffle: 1400 case Op_VectorRearrange: 1401 case Op_MulReductionVI: 1402 if (UseSSE < 4) { // requires at least SSE4 1403 return false; 1404 } 1405 break; 1406 case Op_IsInfiniteF: 1407 case Op_IsInfiniteD: 1408 if (!VM_Version::supports_avx512dq()) { 1409 return false; 1410 } 1411 break; 1412 case Op_SqrtVD: 1413 case Op_SqrtVF: 1414 case Op_VectorMaskCmp: 1415 case Op_VectorCastB2X: 1416 case Op_VectorCastS2X: 1417 case Op_VectorCastI2X: 1418 case Op_VectorCastL2X: 1419 case Op_VectorCastF2X: 1420 case Op_VectorCastD2X: 1421 case Op_VectorUCastB2X: 1422 case Op_VectorUCastS2X: 1423 case Op_VectorUCastI2X: 1424 case Op_VectorMaskCast: 1425 if (UseAVX < 1) { // enabled for AVX only 1426 return false; 1427 } 1428 break; 1429 case Op_PopulateIndex: 1430 if (UseAVX < 2) { 1431 return false; 1432 } 1433 break; 1434 case Op_RoundVF: 1435 if (UseAVX < 2) { // enabled for AVX2 only 1436 return false; 1437 } 1438 break; 1439 case Op_RoundVD: 1440 if (UseAVX < 3) { 1441 return false; // enabled for AVX3 only 1442 } 1443 break; 1444 case Op_CompareAndSwapL: 1445 case Op_CompareAndSwapP: 1446 break; 1447 case Op_StrIndexOf: 1448 if (!UseSSE42Intrinsics) { 1449 return false; 1450 } 1451 break; 1452 case Op_StrIndexOfChar: 1453 if (!UseSSE42Intrinsics) { 1454 return false; 1455 } 1456 break; 1457 case Op_OnSpinWait: 1458 if (VM_Version::supports_on_spin_wait() == false) { 1459 return false; 1460 } 1461 break; 1462 case Op_MulVB: 1463 case Op_LShiftVB: 1464 case Op_RShiftVB: 1465 case Op_URShiftVB: 1466 case Op_VectorInsert: 1467 case Op_VectorLoadMask: 1468 case Op_VectorStoreMask: 1469 case Op_VectorBlend: 1470 if (UseSSE < 4) { 1471 return false; 1472 } 1473 break; 1474 case Op_MaxD: 1475 case Op_MaxF: 1476 case Op_MinD: 1477 case Op_MinF: 1478 if (UseAVX < 1) { // enabled for AVX only 1479 return false; 1480 } 1481 break; 1482 case Op_CacheWB: 1483 case Op_CacheWBPreSync: 1484 case Op_CacheWBPostSync: 1485 if (!VM_Version::supports_data_cache_line_flush()) { 1486 return false; 1487 } 1488 break; 1489 case Op_ExtractB: 1490 case Op_ExtractL: 1491 case Op_ExtractI: 1492 case Op_RoundDoubleMode: 1493 if (UseSSE < 4) { 1494 return false; 1495 } 1496 break; 1497 case Op_RoundDoubleModeV: 1498 if (VM_Version::supports_avx() == false) { 1499 return false; // 128bit vroundpd is not available 1500 } 1501 break; 1502 case Op_LoadVectorGather: 1503 case Op_LoadVectorGatherMasked: 1504 if (UseAVX < 2) { 1505 return false; 1506 } 1507 break; 1508 case Op_FmaF: 1509 case Op_FmaD: 1510 case Op_FmaVD: 1511 case Op_FmaVF: 1512 if (!UseFMA) { 1513 return false; 1514 } 1515 break; 1516 case Op_MacroLogicV: 1517 if (UseAVX < 3 || !UseVectorMacroLogic) { 1518 return false; 1519 } 1520 break; 1521 1522 case Op_VectorCmpMasked: 1523 case Op_VectorMaskGen: 1524 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1525 return false; 1526 } 1527 break; 1528 case Op_VectorMaskFirstTrue: 1529 case Op_VectorMaskLastTrue: 1530 case Op_VectorMaskTrueCount: 1531 case Op_VectorMaskToLong: 1532 if (UseAVX < 1) { 1533 return false; 1534 } 1535 break; 1536 case Op_RoundF: 1537 case Op_RoundD: 1538 break; 1539 case Op_CopySignD: 1540 case Op_CopySignF: 1541 if (UseAVX < 3) { 1542 return false; 1543 } 1544 if (!VM_Version::supports_avx512vl()) { 1545 return false; 1546 } 1547 break; 1548 case Op_CompressBits: 1549 case Op_ExpandBits: 1550 if (!VM_Version::supports_bmi2()) { 1551 return false; 1552 } 1553 break; 1554 case Op_CompressM: 1555 if (!VM_Version::supports_avx512vl() || 
!VM_Version::supports_bmi2()) { 1556 return false; 1557 } 1558 break; 1559 case Op_ConvF2HF: 1560 case Op_ConvHF2F: 1561 if (!VM_Version::supports_float16()) { 1562 return false; 1563 } 1564 break; 1565 case Op_VectorCastF2HF: 1566 case Op_VectorCastHF2F: 1567 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1568 return false; 1569 } 1570 break; 1571 } 1572 return true; // Match rules are supported by default. 1573 } 1574 1575 //------------------------------------------------------------------------ 1576 1577 static inline bool is_pop_count_instr_target(BasicType bt) { 1578 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1579 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1580 } 1581 1582 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) { 1583 return match_rule_supported_vector(opcode, vlen, bt); 1584 } 1585 1586 // Identify extra cases that we might want to provide match rules for vector nodes and 1587 // other intrinsics guarded with vector length (vlen) and element type (bt). 1588 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1589 if (!match_rule_supported(opcode)) { 1590 return false; 1591 } 1592 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1593 // * SSE2 supports 128bit vectors for all types; 1594 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1595 // * AVX2 supports 256bit vectors for all types; 1596 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1597 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1598 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1599 // And MaxVectorSize is taken into account as well. 
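  // Worked example of the restrictions above (a sketch, not an extra rule): vlen == 8 with
  // bt == T_FLOAT is a 256-bit vector and already passes on AVX1, while vlen == 8 with
  // bt == T_INT is also 256 bits but needs AVX2, and either shape is rejected once it
  // exceeds MaxVectorSize.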
1600 if (!vector_size_supported(bt, vlen)) { 1601 return false; 1602 } 1603 // Special cases which require vector length follow: 1604 // * implementation limitations 1605 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1606 // * 128bit vroundpd instruction is present only in AVX1 1607 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1608 switch (opcode) { 1609 case Op_MaxVHF: 1610 case Op_MinVHF: 1611 if (!VM_Version::supports_avx512bw()) { 1612 return false; 1613 } 1614 case Op_AddVHF: 1615 case Op_DivVHF: 1616 case Op_FmaVHF: 1617 case Op_MulVHF: 1618 case Op_SubVHF: 1619 case Op_SqrtVHF: 1620 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1621 return false; 1622 } 1623 if (!VM_Version::supports_avx512_fp16()) { 1624 return false; 1625 } 1626 break; 1627 case Op_AbsVF: 1628 case Op_NegVF: 1629 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1630 return false; // 512bit vandps and vxorps are not available 1631 } 1632 break; 1633 case Op_AbsVD: 1634 case Op_NegVD: 1635 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1636 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1637 } 1638 break; 1639 case Op_RotateRightV: 1640 case Op_RotateLeftV: 1641 if (bt != T_INT && bt != T_LONG) { 1642 return false; 1643 } // fallthrough 1644 case Op_MacroLogicV: 1645 if (!VM_Version::supports_evex() || 1646 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1647 return false; 1648 } 1649 break; 1650 case Op_ClearArray: 1651 case Op_VectorMaskGen: 1652 case Op_VectorCmpMasked: 1653 if (!VM_Version::supports_avx512bw()) { 1654 return false; 1655 } 1656 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1657 return false; 1658 } 1659 break; 1660 case Op_LoadVectorMasked: 1661 case Op_StoreVectorMasked: 1662 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1663 return false; 1664 } 1665 break; 1666 case Op_UMinV: 1667 case Op_UMaxV: 1668 if (UseAVX == 0) { 1669 return false; 1670 } 1671 break; 1672 case Op_MaxV: 1673 case Op_MinV: 1674 if (UseSSE < 4 && is_integral_type(bt)) { 1675 return false; 1676 } 1677 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1678 // Float/Double intrinsics are enabled for AVX family currently. 1679 if (UseAVX == 0) { 1680 return false; 1681 } 1682 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1683 return false; 1684 } 1685 } 1686 break; 1687 case Op_CallLeafVector: 1688 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1689 return false; 1690 } 1691 break; 1692 case Op_AddReductionVI: 1693 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1694 return false; 1695 } 1696 // fallthrough 1697 case Op_AndReductionV: 1698 case Op_OrReductionV: 1699 case Op_XorReductionV: 1700 if (is_subword_type(bt) && (UseSSE < 4)) { 1701 return false; 1702 } 1703 break; 1704 case Op_MinReductionV: 1705 case Op_MaxReductionV: 1706 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1707 return false; 1708 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1709 return false; 1710 } 1711 // Float/Double intrinsics enabled for AVX family. 
1712 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1713 return false; 1714 } 1715 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1716 return false; 1717 } 1718 break; 1719 case Op_VectorTest: 1720 if (UseSSE < 4) { 1721 return false; // Implementation limitation 1722 } else if (size_in_bits < 32) { 1723 return false; // Implementation limitation 1724 } 1725 break; 1726 case Op_VectorLoadShuffle: 1727 case Op_VectorRearrange: 1728 if(vlen == 2) { 1729 return false; // Implementation limitation due to how shuffle is loaded 1730 } else if (size_in_bits == 256 && UseAVX < 2) { 1731 return false; // Implementation limitation 1732 } 1733 break; 1734 case Op_VectorLoadMask: 1735 case Op_VectorMaskCast: 1736 if (size_in_bits == 256 && UseAVX < 2) { 1737 return false; // Implementation limitation 1738 } 1739 // fallthrough 1740 case Op_VectorStoreMask: 1741 if (vlen == 2) { 1742 return false; // Implementation limitation 1743 } 1744 break; 1745 case Op_PopulateIndex: 1746 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1747 return false; 1748 } 1749 break; 1750 case Op_VectorCastB2X: 1751 case Op_VectorCastS2X: 1752 case Op_VectorCastI2X: 1753 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1754 return false; 1755 } 1756 break; 1757 case Op_VectorCastL2X: 1758 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1759 return false; 1760 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1761 return false; 1762 } 1763 break; 1764 case Op_VectorCastF2X: { 1765 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1766 // happen after intermediate conversion to integer and special handling 1767 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 
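      // For instance, casting 8 floats to T_BYTE yields only a 64-bit result, but the source
      // spans 8 * 32 = 256 bits, so it is the source width computed below that decides
      // whether AVX2 is required.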
1768 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1769 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1770 return false; 1771 } 1772 } 1773 // fallthrough 1774 case Op_VectorCastD2X: 1775 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1776 return false; 1777 } 1778 break; 1779 case Op_VectorCastF2HF: 1780 case Op_VectorCastHF2F: 1781 if (!VM_Version::supports_f16c() && 1782 ((!VM_Version::supports_evex() || 1783 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1784 return false; 1785 } 1786 break; 1787 case Op_RoundVD: 1788 if (!VM_Version::supports_avx512dq()) { 1789 return false; 1790 } 1791 break; 1792 case Op_MulReductionVI: 1793 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1794 return false; 1795 } 1796 break; 1797 case Op_LoadVectorGatherMasked: 1798 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1799 return false; 1800 } 1801 if (is_subword_type(bt) && 1802 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) || 1803 (size_in_bits < 64) || 1804 (bt == T_SHORT && !VM_Version::supports_bmi2()))) { 1805 return false; 1806 } 1807 break; 1808 case Op_StoreVectorScatterMasked: 1809 case Op_StoreVectorScatter: 1810 if (is_subword_type(bt)) { 1811 return false; 1812 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1813 return false; 1814 } 1815 // fallthrough 1816 case Op_LoadVectorGather: 1817 if (!is_subword_type(bt) && size_in_bits == 64) { 1818 return false; 1819 } 1820 if (is_subword_type(bt) && size_in_bits < 64) { 1821 return false; 1822 } 1823 break; 1824 case Op_SaturatingAddV: 1825 case Op_SaturatingSubV: 1826 if (UseAVX < 1) { 1827 return false; // Implementation limitation 1828 } 1829 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1830 return false; 1831 } 1832 break; 1833 case Op_SelectFromTwoVector: 1834 if (size_in_bits < 128) { 1835 return false; 1836 } 1837 if ((size_in_bits < 512 && !VM_Version::supports_avx512vl())) { 1838 return false; 1839 } 1840 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 1841 return false; 1842 } 1843 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 1844 return false; 1845 } 1846 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) { 1847 return false; 1848 } 1849 break; 1850 case Op_MaskAll: 1851 if (!VM_Version::supports_evex()) { 1852 return false; 1853 } 1854 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1855 return false; 1856 } 1857 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1858 return false; 1859 } 1860 break; 1861 case Op_VectorMaskCmp: 1862 if (vlen < 2 || size_in_bits < 32) { 1863 return false; 1864 } 1865 break; 1866 case Op_CompressM: 1867 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1868 return false; 1869 } 1870 break; 1871 case Op_CompressV: 1872 case Op_ExpandV: 1873 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1874 return false; 1875 } 1876 if (size_in_bits < 128 ) { 1877 return false; 1878 } 1879 case Op_VectorLongToMask: 1880 if (UseAVX < 1) { 1881 return false; 1882 } 1883 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1884 return false; 1885 } 1886 break; 1887 case Op_SignumVD: 1888 case Op_SignumVF: 1889 if (UseAVX < 1) { 1890 return false; 1891 } 1892 break; 1893 case Op_PopCountVI: 1894 case Op_PopCountVL: { 1895 if (!is_pop_count_instr_target(bt) && 1896 (size_in_bits == 512) && 
!VM_Version::supports_avx512bw()) { 1897 return false; 1898 } 1899 } 1900 break; 1901 case Op_ReverseV: 1902 case Op_ReverseBytesV: 1903 if (UseAVX < 2) { 1904 return false; 1905 } 1906 break; 1907 case Op_CountTrailingZerosV: 1908 case Op_CountLeadingZerosV: 1909 if (UseAVX < 2) { 1910 return false; 1911 } 1912 break; 1913 } 1914 return true; // Per default match rules are supported. 1915 } 1916 1917 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 1918 // ADLC based match_rule_supported routine checks for the existence of pattern based 1919 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 1920 // of their non-masked counterpart with mask edge being the differentiator. 1921 // This routine does a strict check on the existence of masked operation patterns 1922 // by returning a default false value for all the other opcodes apart from the 1923 // ones whose masked instruction patterns are defined in this file. 1924 if (!match_rule_supported_vector(opcode, vlen, bt)) { 1925 return false; 1926 } 1927 1928 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1929 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 1930 return false; 1931 } 1932 switch(opcode) { 1933 // Unary masked operations 1934 case Op_AbsVB: 1935 case Op_AbsVS: 1936 if(!VM_Version::supports_avx512bw()) { 1937 return false; // Implementation limitation 1938 } 1939 case Op_AbsVI: 1940 case Op_AbsVL: 1941 return true; 1942 1943 // Ternary masked operations 1944 case Op_FmaVF: 1945 case Op_FmaVD: 1946 return true; 1947 1948 case Op_MacroLogicV: 1949 if(bt != T_INT && bt != T_LONG) { 1950 return false; 1951 } 1952 return true; 1953 1954 // Binary masked operations 1955 case Op_AddVB: 1956 case Op_AddVS: 1957 case Op_SubVB: 1958 case Op_SubVS: 1959 case Op_MulVS: 1960 case Op_LShiftVS: 1961 case Op_RShiftVS: 1962 case Op_URShiftVS: 1963 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1964 if (!VM_Version::supports_avx512bw()) { 1965 return false; // Implementation limitation 1966 } 1967 return true; 1968 1969 case Op_MulVL: 1970 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1971 if (!VM_Version::supports_avx512dq()) { 1972 return false; // Implementation limitation 1973 } 1974 return true; 1975 1976 case Op_AndV: 1977 case Op_OrV: 1978 case Op_XorV: 1979 case Op_RotateRightV: 1980 case Op_RotateLeftV: 1981 if (bt != T_INT && bt != T_LONG) { 1982 return false; // Implementation limitation 1983 } 1984 return true; 1985 1986 case Op_VectorLoadMask: 1987 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1988 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 1989 return false; 1990 } 1991 return true; 1992 1993 case Op_AddVI: 1994 case Op_AddVL: 1995 case Op_AddVF: 1996 case Op_AddVD: 1997 case Op_SubVI: 1998 case Op_SubVL: 1999 case Op_SubVF: 2000 case Op_SubVD: 2001 case Op_MulVI: 2002 case Op_MulVF: 2003 case Op_MulVD: 2004 case Op_DivVF: 2005 case Op_DivVD: 2006 case Op_SqrtVF: 2007 case Op_SqrtVD: 2008 case Op_LShiftVI: 2009 case Op_LShiftVL: 2010 case Op_RShiftVI: 2011 case Op_RShiftVL: 2012 case Op_URShiftVI: 2013 case Op_URShiftVL: 2014 case Op_LoadVectorMasked: 2015 case Op_StoreVectorMasked: 2016 case Op_LoadVectorGatherMasked: 2017 case Op_StoreVectorScatterMasked: 2018 return true; 2019 2020 case Op_UMinV: 2021 case Op_UMaxV: 2022 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 2023 return false; 2024 } // fallthrough 2025 case Op_MaxV: 2026 
case Op_MinV: 2027 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2028 return false; // Implementation limitation 2029 } 2030 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) { 2031 return false; // Implementation limitation 2032 } 2033 return true; 2034 case Op_SaturatingAddV: 2035 case Op_SaturatingSubV: 2036 if (!is_subword_type(bt)) { 2037 return false; 2038 } 2039 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) { 2040 return false; // Implementation limitation 2041 } 2042 return true; 2043 2044 case Op_VectorMaskCmp: 2045 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2046 return false; // Implementation limitation 2047 } 2048 return true; 2049 2050 case Op_VectorRearrange: 2051 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2052 return false; // Implementation limitation 2053 } 2054 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2055 return false; // Implementation limitation 2056 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2057 return false; // Implementation limitation 2058 } 2059 return true; 2060 2061 // Binary Logical operations 2062 case Op_AndVMask: 2063 case Op_OrVMask: 2064 case Op_XorVMask: 2065 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2066 return false; // Implementation limitation 2067 } 2068 return true; 2069 2070 case Op_PopCountVI: 2071 case Op_PopCountVL: 2072 if (!is_pop_count_instr_target(bt)) { 2073 return false; 2074 } 2075 return true; 2076 2077 case Op_MaskAll: 2078 return true; 2079 2080 case Op_CountLeadingZerosV: 2081 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2082 return true; 2083 } 2084 default: 2085 return false; 2086 } 2087 } 2088 2089 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2090 return false; 2091 } 2092 2093 // Return true if Vector::rearrange needs preparation of the shuffle argument 2094 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) { 2095 switch (elem_bt) { 2096 case T_BYTE: return false; 2097 case T_SHORT: return !VM_Version::supports_avx512bw(); 2098 case T_INT: return !VM_Version::supports_avx(); 2099 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl(); 2100 default: 2101 ShouldNotReachHere(); 2102 return false; 2103 } 2104 } 2105 2106 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2107 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2108 bool legacy = (generic_opnd->opcode() == LEGVEC); 2109 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2110 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2111 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 
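    // (Rationale sketch, inferred rather than spelled out here: without AVX512VL the
    //  128/256-bit EVEX forms are unavailable and VEX encodings can only name XMM0-XMM15,
    //  so a TEMP that may also be touched at narrower widths is safest in the legacy range.)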
2112 return new legVecZOper(); 2113 } 2114 if (legacy) { 2115 switch (ideal_reg) { 2116 case Op_VecS: return new legVecSOper(); 2117 case Op_VecD: return new legVecDOper(); 2118 case Op_VecX: return new legVecXOper(); 2119 case Op_VecY: return new legVecYOper(); 2120 case Op_VecZ: return new legVecZOper(); 2121 } 2122 } else { 2123 switch (ideal_reg) { 2124 case Op_VecS: return new vecSOper(); 2125 case Op_VecD: return new vecDOper(); 2126 case Op_VecX: return new vecXOper(); 2127 case Op_VecY: return new vecYOper(); 2128 case Op_VecZ: return new vecZOper(); 2129 } 2130 } 2131 ShouldNotReachHere(); 2132 return nullptr; 2133 } 2134 2135 bool Matcher::is_reg2reg_move(MachNode* m) { 2136 switch (m->rule()) { 2137 case MoveVec2Leg_rule: 2138 case MoveLeg2Vec_rule: 2139 case MoveF2VL_rule: 2140 case MoveF2LEG_rule: 2141 case MoveVL2F_rule: 2142 case MoveLEG2F_rule: 2143 case MoveD2VL_rule: 2144 case MoveD2LEG_rule: 2145 case MoveVL2D_rule: 2146 case MoveLEG2D_rule: 2147 return true; 2148 default: 2149 return false; 2150 } 2151 } 2152 2153 bool Matcher::is_generic_vector(MachOper* opnd) { 2154 switch (opnd->opcode()) { 2155 case VEC: 2156 case LEGVEC: 2157 return true; 2158 default: 2159 return false; 2160 } 2161 } 2162 2163 //------------------------------------------------------------------------ 2164 2165 const RegMask* Matcher::predicate_reg_mask(void) { 2166 return &_VECTMASK_REG_mask; 2167 } 2168 2169 // Max vector size in bytes. 0 if not supported. 2170 int Matcher::vector_width_in_bytes(BasicType bt) { 2171 assert(is_java_primitive(bt), "only primitive type vectors"); 2172 // SSE2 supports 128bit vectors for all types. 2173 // AVX2 supports 256bit vectors for all types. 2174 // AVX2/EVEX supports 512bit vectors for all types. 2175 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2176 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2177 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2178 size = (UseAVX > 2) ? 64 : 32; 2179 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2180 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2181 // Use flag to limit vector size. 2182 size = MIN2(size,(int)MaxVectorSize); 2183 // Minimum 2 values in vector (or 4 for bytes). 2184 switch (bt) { 2185 case T_DOUBLE: 2186 case T_LONG: 2187 if (size < 16) return 0; 2188 break; 2189 case T_FLOAT: 2190 case T_INT: 2191 if (size < 8) return 0; 2192 break; 2193 case T_BOOLEAN: 2194 if (size < 4) return 0; 2195 break; 2196 case T_CHAR: 2197 if (size < 4) return 0; 2198 break; 2199 case T_BYTE: 2200 if (size < 4) return 0; 2201 break; 2202 case T_SHORT: 2203 if (size < 4) return 0; 2204 break; 2205 default: 2206 ShouldNotReachHere(); 2207 } 2208 return size; 2209 } 2210 2211 // Limits on vector size (number of elements) loaded into vector. 2212 int Matcher::max_vector_size(const BasicType bt) { 2213 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2214 } 2215 int Matcher::min_vector_size(const BasicType bt) { 2216 int max_size = max_vector_size(bt); 2217 // Min size which can be loaded into vector is 4 bytes. 2218 int size = (type2aelembytes(bt) == 1) ? 
4 : 2; 2219 // Support for calling svml double64 vectors 2220 if (bt == T_DOUBLE) { 2221 size = 1; 2222 } 2223 return MIN2(size,max_size); 2224 } 2225 2226 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) { 2227 // Limit the max vector size for auto vectorization to 256 bits (32 bytes) 2228 // by default on Cascade Lake 2229 if (VM_Version::is_default_intel_cascade_lake()) { 2230 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt)); 2231 } 2232 return Matcher::max_vector_size(bt); 2233 } 2234 2235 int Matcher::scalable_vector_reg_size(const BasicType bt) { 2236 return -1; 2237 } 2238 2239 // Vector ideal reg corresponding to specified size in bytes 2240 uint Matcher::vector_ideal_reg(int size) { 2241 assert(MaxVectorSize >= size, ""); 2242 switch(size) { 2243 case 4: return Op_VecS; 2244 case 8: return Op_VecD; 2245 case 16: return Op_VecX; 2246 case 32: return Op_VecY; 2247 case 64: return Op_VecZ; 2248 } 2249 ShouldNotReachHere(); 2250 return 0; 2251 } 2252 2253 // Check for shift by small constant as well 2254 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2255 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2256 shift->in(2)->get_int() <= 3 && 2257 // Are there other uses besides address expressions? 2258 !matcher->is_visited(shift)) { 2259 address_visited.set(shift->_idx); // Flag as address_visited 2260 mstack.push(shift->in(2), Matcher::Visit); 2261 Node *conv = shift->in(1); 2262 // Allow Matcher to match the rule which bypass 2263 // ConvI2L operation for an array index on LP64 2264 // if the index value is positive. 2265 if (conv->Opcode() == Op_ConvI2L && 2266 conv->as_Type()->type()->is_long()->_lo >= 0 && 2267 // Are there other uses besides address expressions? 2268 !matcher->is_visited(conv)) { 2269 address_visited.set(conv->_idx); // Flag as address_visited 2270 mstack.push(conv->in(1), Matcher::Pre_Visit); 2271 } else { 2272 mstack.push(conv, Matcher::Pre_Visit); 2273 } 2274 return true; 2275 } 2276 return false; 2277 } 2278 2279 // This function identifies sub-graphs in which a 'load' node is 2280 // input to two different nodes, and such that it can be matched 2281 // with BMI instructions like blsi, blsr, etc. 2282 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2283 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2284 // refers to the same node. 2285 // 2286 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2287 // This is a temporary solution until we make DAGs expressible in ADL. 
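// Usage sketch, mirroring is_bmi_pattern() below: to recognize the blsi shape
// b = (0 - load) & load, one writes
//   FusedPatternMatcher<TypeInt> bmii(and_node, load_node, Op_ConI);
//   bool is_blsi = bmii.match(Op_AndI, -1 /* AndI is commutative */,
//                             Op_SubI,  1 /* the constant is SubI's first input */, 0 /* constant value */);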
2288 template<typename ConType> 2289 class FusedPatternMatcher { 2290 Node* _op1_node; 2291 Node* _mop_node; 2292 int _con_op; 2293 2294 static int match_next(Node* n, int next_op, int next_op_idx) { 2295 if (n->in(1) == nullptr || n->in(2) == nullptr) { 2296 return -1; 2297 } 2298 2299 if (next_op_idx == -1) { // n is commutative, try rotations 2300 if (n->in(1)->Opcode() == next_op) { 2301 return 1; 2302 } else if (n->in(2)->Opcode() == next_op) { 2303 return 2; 2304 } 2305 } else { 2306 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2307 if (n->in(next_op_idx)->Opcode() == next_op) { 2308 return next_op_idx; 2309 } 2310 } 2311 return -1; 2312 } 2313 2314 public: 2315 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2316 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2317 2318 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2319 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2320 typename ConType::NativeType con_value) { 2321 if (_op1_node->Opcode() != op1) { 2322 return false; 2323 } 2324 if (_mop_node->outcnt() > 2) { 2325 return false; 2326 } 2327 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2328 if (op1_op2_idx == -1) { 2329 return false; 2330 } 2331 // Memory operation must be the other edge 2332 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2333 2334 // Check that the mop node is really what we want 2335 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2336 Node* op2_node = _op1_node->in(op1_op2_idx); 2337 if (op2_node->outcnt() > 1) { 2338 return false; 2339 } 2340 assert(op2_node->Opcode() == op2, "Should be"); 2341 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2342 if (op2_con_idx == -1) { 2343 return false; 2344 } 2345 // Memory operation must be the other edge 2346 int op2_mop_idx = (op2_con_idx & 1) + 1; 2347 // Check that the memory operation is the same node 2348 if (op2_node->in(op2_mop_idx) == _mop_node) { 2349 // Now check the constant 2350 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2351 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2352 return true; 2353 } 2354 } 2355 } 2356 return false; 2357 } 2358 }; 2359 2360 static bool is_bmi_pattern(Node* n, Node* m) { 2361 assert(UseBMI1Instructions, "sanity"); 2362 if (n != nullptr && m != nullptr) { 2363 if (m->Opcode() == Op_LoadI) { 2364 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2365 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2366 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2367 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2368 } else if (m->Opcode() == Op_LoadL) { 2369 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2370 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2371 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2372 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2373 } 2374 } 2375 return false; 2376 } 2377 2378 // Should the matcher clone input 'm' of node 'n'? 2379 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2380 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
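  // For example, a[i] & (a[i] - 1) (the blsr idiom) arrives here as
  // (AndI (AddI LoadI* -1) LoadI*) with both edges referring to the same load;
  // cloning the load keeps the whole shape visible to a single BMI match rule.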
2381 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2382 mstack.push(m, Visit); 2383 return true; 2384 } 2385 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2386 mstack.push(m, Visit); // m = ShiftCntV 2387 return true; 2388 } 2389 if (is_encode_and_store_pattern(n, m)) { 2390 mstack.push(m, Visit); 2391 return true; 2392 } 2393 return false; 2394 } 2395 2396 // Should the Matcher clone shifts on addressing modes, expecting them 2397 // to be subsumed into complex addressing expressions or compute them 2398 // into registers? 2399 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2400 Node *off = m->in(AddPNode::Offset); 2401 if (off->is_Con()) { 2402 address_visited.test_set(m->_idx); // Flag as address_visited 2403 Node *adr = m->in(AddPNode::Address); 2404 2405 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset. 2406 // AtomicAdd is not an addressing expression. 2407 // Cheap to find it by looking for screwy base. 2408 if (adr->is_AddP() && 2409 !adr->in(AddPNode::Base)->is_top() && 2410 !adr->in(AddPNode::Offset)->is_Con() && 2411 off->get_long() == (int) (off->get_long()) && // immL32 2412 // Are there other uses besides address expressions? 2413 !is_visited(adr)) { 2414 address_visited.set(adr->_idx); // Flag as address_visited 2415 Node *shift = adr->in(AddPNode::Offset); 2416 if (!clone_shift(shift, this, mstack, address_visited)) { 2417 mstack.push(shift, Pre_Visit); 2418 } 2419 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2420 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2421 } else { 2422 mstack.push(adr, Pre_Visit); 2423 } 2424 2425 // Clone X+offset as it also folds into most addressing expressions 2426 mstack.push(off, Visit); 2427 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2428 return true; 2429 } else if (clone_shift(off, this, mstack, address_visited)) { 2430 address_visited.test_set(m->_idx); // Flag as address_visited 2431 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2432 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2433 return true; 2434 } 2435 return false; 2436 } 2437 2438 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2439 switch (bt) { 2440 case BoolTest::eq: 2441 return Assembler::eq; 2442 case BoolTest::ne: 2443 return Assembler::neq; 2444 case BoolTest::le: 2445 case BoolTest::ule: 2446 return Assembler::le; 2447 case BoolTest::ge: 2448 case BoolTest::uge: 2449 return Assembler::nlt; 2450 case BoolTest::lt: 2451 case BoolTest::ult: 2452 return Assembler::lt; 2453 case BoolTest::gt: 2454 case BoolTest::ugt: 2455 return Assembler::nle; 2456 default : ShouldNotReachHere(); return Assembler::_false; 2457 } 2458 } 2459 2460 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2461 switch (bt) { 2462 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2463 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
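    // (That is, NEQ_UQ is true for unordered operands, so NaN != NaN holds, while the ordered
    //  predicates below are false whenever a NaN is involved, matching Java comparison semantics.)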
2464 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2465 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2466 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2467 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2468 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2469 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2470 } 2471 } 2472 2473 // Helper methods for MachSpillCopyNode::implementation(). 2474 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 2475 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2476 assert(ireg == Op_VecS || // 32bit vector 2477 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2478 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi), 2479 "no non-adjacent vector moves" ); 2480 if (masm) { 2481 switch (ireg) { 2482 case Op_VecS: // copy whole register 2483 case Op_VecD: 2484 case Op_VecX: 2485 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2486 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2487 } else { 2488 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2489 } 2490 break; 2491 case Op_VecY: 2492 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2493 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2494 } else { 2495 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2496 } 2497 break; 2498 case Op_VecZ: 2499 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2500 break; 2501 default: 2502 ShouldNotReachHere(); 2503 } 2504 #ifndef PRODUCT 2505 } else { 2506 switch (ireg) { 2507 case Op_VecS: 2508 case Op_VecD: 2509 case Op_VecX: 2510 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2511 break; 2512 case Op_VecY: 2513 case Op_VecZ: 2514 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2515 break; 2516 default: 2517 ShouldNotReachHere(); 2518 } 2519 #endif 2520 } 2521 } 2522 2523 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 2524 int stack_offset, int reg, uint ireg, outputStream* st) { 2525 if (masm) { 2526 if (is_load) { 2527 switch (ireg) { 2528 case Op_VecS: 2529 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2530 break; 2531 case Op_VecD: 2532 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2533 break; 2534 case Op_VecX: 2535 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2536 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2537 } else { 2538 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2539 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2540 } 2541 break; 2542 case Op_VecY: 2543 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2544 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2545 } else { 2546 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2547 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), 
as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2548 } 2549 break; 2550 case Op_VecZ: 2551 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2552 break; 2553 default: 2554 ShouldNotReachHere(); 2555 } 2556 } else { // store 2557 switch (ireg) { 2558 case Op_VecS: 2559 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2560 break; 2561 case Op_VecD: 2562 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2563 break; 2564 case Op_VecX: 2565 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2566 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2567 } 2568 else { 2569 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2570 } 2571 break; 2572 case Op_VecY: 2573 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2574 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2575 } 2576 else { 2577 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2578 } 2579 break; 2580 case Op_VecZ: 2581 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2582 break; 2583 default: 2584 ShouldNotReachHere(); 2585 } 2586 } 2587 #ifndef PRODUCT 2588 } else { 2589 if (is_load) { 2590 switch (ireg) { 2591 case Op_VecS: 2592 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2593 break; 2594 case Op_VecD: 2595 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2596 break; 2597 case Op_VecX: 2598 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2599 break; 2600 case Op_VecY: 2601 case Op_VecZ: 2602 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2603 break; 2604 default: 2605 ShouldNotReachHere(); 2606 } 2607 } else { // store 2608 switch (ireg) { 2609 case Op_VecS: 2610 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2611 break; 2612 case Op_VecD: 2613 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2614 break; 2615 case Op_VecX: 2616 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2617 break; 2618 case Op_VecY: 2619 case Op_VecZ: 2620 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2621 break; 2622 default: 2623 ShouldNotReachHere(); 2624 } 2625 } 2626 #endif 2627 } 2628 } 2629 2630 template <class T> 2631 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) { 2632 int size = type2aelembytes(bt) * len; 2633 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0); 2634 for (int i = 0; i < len; i++) { 2635 int offset = i * type2aelembytes(bt); 2636 switch (bt) { 2637 case T_BYTE: val->at(i) = con; break; 2638 case T_SHORT: { 2639 jshort c = con; 2640 memcpy(val->adr_at(offset), &c, sizeof(jshort)); 2641 break; 2642 } 2643 case T_INT: { 2644 jint c = con; 2645 memcpy(val->adr_at(offset), &c, sizeof(jint)); 2646 break; 2647 } 2648 case T_LONG: { 2649 jlong c = con; 2650 memcpy(val->adr_at(offset), &c, sizeof(jlong)); 2651 break; 2652 } 2653 case T_FLOAT: { 2654 jfloat c = con; 2655 memcpy(val->adr_at(offset), &c, sizeof(jfloat)); 2656 break; 2657 } 2658 case T_DOUBLE: { 2659 jdouble c = con; 2660 memcpy(val->adr_at(offset), &c, sizeof(jdouble)); 2661 break; 2662 } 2663 default: assert(false, "%s", type2name(bt)); 2664 } 2665 } 2666 
return val; 2667 } 2668 2669 static inline jlong high_bit_set(BasicType bt) { 2670 switch (bt) { 2671 case T_BYTE: return 0x8080808080808080; 2672 case T_SHORT: return 0x8000800080008000; 2673 case T_INT: return 0x8000000080000000; 2674 case T_LONG: return 0x8000000000000000; 2675 default: 2676 ShouldNotReachHere(); 2677 return 0; 2678 } 2679 } 2680 2681 #ifndef PRODUCT 2682 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2683 st->print("nop \t# %d bytes pad for loops and calls", _count); 2684 } 2685 #endif 2686 2687 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const { 2688 __ nop(_count); 2689 } 2690 2691 uint MachNopNode::size(PhaseRegAlloc*) const { 2692 return _count; 2693 } 2694 2695 #ifndef PRODUCT 2696 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2697 st->print("# breakpoint"); 2698 } 2699 #endif 2700 2701 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const { 2702 __ int3(); 2703 } 2704 2705 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2706 return MachNode::size(ra_); 2707 } 2708 2709 %} 2710 2711 encode %{ 2712 2713 enc_class call_epilog %{ 2714 if (VerifyStackAtCalls) { 2715 // Check that stack depth is unchanged: find majik cookie on stack 2716 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2717 Label L; 2718 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2719 __ jccb(Assembler::equal, L); 2720 // Die if stack mismatch 2721 __ int3(); 2722 __ bind(L); 2723 } 2724 %} 2725 2726 %} 2727 2728 // Operands for bound floating pointer register arguments 2729 operand rxmm0() %{ 2730 constraint(ALLOC_IN_RC(xmm0_reg)); 2731 match(VecX); 2732 format%{%} 2733 interface(REG_INTER); 2734 %} 2735 2736 //----------OPERANDS----------------------------------------------------------- 2737 // Operand definitions must precede instruction definitions for correct parsing 2738 // in the ADLC because operands constitute user defined types which are used in 2739 // instruction definitions. 2740 2741 // Vectors 2742 2743 // Dummy generic vector class. Should be used for all vector operands. 2744 // Replaced with vec[SDXYZ] during post-selection pass. 2745 operand vec() %{ 2746 constraint(ALLOC_IN_RC(dynamic)); 2747 match(VecX); 2748 match(VecY); 2749 match(VecZ); 2750 match(VecS); 2751 match(VecD); 2752 2753 format %{ %} 2754 interface(REG_INTER); 2755 %} 2756 2757 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2758 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2759 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2760 // runtime code generation via reg_class_dynamic. 2761 operand legVec() %{ 2762 constraint(ALLOC_IN_RC(dynamic)); 2763 match(VecX); 2764 match(VecY); 2765 match(VecZ); 2766 match(VecS); 2767 match(VecD); 2768 2769 format %{ %} 2770 interface(REG_INTER); 2771 %} 2772 2773 // Replaces vec during post-selection cleanup. See above. 2774 operand vecS() %{ 2775 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2776 match(VecS); 2777 2778 format %{ %} 2779 interface(REG_INTER); 2780 %} 2781 2782 // Replaces legVec during post-selection cleanup. See above. 2783 operand legVecS() %{ 2784 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2785 match(VecS); 2786 2787 format %{ %} 2788 interface(REG_INTER); 2789 %} 2790 2791 // Replaces vec during post-selection cleanup. See above. 
2792 operand vecD() %{ 2793 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2794 match(VecD); 2795 2796 format %{ %} 2797 interface(REG_INTER); 2798 %} 2799 2800 // Replaces legVec during post-selection cleanup. See above. 2801 operand legVecD() %{ 2802 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2803 match(VecD); 2804 2805 format %{ %} 2806 interface(REG_INTER); 2807 %} 2808 2809 // Replaces vec during post-selection cleanup. See above. 2810 operand vecX() %{ 2811 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2812 match(VecX); 2813 2814 format %{ %} 2815 interface(REG_INTER); 2816 %} 2817 2818 // Replaces legVec during post-selection cleanup. See above. 2819 operand legVecX() %{ 2820 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2821 match(VecX); 2822 2823 format %{ %} 2824 interface(REG_INTER); 2825 %} 2826 2827 // Replaces vec during post-selection cleanup. See above. 2828 operand vecY() %{ 2829 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2830 match(VecY); 2831 2832 format %{ %} 2833 interface(REG_INTER); 2834 %} 2835 2836 // Replaces legVec during post-selection cleanup. See above. 2837 operand legVecY() %{ 2838 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2839 match(VecY); 2840 2841 format %{ %} 2842 interface(REG_INTER); 2843 %} 2844 2845 // Replaces vec during post-selection cleanup. See above. 2846 operand vecZ() %{ 2847 constraint(ALLOC_IN_RC(vectorz_reg)); 2848 match(VecZ); 2849 2850 format %{ %} 2851 interface(REG_INTER); 2852 %} 2853 2854 // Replaces legVec during post-selection cleanup. See above. 2855 operand legVecZ() %{ 2856 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2857 match(VecZ); 2858 2859 format %{ %} 2860 interface(REG_INTER); 2861 %} 2862 2863 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2864 2865 // ============================================================================ 2866 2867 instruct ShouldNotReachHere() %{ 2868 match(Halt); 2869 format %{ "stop\t# ShouldNotReachHere" %} 2870 ins_encode %{ 2871 if (is_reachable()) { 2872 const char* str = __ code_string(_halt_reason); 2873 __ stop(str); 2874 } 2875 %} 2876 ins_pipe(pipe_slow); 2877 %} 2878 2879 // ============================================================================ 2880 2881 instruct addF_reg(regF dst, regF src) %{ 2882 predicate(UseAVX == 0); 2883 match(Set dst (AddF dst src)); 2884 2885 format %{ "addss $dst, $src" %} 2886 ins_cost(150); 2887 ins_encode %{ 2888 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2889 %} 2890 ins_pipe(pipe_slow); 2891 %} 2892 2893 instruct addF_mem(regF dst, memory src) %{ 2894 predicate(UseAVX == 0); 2895 match(Set dst (AddF dst (LoadF src))); 2896 2897 format %{ "addss $dst, $src" %} 2898 ins_cost(150); 2899 ins_encode %{ 2900 __ addss($dst$$XMMRegister, $src$$Address); 2901 %} 2902 ins_pipe(pipe_slow); 2903 %} 2904 2905 instruct addF_imm(regF dst, immF con) %{ 2906 predicate(UseAVX == 0); 2907 match(Set dst (AddF dst con)); 2908 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2909 ins_cost(150); 2910 ins_encode %{ 2911 __ addss($dst$$XMMRegister, $constantaddress($con)); 2912 %} 2913 ins_pipe(pipe_slow); 2914 %} 2915 2916 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2917 predicate(UseAVX > 0); 2918 match(Set dst (AddF src1 src2)); 2919 2920 format %{ "vaddss $dst, $src1, $src2" %} 2921 ins_cost(150); 2922 ins_encode %{ 2923 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2924 %} 2925 ins_pipe(pipe_slow); 2926 %} 2927 2928 instruct addF_reg_mem(regF dst, regF 
src1, memory src2) %{ 2929 predicate(UseAVX > 0); 2930 match(Set dst (AddF src1 (LoadF src2))); 2931 2932 format %{ "vaddss $dst, $src1, $src2" %} 2933 ins_cost(150); 2934 ins_encode %{ 2935 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2936 %} 2937 ins_pipe(pipe_slow); 2938 %} 2939 2940 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2941 predicate(UseAVX > 0); 2942 match(Set dst (AddF src con)); 2943 2944 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2945 ins_cost(150); 2946 ins_encode %{ 2947 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2948 %} 2949 ins_pipe(pipe_slow); 2950 %} 2951 2952 instruct addD_reg(regD dst, regD src) %{ 2953 predicate(UseAVX == 0); 2954 match(Set dst (AddD dst src)); 2955 2956 format %{ "addsd $dst, $src" %} 2957 ins_cost(150); 2958 ins_encode %{ 2959 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2960 %} 2961 ins_pipe(pipe_slow); 2962 %} 2963 2964 instruct addD_mem(regD dst, memory src) %{ 2965 predicate(UseAVX == 0); 2966 match(Set dst (AddD dst (LoadD src))); 2967 2968 format %{ "addsd $dst, $src" %} 2969 ins_cost(150); 2970 ins_encode %{ 2971 __ addsd($dst$$XMMRegister, $src$$Address); 2972 %} 2973 ins_pipe(pipe_slow); 2974 %} 2975 2976 instruct addD_imm(regD dst, immD con) %{ 2977 predicate(UseAVX == 0); 2978 match(Set dst (AddD dst con)); 2979 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2980 ins_cost(150); 2981 ins_encode %{ 2982 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2983 %} 2984 ins_pipe(pipe_slow); 2985 %} 2986 2987 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2988 predicate(UseAVX > 0); 2989 match(Set dst (AddD src1 src2)); 2990 2991 format %{ "vaddsd $dst, $src1, $src2" %} 2992 ins_cost(150); 2993 ins_encode %{ 2994 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2995 %} 2996 ins_pipe(pipe_slow); 2997 %} 2998 2999 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3000 predicate(UseAVX > 0); 3001 match(Set dst (AddD src1 (LoadD src2))); 3002 3003 format %{ "vaddsd $dst, $src1, $src2" %} 3004 ins_cost(150); 3005 ins_encode %{ 3006 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3007 %} 3008 ins_pipe(pipe_slow); 3009 %} 3010 3011 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3012 predicate(UseAVX > 0); 3013 match(Set dst (AddD src con)); 3014 3015 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3016 ins_cost(150); 3017 ins_encode %{ 3018 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3019 %} 3020 ins_pipe(pipe_slow); 3021 %} 3022 3023 instruct subF_reg(regF dst, regF src) %{ 3024 predicate(UseAVX == 0); 3025 match(Set dst (SubF dst src)); 3026 3027 format %{ "subss $dst, $src" %} 3028 ins_cost(150); 3029 ins_encode %{ 3030 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3031 %} 3032 ins_pipe(pipe_slow); 3033 %} 3034 3035 instruct subF_mem(regF dst, memory src) %{ 3036 predicate(UseAVX == 0); 3037 match(Set dst (SubF dst (LoadF src))); 3038 3039 format %{ "subss $dst, $src" %} 3040 ins_cost(150); 3041 ins_encode %{ 3042 __ subss($dst$$XMMRegister, $src$$Address); 3043 %} 3044 ins_pipe(pipe_slow); 3045 %} 3046 3047 instruct subF_imm(regF dst, immF con) %{ 3048 predicate(UseAVX == 0); 3049 match(Set dst (SubF dst con)); 3050 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3051 ins_cost(150); 3052 ins_encode %{ 3053 __ 
subss($dst$$XMMRegister, $constantaddress($con)); 3054 %} 3055 ins_pipe(pipe_slow); 3056 %} 3057 3058 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3059 predicate(UseAVX > 0); 3060 match(Set dst (SubF src1 src2)); 3061 3062 format %{ "vsubss $dst, $src1, $src2" %} 3063 ins_cost(150); 3064 ins_encode %{ 3065 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3066 %} 3067 ins_pipe(pipe_slow); 3068 %} 3069 3070 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3071 predicate(UseAVX > 0); 3072 match(Set dst (SubF src1 (LoadF src2))); 3073 3074 format %{ "vsubss $dst, $src1, $src2" %} 3075 ins_cost(150); 3076 ins_encode %{ 3077 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3078 %} 3079 ins_pipe(pipe_slow); 3080 %} 3081 3082 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3083 predicate(UseAVX > 0); 3084 match(Set dst (SubF src con)); 3085 3086 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3087 ins_cost(150); 3088 ins_encode %{ 3089 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3090 %} 3091 ins_pipe(pipe_slow); 3092 %} 3093 3094 instruct subD_reg(regD dst, regD src) %{ 3095 predicate(UseAVX == 0); 3096 match(Set dst (SubD dst src)); 3097 3098 format %{ "subsd $dst, $src" %} 3099 ins_cost(150); 3100 ins_encode %{ 3101 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3102 %} 3103 ins_pipe(pipe_slow); 3104 %} 3105 3106 instruct subD_mem(regD dst, memory src) %{ 3107 predicate(UseAVX == 0); 3108 match(Set dst (SubD dst (LoadD src))); 3109 3110 format %{ "subsd $dst, $src" %} 3111 ins_cost(150); 3112 ins_encode %{ 3113 __ subsd($dst$$XMMRegister, $src$$Address); 3114 %} 3115 ins_pipe(pipe_slow); 3116 %} 3117 3118 instruct subD_imm(regD dst, immD con) %{ 3119 predicate(UseAVX == 0); 3120 match(Set dst (SubD dst con)); 3121 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3122 ins_cost(150); 3123 ins_encode %{ 3124 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3125 %} 3126 ins_pipe(pipe_slow); 3127 %} 3128 3129 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3130 predicate(UseAVX > 0); 3131 match(Set dst (SubD src1 src2)); 3132 3133 format %{ "vsubsd $dst, $src1, $src2" %} 3134 ins_cost(150); 3135 ins_encode %{ 3136 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3137 %} 3138 ins_pipe(pipe_slow); 3139 %} 3140 3141 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3142 predicate(UseAVX > 0); 3143 match(Set dst (SubD src1 (LoadD src2))); 3144 3145 format %{ "vsubsd $dst, $src1, $src2" %} 3146 ins_cost(150); 3147 ins_encode %{ 3148 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3149 %} 3150 ins_pipe(pipe_slow); 3151 %} 3152 3153 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3154 predicate(UseAVX > 0); 3155 match(Set dst (SubD src con)); 3156 3157 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3158 ins_cost(150); 3159 ins_encode %{ 3160 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3161 %} 3162 ins_pipe(pipe_slow); 3163 %} 3164 3165 instruct mulF_reg(regF dst, regF src) %{ 3166 predicate(UseAVX == 0); 3167 match(Set dst (MulF dst src)); 3168 3169 format %{ "mulss $dst, $src" %} 3170 ins_cost(150); 3171 ins_encode %{ 3172 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3173 %} 3174 ins_pipe(pipe_slow); 3175 %} 3176 3177 instruct mulF_mem(regF dst, memory src) %{ 3178 predicate(UseAVX == 
0); 3179 match(Set dst (MulF dst (LoadF src))); 3180 3181 format %{ "mulss $dst, $src" %} 3182 ins_cost(150); 3183 ins_encode %{ 3184 __ mulss($dst$$XMMRegister, $src$$Address); 3185 %} 3186 ins_pipe(pipe_slow); 3187 %} 3188 3189 instruct mulF_imm(regF dst, immF con) %{ 3190 predicate(UseAVX == 0); 3191 match(Set dst (MulF dst con)); 3192 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3193 ins_cost(150); 3194 ins_encode %{ 3195 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3196 %} 3197 ins_pipe(pipe_slow); 3198 %} 3199 3200 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3201 predicate(UseAVX > 0); 3202 match(Set dst (MulF src1 src2)); 3203 3204 format %{ "vmulss $dst, $src1, $src2" %} 3205 ins_cost(150); 3206 ins_encode %{ 3207 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3208 %} 3209 ins_pipe(pipe_slow); 3210 %} 3211 3212 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3213 predicate(UseAVX > 0); 3214 match(Set dst (MulF src1 (LoadF src2))); 3215 3216 format %{ "vmulss $dst, $src1, $src2" %} 3217 ins_cost(150); 3218 ins_encode %{ 3219 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3220 %} 3221 ins_pipe(pipe_slow); 3222 %} 3223 3224 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3225 predicate(UseAVX > 0); 3226 match(Set dst (MulF src con)); 3227 3228 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3229 ins_cost(150); 3230 ins_encode %{ 3231 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3232 %} 3233 ins_pipe(pipe_slow); 3234 %} 3235 3236 instruct mulD_reg(regD dst, regD src) %{ 3237 predicate(UseAVX == 0); 3238 match(Set dst (MulD dst src)); 3239 3240 format %{ "mulsd $dst, $src" %} 3241 ins_cost(150); 3242 ins_encode %{ 3243 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3244 %} 3245 ins_pipe(pipe_slow); 3246 %} 3247 3248 instruct mulD_mem(regD dst, memory src) %{ 3249 predicate(UseAVX == 0); 3250 match(Set dst (MulD dst (LoadD src))); 3251 3252 format %{ "mulsd $dst, $src" %} 3253 ins_cost(150); 3254 ins_encode %{ 3255 __ mulsd($dst$$XMMRegister, $src$$Address); 3256 %} 3257 ins_pipe(pipe_slow); 3258 %} 3259 3260 instruct mulD_imm(regD dst, immD con) %{ 3261 predicate(UseAVX == 0); 3262 match(Set dst (MulD dst con)); 3263 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3264 ins_cost(150); 3265 ins_encode %{ 3266 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3267 %} 3268 ins_pipe(pipe_slow); 3269 %} 3270 3271 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3272 predicate(UseAVX > 0); 3273 match(Set dst (MulD src1 src2)); 3274 3275 format %{ "vmulsd $dst, $src1, $src2" %} 3276 ins_cost(150); 3277 ins_encode %{ 3278 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3279 %} 3280 ins_pipe(pipe_slow); 3281 %} 3282 3283 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3284 predicate(UseAVX > 0); 3285 match(Set dst (MulD src1 (LoadD src2))); 3286 3287 format %{ "vmulsd $dst, $src1, $src2" %} 3288 ins_cost(150); 3289 ins_encode %{ 3290 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3291 %} 3292 ins_pipe(pipe_slow); 3293 %} 3294 3295 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3296 predicate(UseAVX > 0); 3297 match(Set dst (MulD src con)); 3298 3299 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3300 ins_cost(150); 3301 ins_encode %{ 3302 __ 
vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3303 %} 3304 ins_pipe(pipe_slow); 3305 %} 3306 3307 instruct divF_reg(regF dst, regF src) %{ 3308 predicate(UseAVX == 0); 3309 match(Set dst (DivF dst src)); 3310 3311 format %{ "divss $dst, $src" %} 3312 ins_cost(150); 3313 ins_encode %{ 3314 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3315 %} 3316 ins_pipe(pipe_slow); 3317 %} 3318 3319 instruct divF_mem(regF dst, memory src) %{ 3320 predicate(UseAVX == 0); 3321 match(Set dst (DivF dst (LoadF src))); 3322 3323 format %{ "divss $dst, $src" %} 3324 ins_cost(150); 3325 ins_encode %{ 3326 __ divss($dst$$XMMRegister, $src$$Address); 3327 %} 3328 ins_pipe(pipe_slow); 3329 %} 3330 3331 instruct divF_imm(regF dst, immF con) %{ 3332 predicate(UseAVX == 0); 3333 match(Set dst (DivF dst con)); 3334 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3335 ins_cost(150); 3336 ins_encode %{ 3337 __ divss($dst$$XMMRegister, $constantaddress($con)); 3338 %} 3339 ins_pipe(pipe_slow); 3340 %} 3341 3342 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3343 predicate(UseAVX > 0); 3344 match(Set dst (DivF src1 src2)); 3345 3346 format %{ "vdivss $dst, $src1, $src2" %} 3347 ins_cost(150); 3348 ins_encode %{ 3349 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3350 %} 3351 ins_pipe(pipe_slow); 3352 %} 3353 3354 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3355 predicate(UseAVX > 0); 3356 match(Set dst (DivF src1 (LoadF src2))); 3357 3358 format %{ "vdivss $dst, $src1, $src2" %} 3359 ins_cost(150); 3360 ins_encode %{ 3361 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3362 %} 3363 ins_pipe(pipe_slow); 3364 %} 3365 3366 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3367 predicate(UseAVX > 0); 3368 match(Set dst (DivF src con)); 3369 3370 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3371 ins_cost(150); 3372 ins_encode %{ 3373 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3374 %} 3375 ins_pipe(pipe_slow); 3376 %} 3377 3378 instruct divD_reg(regD dst, regD src) %{ 3379 predicate(UseAVX == 0); 3380 match(Set dst (DivD dst src)); 3381 3382 format %{ "divsd $dst, $src" %} 3383 ins_cost(150); 3384 ins_encode %{ 3385 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3386 %} 3387 ins_pipe(pipe_slow); 3388 %} 3389 3390 instruct divD_mem(regD dst, memory src) %{ 3391 predicate(UseAVX == 0); 3392 match(Set dst (DivD dst (LoadD src))); 3393 3394 format %{ "divsd $dst, $src" %} 3395 ins_cost(150); 3396 ins_encode %{ 3397 __ divsd($dst$$XMMRegister, $src$$Address); 3398 %} 3399 ins_pipe(pipe_slow); 3400 %} 3401 3402 instruct divD_imm(regD dst, immD con) %{ 3403 predicate(UseAVX == 0); 3404 match(Set dst (DivD dst con)); 3405 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3406 ins_cost(150); 3407 ins_encode %{ 3408 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3409 %} 3410 ins_pipe(pipe_slow); 3411 %} 3412 3413 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3414 predicate(UseAVX > 0); 3415 match(Set dst (DivD src1 src2)); 3416 3417 format %{ "vdivsd $dst, $src1, $src2" %} 3418 ins_cost(150); 3419 ins_encode %{ 3420 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3421 %} 3422 ins_pipe(pipe_slow); 3423 %} 3424 3425 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3426 predicate(UseAVX > 0); 3427 match(Set dst (DivD src1 (LoadD src2))); 3428 3429 
format %{ "vdivsd $dst, $src1, $src2" %} 3430 ins_cost(150); 3431 ins_encode %{ 3432 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3433 %} 3434 ins_pipe(pipe_slow); 3435 %} 3436 3437 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3438 predicate(UseAVX > 0); 3439 match(Set dst (DivD src con)); 3440 3441 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3442 ins_cost(150); 3443 ins_encode %{ 3444 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3445 %} 3446 ins_pipe(pipe_slow); 3447 %} 3448 3449 instruct absF_reg(regF dst) %{ 3450 predicate(UseAVX == 0); 3451 match(Set dst (AbsF dst)); 3452 ins_cost(150); 3453 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3454 ins_encode %{ 3455 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3456 %} 3457 ins_pipe(pipe_slow); 3458 %} 3459 3460 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3461 predicate(UseAVX > 0); 3462 match(Set dst (AbsF src)); 3463 ins_cost(150); 3464 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3465 ins_encode %{ 3466 int vlen_enc = Assembler::AVX_128bit; 3467 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3468 ExternalAddress(float_signmask()), vlen_enc); 3469 %} 3470 ins_pipe(pipe_slow); 3471 %} 3472 3473 instruct absD_reg(regD dst) %{ 3474 predicate(UseAVX == 0); 3475 match(Set dst (AbsD dst)); 3476 ins_cost(150); 3477 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3478 "# abs double by sign masking" %} 3479 ins_encode %{ 3480 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3481 %} 3482 ins_pipe(pipe_slow); 3483 %} 3484 3485 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3486 predicate(UseAVX > 0); 3487 match(Set dst (AbsD src)); 3488 ins_cost(150); 3489 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3490 "# abs double by sign masking" %} 3491 ins_encode %{ 3492 int vlen_enc = Assembler::AVX_128bit; 3493 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3494 ExternalAddress(double_signmask()), vlen_enc); 3495 %} 3496 ins_pipe(pipe_slow); 3497 %} 3498 3499 instruct negF_reg(regF dst) %{ 3500 predicate(UseAVX == 0); 3501 match(Set dst (NegF dst)); 3502 ins_cost(150); 3503 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3504 ins_encode %{ 3505 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3506 %} 3507 ins_pipe(pipe_slow); 3508 %} 3509 3510 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3511 predicate(UseAVX > 0); 3512 match(Set dst (NegF src)); 3513 ins_cost(150); 3514 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3515 ins_encode %{ 3516 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3517 ExternalAddress(float_signflip())); 3518 %} 3519 ins_pipe(pipe_slow); 3520 %} 3521 3522 instruct negD_reg(regD dst) %{ 3523 predicate(UseAVX == 0); 3524 match(Set dst (NegD dst)); 3525 ins_cost(150); 3526 format %{ "xorpd $dst, [0x8000000000000000]\t" 3527 "# neg double by sign flipping" %} 3528 ins_encode %{ 3529 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3530 %} 3531 ins_pipe(pipe_slow); 3532 %} 3533 3534 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3535 predicate(UseAVX > 0); 3536 match(Set dst (NegD src)); 3537 ins_cost(150); 3538 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3539 "# neg double by sign flipping" %} 3540 ins_encode %{ 3541 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3542 ExternalAddress(double_signflip())); 3543 %} 3544 
ins_pipe(pipe_slow); 3545 %} 3546 3547 // sqrtss instruction needs destination register to be pre initialized for best performance 3548 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3549 instruct sqrtF_reg(regF dst) %{ 3550 match(Set dst (SqrtF dst)); 3551 format %{ "sqrtss $dst, $dst" %} 3552 ins_encode %{ 3553 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3554 %} 3555 ins_pipe(pipe_slow); 3556 %} 3557 3558 // sqrtsd instruction needs destination register to be pre initialized for best performance 3559 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3560 instruct sqrtD_reg(regD dst) %{ 3561 match(Set dst (SqrtD dst)); 3562 format %{ "sqrtsd $dst, $dst" %} 3563 ins_encode %{ 3564 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3565 %} 3566 ins_pipe(pipe_slow); 3567 %} 3568 3569 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{ 3570 effect(TEMP tmp); 3571 match(Set dst (ConvF2HF src)); 3572 ins_cost(125); 3573 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%} 3574 ins_encode %{ 3575 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister); 3576 %} 3577 ins_pipe( pipe_slow ); 3578 %} 3579 3580 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{ 3581 predicate((UseAVX > 2) && VM_Version::supports_avx512vl()); 3582 effect(TEMP ktmp, TEMP rtmp); 3583 match(Set mem (StoreC mem (ConvF2HF src))); 3584 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %} 3585 ins_encode %{ 3586 __ movl($rtmp$$Register, 0x1); 3587 __ kmovwl($ktmp$$KRegister, $rtmp$$Register); 3588 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 3589 %} 3590 ins_pipe( pipe_slow ); 3591 %} 3592 3593 instruct vconvF2HF(vec dst, vec src) %{ 3594 match(Set dst (VectorCastF2HF src)); 3595 format %{ "vector_conv_F2HF $dst $src" %} 3596 ins_encode %{ 3597 int vlen_enc = vector_length_encoding(this, $src); 3598 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc); 3599 %} 3600 ins_pipe( pipe_slow ); 3601 %} 3602 3603 instruct vconvF2HF_mem_reg(memory mem, vec src) %{ 3604 predicate(n->as_StoreVector()->memory_size() >= 16); 3605 match(Set mem (StoreVector mem (VectorCastF2HF src))); 3606 format %{ "vcvtps2ph $mem,$src" %} 3607 ins_encode %{ 3608 int vlen_enc = vector_length_encoding(this, $src); 3609 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc); 3610 %} 3611 ins_pipe( pipe_slow ); 3612 %} 3613 3614 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{ 3615 match(Set dst (ConvHF2F src)); 3616 format %{ "vcvtph2ps $dst,$src" %} 3617 ins_encode %{ 3618 __ flt16_to_flt($dst$$XMMRegister, $src$$Register); 3619 %} 3620 ins_pipe( pipe_slow ); 3621 %} 3622 3623 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{ 3624 match(Set dst (VectorCastHF2F (LoadVector mem))); 3625 format %{ "vcvtph2ps $dst,$mem" %} 3626 ins_encode %{ 3627 int vlen_enc = vector_length_encoding(this); 3628 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc); 3629 %} 3630 ins_pipe( pipe_slow ); 3631 %} 3632 3633 instruct vconvHF2F(vec dst, vec src) %{ 3634 match(Set dst (VectorCastHF2F src)); 3635 ins_cost(125); 3636 format %{ "vector_conv_HF2F $dst,$src" %} 3637 ins_encode %{ 3638 int vlen_enc = vector_length_encoding(this); 3639 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 3640 %} 3641 ins_pipe( pipe_slow ); 3642 %} 3643 3644 // ---------------------------------------- VectorReinterpret 
------------------------------------ 3645 instruct reinterpret_mask(kReg dst) %{ 3646 predicate(n->bottom_type()->isa_vectmask() && 3647 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3648 match(Set dst (VectorReinterpret dst)); 3649 ins_cost(125); 3650 format %{ "vector_reinterpret $dst\t!" %} 3651 ins_encode %{ 3652 // empty 3653 %} 3654 ins_pipe( pipe_slow ); 3655 %} 3656 3657 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3658 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3659 n->bottom_type()->isa_vectmask() && 3660 n->in(1)->bottom_type()->isa_vectmask() && 3661 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3662 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3663 match(Set dst (VectorReinterpret src)); 3664 effect(TEMP xtmp); 3665 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %} 3666 ins_encode %{ 3667 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3668 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3669 assert(src_sz == dst_sz , "src and dst size mismatch"); 3670 int vlen_enc = vector_length_encoding(src_sz); 3671 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3672 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3673 %} 3674 ins_pipe( pipe_slow ); 3675 %} 3676 3677 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3678 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3679 n->bottom_type()->isa_vectmask() && 3680 n->in(1)->bottom_type()->isa_vectmask() && 3681 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3682 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3683 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3684 match(Set dst (VectorReinterpret src)); 3685 effect(TEMP xtmp); 3686 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3687 ins_encode %{ 3688 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3689 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3690 assert(src_sz == dst_sz , "src and dst size mismatch"); 3691 int vlen_enc = vector_length_encoding(src_sz); 3692 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3693 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3694 %} 3695 ins_pipe( pipe_slow ); 3696 %} 3697 3698 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3699 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3700 n->bottom_type()->isa_vectmask() && 3701 n->in(1)->bottom_type()->isa_vectmask() && 3702 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3703 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3704 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3705 match(Set dst (VectorReinterpret src)); 3706 effect(TEMP xtmp); 3707 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" 
%} 3708 ins_encode %{ 3709 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3710 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3711 assert(src_sz == dst_sz , "src and dst size mismatch"); 3712 int vlen_enc = vector_length_encoding(src_sz); 3713 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3714 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3715 %} 3716 ins_pipe( pipe_slow ); 3717 %} 3718 3719 instruct reinterpret(vec dst) %{ 3720 predicate(!n->bottom_type()->isa_vectmask() && 3721 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3722 match(Set dst (VectorReinterpret dst)); 3723 ins_cost(125); 3724 format %{ "vector_reinterpret $dst\t!" %} 3725 ins_encode %{ 3726 // empty 3727 %} 3728 ins_pipe( pipe_slow ); 3729 %} 3730 3731 instruct reinterpret_expand(vec dst, vec src) %{ 3732 predicate(UseAVX == 0 && 3733 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3734 match(Set dst (VectorReinterpret src)); 3735 ins_cost(125); 3736 effect(TEMP dst); 3737 format %{ "vector_reinterpret_expand $dst,$src" %} 3738 ins_encode %{ 3739 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3740 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3741 3742 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3743 if (src_vlen_in_bytes == 4) { 3744 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3745 } else { 3746 assert(src_vlen_in_bytes == 8, ""); 3747 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3748 } 3749 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3750 %} 3751 ins_pipe( pipe_slow ); 3752 %} 3753 3754 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3755 predicate(UseAVX > 0 && 3756 !n->bottom_type()->isa_vectmask() && 3757 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3758 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3759 match(Set dst (VectorReinterpret src)); 3760 ins_cost(125); 3761 format %{ "vector_reinterpret_expand $dst,$src" %} 3762 ins_encode %{ 3763 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3764 %} 3765 ins_pipe( pipe_slow ); 3766 %} 3767 3768 3769 instruct vreinterpret_expand(legVec dst, vec src) %{ 3770 predicate(UseAVX > 0 && 3771 !n->bottom_type()->isa_vectmask() && 3772 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3773 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3774 match(Set dst (VectorReinterpret src)); 3775 ins_cost(125); 3776 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3777 ins_encode %{ 3778 switch (Matcher::vector_length_in_bytes(this, $src)) { 3779 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3780 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3781 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3782 default: ShouldNotReachHere(); 3783 } 3784 %} 3785 ins_pipe( pipe_slow ); 3786 %} 3787 3788 instruct reinterpret_shrink(vec dst, legVec src) %{ 3789 predicate(!n->bottom_type()->isa_vectmask() && 3790 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3791 match(Set dst (VectorReinterpret src)); 3792 ins_cost(125); 3793 format %{ "vector_reinterpret_shrink $dst,$src\t!" 
%} 3794 ins_encode %{ 3795 switch (Matcher::vector_length_in_bytes(this)) { 3796 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3797 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3798 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3799 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3800 default: ShouldNotReachHere(); 3801 } 3802 %} 3803 ins_pipe( pipe_slow ); 3804 %} 3805 3806 // ---------------------------------------------------------------------------------------------------- 3807 3808 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3809 match(Set dst (RoundDoubleMode src rmode)); 3810 format %{ "roundsd $dst,$src" %} 3811 ins_cost(150); 3812 ins_encode %{ 3813 assert(UseSSE >= 4, "required"); 3814 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) { 3815 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3816 } 3817 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3818 %} 3819 ins_pipe(pipe_slow); 3820 %} 3821 3822 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3823 match(Set dst (RoundDoubleMode con rmode)); 3824 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3825 ins_cost(150); 3826 ins_encode %{ 3827 assert(UseSSE >= 4, "required"); 3828 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3829 %} 3830 ins_pipe(pipe_slow); 3831 %} 3832 3833 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3834 predicate(Matcher::vector_length(n) < 8); 3835 match(Set dst (RoundDoubleModeV src rmode)); 3836 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3837 ins_encode %{ 3838 assert(UseAVX > 0, "required"); 3839 int vlen_enc = vector_length_encoding(this); 3840 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3841 %} 3842 ins_pipe( pipe_slow ); 3843 %} 3844 3845 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3846 predicate(Matcher::vector_length(n) == 8); 3847 match(Set dst (RoundDoubleModeV src rmode)); 3848 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3849 ins_encode %{ 3850 assert(UseAVX > 2, "required"); 3851 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3852 %} 3853 ins_pipe( pipe_slow ); 3854 %} 3855 3856 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3857 predicate(Matcher::vector_length(n) < 8); 3858 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3859 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3860 ins_encode %{ 3861 assert(UseAVX > 0, "required"); 3862 int vlen_enc = vector_length_encoding(this); 3863 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3864 %} 3865 ins_pipe( pipe_slow ); 3866 %} 3867 3868 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3869 predicate(Matcher::vector_length(n) == 8); 3870 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3871 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3872 ins_encode %{ 3873 assert(UseAVX > 2, "required"); 3874 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3875 %} 3876 ins_pipe( pipe_slow ); 3877 %} 3878 3879 instruct onspinwait() %{ 3880 match(OnSpinWait); 3881 ins_cost(200); 3882 3883 format %{ 3884 $$template 3885 $$emit$$"pause\t! 
membar_onspinwait" 3886 %} 3887 ins_encode %{ 3888 __ pause(); 3889 %} 3890 ins_pipe(pipe_slow); 3891 %} 3892 3893 // a * b + c 3894 instruct fmaD_reg(regD a, regD b, regD c) %{ 3895 match(Set c (FmaD c (Binary a b))); 3896 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3897 ins_cost(150); 3898 ins_encode %{ 3899 assert(UseFMA, "Needs FMA instructions support."); 3900 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3901 %} 3902 ins_pipe( pipe_slow ); 3903 %} 3904 3905 // a * b + c 3906 instruct fmaF_reg(regF a, regF b, regF c) %{ 3907 match(Set c (FmaF c (Binary a b))); 3908 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3909 ins_cost(150); 3910 ins_encode %{ 3911 assert(UseFMA, "Needs FMA instructions support."); 3912 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3913 %} 3914 ins_pipe( pipe_slow ); 3915 %} 3916 3917 // ====================VECTOR INSTRUCTIONS===================================== 3918 3919 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 3920 instruct MoveVec2Leg(legVec dst, vec src) %{ 3921 match(Set dst src); 3922 format %{ "" %} 3923 ins_encode %{ 3924 ShouldNotReachHere(); 3925 %} 3926 ins_pipe( fpu_reg_reg ); 3927 %} 3928 3929 instruct MoveLeg2Vec(vec dst, legVec src) %{ 3930 match(Set dst src); 3931 format %{ "" %} 3932 ins_encode %{ 3933 ShouldNotReachHere(); 3934 %} 3935 ins_pipe( fpu_reg_reg ); 3936 %} 3937 3938 // ============================================================================ 3939 3940 // Load vectors generic operand pattern 3941 instruct loadV(vec dst, memory mem) %{ 3942 match(Set dst (LoadVector mem)); 3943 ins_cost(125); 3944 format %{ "load_vector $dst,$mem" %} 3945 ins_encode %{ 3946 BasicType bt = Matcher::vector_element_basic_type(this); 3947 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 3948 %} 3949 ins_pipe( pipe_slow ); 3950 %} 3951 3952 // Store vectors generic operand pattern. 3953 instruct storeV(memory mem, vec src) %{ 3954 match(Set mem (StoreVector mem src)); 3955 ins_cost(145); 3956 format %{ "store_vector $mem,$src\n\t" %} 3957 ins_encode %{ 3958 switch (Matcher::vector_length_in_bytes(this, $src)) { 3959 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 3960 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 3961 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 3962 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 3963 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 3964 default: ShouldNotReachHere(); 3965 } 3966 %} 3967 ins_pipe( pipe_slow ); 3968 %} 3969 3970 // ---------------------------------------- Gather ------------------------------------ 3971 3972 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE 3973 3974 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 3975 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) && 3976 Matcher::vector_length_in_bytes(n) <= 32); 3977 match(Set dst (LoadVectorGather mem idx)); 3978 effect(TEMP dst, TEMP tmp, TEMP mask); 3979 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %} 3980 ins_encode %{ 3981 int vlen_enc = vector_length_encoding(this); 3982 BasicType elem_bt = Matcher::vector_element_basic_type(this); 3983 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3984 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc); 3985 __ lea($tmp$$Register, $mem$$Address); 3986 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 3987 %} 3988 ins_pipe( pipe_slow ); 3989 %} 3990 3991 3992 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ 3993 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 3994 !is_subword_type(Matcher::vector_element_basic_type(n))); 3995 match(Set dst (LoadVectorGather mem idx)); 3996 effect(TEMP dst, TEMP tmp, TEMP ktmp); 3997 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %} 3998 ins_encode %{ 3999 int vlen_enc = vector_length_encoding(this); 4000 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4001 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister); 4002 __ lea($tmp$$Register, $mem$$Address); 4003 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4004 %} 4005 ins_pipe( pipe_slow ); 4006 %} 4007 4008 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4009 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4010 !is_subword_type(Matcher::vector_element_basic_type(n))); 4011 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask))); 4012 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp); 4013 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %} 4014 ins_encode %{ 4015 assert(UseAVX > 2, "sanity"); 4016 int vlen_enc = vector_length_encoding(this); 4017 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4018 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4019 // Note: Since the gather instruction partially updates the opmask register used 4020 // for predication, the mask operand is moved to a temporary. 4021 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4022 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4023 __ lea($tmp$$Register, $mem$$Address); 4024 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4025 %} 4026 ins_pipe( pipe_slow ); 4027 %} 4028 4029 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{ 4030 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4031 match(Set dst (LoadVectorGather mem idx_base)); 4032 effect(TEMP tmp, TEMP rtmp); 4033 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t!
using $tmp and $rtmp as TEMP" %} 4034 ins_encode %{ 4035 int vlen_enc = vector_length_encoding(this); 4036 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4037 __ lea($tmp$$Register, $mem$$Address); 4038 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc); 4039 %} 4040 ins_pipe( pipe_slow ); 4041 %} 4042 4043 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp, 4044 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4045 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4046 match(Set dst (LoadVectorGather mem idx_base)); 4047 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4048 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4049 ins_encode %{ 4050 int vlen_enc = vector_length_encoding(this); 4051 int vector_len = Matcher::vector_length(this); 4052 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4053 __ lea($tmp$$Register, $mem$$Address); 4054 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4055 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister, 4056 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4057 %} 4058 ins_pipe( pipe_slow ); 4059 %} 4060 4061 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4062 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4063 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask))); 4064 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4065 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4066 ins_encode %{ 4067 int vlen_enc = vector_length_encoding(this); 4068 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4069 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4070 __ lea($tmp$$Register, $mem$$Address); 4071 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4072 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4073 %} 4074 ins_pipe( pipe_slow ); 4075 %} 4076 4077 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp, 4078 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4079 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4080 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask))); 4081 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4082 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4083 ins_encode %{ 4084 int vlen_enc = vector_length_encoding(this); 4085 int vector_len = Matcher::vector_length(this); 4086 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4087 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4088 __ lea($tmp$$Register, $mem$$Address); 4089 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4090 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4091 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4092 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4093 %} 4094 ins_pipe( pipe_slow ); 4095 %} 4096 4097 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4098 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4099 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask))); 4100 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4101 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4102 ins_encode %{ 4103 int vlen_enc = vector_length_encoding(this); 4104 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4105 __ lea($tmp$$Register, $mem$$Address); 4106 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4107 if (elem_bt == T_SHORT) { 4108 __ movl($mask_idx$$Register, 0x55555555); 4109 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4110 } 4111 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4112 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4113 %} 4114 ins_pipe( pipe_slow ); 4115 %} 4116 4117 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp, 4118 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4119 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4120 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask))); 4121 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4122 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4123 ins_encode %{ 4124 int vlen_enc = vector_length_encoding(this); 4125 int vector_len = Matcher::vector_length(this); 4126 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4127 __ lea($tmp$$Register, $mem$$Address); 4128 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4129 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4130 if (elem_bt == T_SHORT) { 4131 __ movl($mask_idx$$Register, 0x55555555); 4132 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4133 } 4134 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4135 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4136 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4137 %} 4138 ins_pipe( pipe_slow ); 4139 %} 4140 4141 // ====================Scatter======================================= 4142 4143 // Scatter INT, LONG, FLOAT, DOUBLE 4144 4145 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{ 4146 predicate(UseAVX > 2); 4147 match(Set mem (StoreVectorScatter mem (Binary src idx))); 4148 effect(TEMP tmp, TEMP ktmp); 4149 format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %} 4150 ins_encode %{ 4151 int vlen_enc = vector_length_encoding(this, $src); 4152 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4153 4154 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4155 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4156 4157 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg); 4158 __ lea($tmp$$Register, $mem$$Address); 4159 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4160 %} 4161 ins_pipe( pipe_slow ); 4162 %} 4163 4164 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4165 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask)))); 4166 effect(TEMP tmp, TEMP ktmp); 4167 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %} 4168 ins_encode %{ 4169 int vlen_enc = vector_length_encoding(this, $src); 4170 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4171 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4172 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4173 // Note: Since the scatter instruction partially updates the opmask register used 4174 // for predication, the mask operand is moved to a temporary.
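// Roughly, the sequence below: kmovwl copies $mask into $ktmp (the scatter clears opmask bits as
// elements are written), lea materializes the compound memory operand into a plain base register,
// and evscatter selects the EVEX vpscatter form matching elem_bt.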
4175 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4176 __ lea($tmp$$Register, $mem$$Address); 4177 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4178 %} 4179 ins_pipe( pipe_slow ); 4180 %} 4181 4182 // ====================REPLICATE======================================= 4183 4184 // Replicate byte scalar to be vector 4185 instruct vReplB_reg(vec dst, rRegI src) %{ 4186 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 4187 match(Set dst (Replicate src)); 4188 format %{ "replicateB $dst,$src" %} 4189 ins_encode %{ 4190 uint vlen = Matcher::vector_length(this); 4191 if (UseAVX >= 2) { 4192 int vlen_enc = vector_length_encoding(this); 4193 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4194 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4195 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4196 } else { 4197 __ movdl($dst$$XMMRegister, $src$$Register); 4198 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4199 } 4200 } else { 4201 assert(UseAVX < 2, ""); 4202 __ movdl($dst$$XMMRegister, $src$$Register); 4203 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4204 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4205 if (vlen >= 16) { 4206 assert(vlen == 16, ""); 4207 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4208 } 4209 } 4210 %} 4211 ins_pipe( pipe_slow ); 4212 %} 4213 4214 instruct ReplB_mem(vec dst, memory mem) %{ 4215 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE); 4216 match(Set dst (Replicate (LoadB mem))); 4217 format %{ "replicateB $dst,$mem" %} 4218 ins_encode %{ 4219 int vlen_enc = vector_length_encoding(this); 4220 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4221 %} 4222 ins_pipe( pipe_slow ); 4223 %} 4224 4225 // ====================ReplicateS======================================= 4226 4227 instruct vReplS_reg(vec dst, rRegI src) %{ 4228 predicate(Matcher::vector_element_basic_type(n) == T_SHORT); 4229 match(Set dst (Replicate src)); 4230 format %{ "replicateS $dst,$src" %} 4231 ins_encode %{ 4232 uint vlen = Matcher::vector_length(this); 4233 int vlen_enc = vector_length_encoding(this); 4234 if (UseAVX >= 2) { 4235 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4236 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4237 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4238 } else { 4239 __ movdl($dst$$XMMRegister, $src$$Register); 4240 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4241 } 4242 } else { 4243 assert(UseAVX < 2, ""); 4244 __ movdl($dst$$XMMRegister, $src$$Register); 4245 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4246 if (vlen >= 8) { 4247 assert(vlen == 8, ""); 4248 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4249 } 4250 } 4251 %} 4252 ins_pipe( pipe_slow ); 4253 %} 4254 4255 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{ 4256 match(Set dst (Replicate con)); 4257 effect(TEMP rtmp); 4258 format %{ "replicateHF $dst, $con \t! 
using $rtmp as TEMP" %} 4259 ins_encode %{ 4260 int vlen_enc = vector_length_encoding(this); 4261 BasicType bt = Matcher::vector_element_basic_type(this); 4262 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, ""); 4263 __ movl($rtmp$$Register, $con$$constant); 4264 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4265 %} 4266 ins_pipe( pipe_slow ); 4267 %} 4268 4269 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{ 4270 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT); 4271 match(Set dst (Replicate src)); 4272 effect(TEMP rtmp); 4273 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %} 4274 ins_encode %{ 4275 int vlen_enc = vector_length_encoding(this); 4276 __ vmovw($rtmp$$Register, $src$$XMMRegister); 4277 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4278 %} 4279 ins_pipe( pipe_slow ); 4280 %} 4281 4282 instruct ReplS_mem(vec dst, memory mem) %{ 4283 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4284 match(Set dst (Replicate (LoadS mem))); 4285 format %{ "replicateS $dst,$mem" %} 4286 ins_encode %{ 4287 int vlen_enc = vector_length_encoding(this); 4288 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4289 %} 4290 ins_pipe( pipe_slow ); 4291 %} 4292 4293 // ====================ReplicateI======================================= 4294 4295 instruct ReplI_reg(vec dst, rRegI src) %{ 4296 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4297 match(Set dst (Replicate src)); 4298 format %{ "replicateI $dst,$src" %} 4299 ins_encode %{ 4300 uint vlen = Matcher::vector_length(this); 4301 int vlen_enc = vector_length_encoding(this); 4302 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4303 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4304 } else if (VM_Version::supports_avx2()) { 4305 __ movdl($dst$$XMMRegister, $src$$Register); 4306 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4307 } else { 4308 __ movdl($dst$$XMMRegister, $src$$Register); 4309 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4310 } 4311 %} 4312 ins_pipe( pipe_slow ); 4313 %} 4314 4315 instruct ReplI_mem(vec dst, memory mem) %{ 4316 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4317 match(Set dst (Replicate (LoadI mem))); 4318 format %{ "replicateI $dst,$mem" %} 4319 ins_encode %{ 4320 int vlen_enc = vector_length_encoding(this); 4321 if (VM_Version::supports_avx2()) { 4322 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4323 } else if (VM_Version::supports_avx()) { 4324 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4325 } else { 4326 __ movdl($dst$$XMMRegister, $mem$$Address); 4327 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4328 } 4329 %} 4330 ins_pipe( pipe_slow ); 4331 %} 4332 4333 instruct ReplI_imm(vec dst, immI con) %{ 4334 predicate(Matcher::is_non_long_integral_vector(n)); 4335 match(Set dst (Replicate con)); 4336 format %{ "replicateI $dst,$con" %} 4337 ins_encode %{ 4338 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4339 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 16) / 4340 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4341 BasicType bt = Matcher::vector_element_basic_type(this); 4342 int vlen = Matcher::vector_length_in_bytes(this); 4343 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4344 %} 4345 ins_pipe( pipe_slow ); 4346 %} 4347 4348 // Replicate scalar zero to be vector 4349 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4350 predicate(Matcher::is_non_long_integral_vector(n)); 4351 match(Set dst (Replicate zero)); 4352 format %{ "replicateI $dst,$zero" %} 4353 ins_encode %{ 4354 int vlen_enc = vector_length_encoding(this); 4355 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4356 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4357 } else { 4358 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4359 } 4360 %} 4361 ins_pipe( fpu_reg_reg ); 4362 %} 4363 4364 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4365 predicate(Matcher::is_non_long_integral_vector(n)); 4366 match(Set dst (Replicate con)); 4367 format %{ "vallones $dst" %} 4368 ins_encode %{ 4369 int vector_len = vector_length_encoding(this); 4370 __ vallones($dst$$XMMRegister, vector_len); 4371 %} 4372 ins_pipe( pipe_slow ); 4373 %} 4374 4375 // ====================ReplicateL======================================= 4376 4377 // Replicate long (8 byte) scalar to be vector 4378 instruct ReplL_reg(vec dst, rRegL src) %{ 4379 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4380 match(Set dst (Replicate src)); 4381 format %{ "replicateL $dst,$src" %} 4382 ins_encode %{ 4383 int vlen = Matcher::vector_length(this); 4384 int vlen_enc = vector_length_encoding(this); 4385 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4386 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4387 } else if (VM_Version::supports_avx2()) { 4388 __ movdq($dst$$XMMRegister, $src$$Register); 4389 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4390 } else { 4391 __ movdq($dst$$XMMRegister, $src$$Register); 4392 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4393 } 4394 %} 4395 ins_pipe( pipe_slow ); 4396 %} 4397 4398 instruct ReplL_mem(vec dst, memory mem) %{ 4399 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4400 match(Set dst (Replicate (LoadL mem))); 4401 format %{ "replicateL $dst,$mem" %} 4402 ins_encode %{ 4403 int vlen_enc = vector_length_encoding(this); 4404 if (VM_Version::supports_avx2()) { 4405 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4406 } else if (VM_Version::supports_sse3()) { 4407 __ movddup($dst$$XMMRegister, $mem$$Address); 4408 } else { 4409 __ movq($dst$$XMMRegister, $mem$$Address); 4410 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4411 } 4412 %} 4413 ins_pipe( pipe_slow ); 4414 %} 4415 4416 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4417 instruct ReplL_imm(vec dst, immL con) %{ 4418 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4419 match(Set dst (Replicate con)); 4420 format %{ "replicateL $dst,$con" %} 4421 ins_encode %{ 4422 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4423 int vlen = Matcher::vector_length_in_bytes(this); 4424 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4425 %} 4426 ins_pipe( pipe_slow ); 4427 %} 4428 4429 instruct ReplL_zero(vec dst, immL0 zero) %{ 4430 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4431 match(Set dst (Replicate zero)); 4432 format %{ "replicateL $dst,$zero" %} 4433 ins_encode %{ 4434 int vlen_enc = vector_length_encoding(this); 4435 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4436 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4437 } else { 4438 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4439 } 4440 %} 4441 ins_pipe( fpu_reg_reg ); 4442 %} 4443 4444 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4445 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4446 match(Set dst (Replicate con)); 4447 format %{ "vallones $dst" %} 4448 ins_encode %{ 4449 int vector_len = vector_length_encoding(this); 4450 __ vallones($dst$$XMMRegister, vector_len); 4451 %} 4452 ins_pipe( pipe_slow ); 4453 %} 4454 4455 // ====================ReplicateF======================================= 4456 4457 instruct vReplF_reg(vec dst, vlRegF src) %{ 4458 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4459 match(Set dst (Replicate src)); 4460 format %{ "replicateF $dst,$src" %} 4461 ins_encode %{ 4462 uint vlen = Matcher::vector_length(this); 4463 int vlen_enc = vector_length_encoding(this); 4464 if (vlen <= 4) { 4465 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4466 } else if (VM_Version::supports_avx2()) { 4467 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4468 } else { 4469 assert(vlen == 8, "sanity"); 4470 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4471 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4472 } 4473 %} 4474 ins_pipe( pipe_slow ); 4475 %} 4476 4477 instruct ReplF_reg(vec dst, vlRegF src) %{ 4478 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4479 match(Set dst (Replicate src)); 4480 format %{ "replicateF $dst,$src" %} 4481 ins_encode %{ 4482 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4483 %} 4484 ins_pipe( pipe_slow ); 4485 %} 4486 4487 instruct ReplF_mem(vec dst, memory mem) %{ 4488 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4489 match(Set dst (Replicate (LoadF mem))); 4490 format %{ "replicateF $dst,$mem" %} 4491 ins_encode %{ 4492 int vlen_enc = vector_length_encoding(this); 4493 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4494 %} 4495 ins_pipe( pipe_slow ); 4496 %} 4497 4498 // Replicate float scalar immediate to be vector by loading from const table. 4499 instruct ReplF_imm(vec dst, immF con) %{ 4500 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4501 match(Set dst (Replicate con)); 4502 format %{ "replicateF $dst,$con" %} 4503 ins_encode %{ 4504 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant, 4505 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
1 : 2) : 4)); 4506 int vlen = Matcher::vector_length_in_bytes(this); 4507 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4508 %} 4509 ins_pipe( pipe_slow ); 4510 %} 4511 4512 instruct ReplF_zero(vec dst, immF0 zero) %{ 4513 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4514 match(Set dst (Replicate zero)); 4515 format %{ "replicateF $dst,$zero" %} 4516 ins_encode %{ 4517 int vlen_enc = vector_length_encoding(this); 4518 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4519 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4520 } else { 4521 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4522 } 4523 %} 4524 ins_pipe( fpu_reg_reg ); 4525 %} 4526 4527 // ====================ReplicateD======================================= 4528 4529 // Replicate double (8 bytes) scalar to be vector 4530 instruct vReplD_reg(vec dst, vlRegD src) %{ 4531 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4532 match(Set dst (Replicate src)); 4533 format %{ "replicateD $dst,$src" %} 4534 ins_encode %{ 4535 uint vlen = Matcher::vector_length(this); 4536 int vlen_enc = vector_length_encoding(this); 4537 if (vlen <= 2) { 4538 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4539 } else if (VM_Version::supports_avx2()) { 4540 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4541 } else { 4542 assert(vlen == 4, "sanity"); 4543 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4544 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4545 } 4546 %} 4547 ins_pipe( pipe_slow ); 4548 %} 4549 4550 instruct ReplD_reg(vec dst, vlRegD src) %{ 4551 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4552 match(Set dst (Replicate src)); 4553 format %{ "replicateD $dst,$src" %} 4554 ins_encode %{ 4555 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4556 %} 4557 ins_pipe( pipe_slow ); 4558 %} 4559 4560 instruct ReplD_mem(vec dst, memory mem) %{ 4561 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4562 match(Set dst (Replicate (LoadD mem))); 4563 format %{ "replicateD $dst,$mem" %} 4564 ins_encode %{ 4565 if (Matcher::vector_length(this) >= 4) { 4566 int vlen_enc = vector_length_encoding(this); 4567 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4568 } else { 4569 __ movddup($dst$$XMMRegister, $mem$$Address); 4570 } 4571 %} 4572 ins_pipe( pipe_slow ); 4573 %} 4574 4575 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 4576 instruct ReplD_imm(vec dst, immD con) %{ 4577 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4578 match(Set dst (Replicate con)); 4579 format %{ "replicateD $dst,$con" %} 4580 ins_encode %{ 4581 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4582 int vlen = Matcher::vector_length_in_bytes(this); 4583 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen); 4584 %} 4585 ins_pipe( pipe_slow ); 4586 %} 4587 4588 instruct ReplD_zero(vec dst, immD0 zero) %{ 4589 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4590 match(Set dst (Replicate zero)); 4591 format %{ "replicateD $dst,$zero" %} 4592 ins_encode %{ 4593 int vlen_enc = vector_length_encoding(this); 4594 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4595 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4596 } else { 4597 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4598 } 4599 %} 4600 ins_pipe( fpu_reg_reg ); 4601 %} 4602 4603 // ====================VECTOR INSERT======================================= 4604 4605 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4606 predicate(Matcher::vector_length_in_bytes(n) < 32); 4607 match(Set dst (VectorInsert (Binary dst val) idx)); 4608 format %{ "vector_insert $dst,$val,$idx" %} 4609 ins_encode %{ 4610 assert(UseSSE >= 4, "required"); 4611 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4612 4613 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4614 4615 assert(is_integral_type(elem_bt), ""); 4616 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4617 4618 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4619 %} 4620 ins_pipe( pipe_slow ); 4621 %} 4622 4623 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4624 predicate(Matcher::vector_length_in_bytes(n) == 32); 4625 match(Set dst (VectorInsert (Binary src val) idx)); 4626 effect(TEMP vtmp); 4627 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4628 ins_encode %{ 4629 int vlen_enc = Assembler::AVX_256bit; 4630 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4631 int elem_per_lane = 16/type2aelembytes(elem_bt); 4632 int log2epr = log2(elem_per_lane); 4633 4634 assert(is_integral_type(elem_bt), "sanity"); 4635 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4636 4637 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4638 uint y_idx = ($idx$$constant >> log2epr) & 1; 4639 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4640 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4641 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4642 %} 4643 ins_pipe( pipe_slow ); 4644 %} 4645 4646 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4647 predicate(Matcher::vector_length_in_bytes(n) == 64); 4648 match(Set dst (VectorInsert (Binary src val) idx)); 4649 effect(TEMP vtmp); 4650 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4651 ins_encode %{ 4652 assert(UseAVX > 2, "sanity"); 4653 4654 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4655 int elem_per_lane = 16/type2aelembytes(elem_bt); 4656 int log2epr = log2(elem_per_lane); 4657 4658 assert(is_integral_type(elem_bt), ""); 4659 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4660 4661 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4662 uint y_idx = ($idx$$constant >> log2epr) & 3; 4663 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4664 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4665 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, 
y_idx); 4666 %} 4667 ins_pipe( pipe_slow ); 4668 %} 4669 4670 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4671 predicate(Matcher::vector_length(n) == 2); 4672 match(Set dst (VectorInsert (Binary dst val) idx)); 4673 format %{ "vector_insert $dst,$val,$idx" %} 4674 ins_encode %{ 4675 assert(UseSSE >= 4, "required"); 4676 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4677 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4678 4679 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4680 %} 4681 ins_pipe( pipe_slow ); 4682 %} 4683 4684 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4685 predicate(Matcher::vector_length(n) == 4); 4686 match(Set dst (VectorInsert (Binary src val) idx)); 4687 effect(TEMP vtmp); 4688 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4689 ins_encode %{ 4690 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4691 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4692 4693 uint x_idx = $idx$$constant & right_n_bits(1); 4694 uint y_idx = ($idx$$constant >> 1) & 1; 4695 int vlen_enc = Assembler::AVX_256bit; 4696 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4697 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4698 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4699 %} 4700 ins_pipe( pipe_slow ); 4701 %} 4702 4703 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 4704 predicate(Matcher::vector_length(n) == 8); 4705 match(Set dst (VectorInsert (Binary src val) idx)); 4706 effect(TEMP vtmp); 4707 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4708 ins_encode %{ 4709 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 4710 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4711 4712 uint x_idx = $idx$$constant & right_n_bits(1); 4713 uint y_idx = ($idx$$constant >> 1) & 3; 4714 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4715 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4716 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4717 %} 4718 ins_pipe( pipe_slow ); 4719 %} 4720 4721 instruct insertF(vec dst, regF val, immU8 idx) %{ 4722 predicate(Matcher::vector_length(n) < 8); 4723 match(Set dst (VectorInsert (Binary dst val) idx)); 4724 format %{ "vector_insert $dst,$val,$idx" %} 4725 ins_encode %{ 4726 assert(UseSSE >= 4, "sanity"); 4727 4728 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4729 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4730 4731 uint x_idx = $idx$$constant & right_n_bits(2); 4732 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4733 %} 4734 ins_pipe( pipe_slow ); 4735 %} 4736 4737 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 4738 predicate(Matcher::vector_length(n) >= 8); 4739 match(Set dst (VectorInsert (Binary src val) idx)); 4740 effect(TEMP vtmp); 4741 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4742 ins_encode %{ 4743 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4744 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4745 4746 int vlen = Matcher::vector_length(this); 4747 uint x_idx = $idx$$constant & right_n_bits(2); 4748 if (vlen == 8) { 4749 uint y_idx = ($idx$$constant >> 2) & 1; 4750 
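      // Lane decomposition: y_idx selects the 128-bit lane holding element $idx and x_idx is the
      // float slot within that lane; e.g. for 8 floats, idx=6 gives x_idx = (6 & 3) = 2 and
      // y_idx = ((6 >> 2) & 1) = 1. The lane is extracted, the scalar inserted at x_idx, and the
      // patched lane written back into $dst at position y_idx.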
      int vlen_enc = Assembler::AVX_256bit;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

// ====================REDUCTION ARITHMETIC=======================================

// =======================Int Reduction==========================================

instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT);
// src2 4826 match(Set dst (AddReductionVI src1 src2)); 4827 match(Set dst (MulReductionVI src1 src2)); 4828 match(Set dst (AndReductionV src1 src2)); 4829 match(Set dst ( OrReductionV src1 src2)); 4830 match(Set dst (XorReductionV src1 src2)); 4831 match(Set dst (MinReductionV src1 src2)); 4832 match(Set dst (MaxReductionV src1 src2)); 4833 effect(TEMP vtmp1, TEMP vtmp2); 4834 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4835 ins_encode %{ 4836 int opcode = this->ideal_Opcode(); 4837 int vlen = Matcher::vector_length(this, $src2); 4838 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4839 %} 4840 ins_pipe( pipe_slow ); 4841 %} 4842 4843 // =======================Long Reduction========================================== 4844 4845 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4846 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); 4847 match(Set dst (AddReductionVL src1 src2)); 4848 match(Set dst (MulReductionVL src1 src2)); 4849 match(Set dst (AndReductionV src1 src2)); 4850 match(Set dst ( OrReductionV src1 src2)); 4851 match(Set dst (XorReductionV src1 src2)); 4852 match(Set dst (MinReductionV src1 src2)); 4853 match(Set dst (MaxReductionV src1 src2)); 4854 effect(TEMP vtmp1, TEMP vtmp2); 4855 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4856 ins_encode %{ 4857 int opcode = this->ideal_Opcode(); 4858 int vlen = Matcher::vector_length(this, $src2); 4859 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4860 %} 4861 ins_pipe( pipe_slow ); 4862 %} 4863 4864 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 4865 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); 4866 match(Set dst (AddReductionVL src1 src2)); 4867 match(Set dst (MulReductionVL src1 src2)); 4868 match(Set dst (AndReductionV src1 src2)); 4869 match(Set dst ( OrReductionV src1 src2)); 4870 match(Set dst (XorReductionV src1 src2)); 4871 match(Set dst (MinReductionV src1 src2)); 4872 match(Set dst (MaxReductionV src1 src2)); 4873 effect(TEMP vtmp1, TEMP vtmp2); 4874 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4875 ins_encode %{ 4876 int opcode = this->ideal_Opcode(); 4877 int vlen = Matcher::vector_length(this, $src2); 4878 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4879 %} 4880 ins_pipe( pipe_slow ); 4881 %} 4882 4883 // =======================Float Reduction========================================== 4884 4885 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 4886 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src 4887 match(Set dst (AddReductionVF dst src)); 4888 match(Set dst (MulReductionVF dst src)); 4889 effect(TEMP dst, TEMP vtmp); 4890 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 4891 ins_encode %{ 4892 int opcode = this->ideal_Opcode(); 4893 int vlen = Matcher::vector_length(this, $src); 4894 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4895 %} 4896 ins_pipe( pipe_slow ); 4897 %} 4898 4899 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 4900 
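  // Strictly ordered float add/mul reduction: the lanes must be accumulated in element order,
  // so $dst doubles as the incoming accumulator (the match uses dst as both input and result)
  // rather than taking a separate identity operand like the unordered VectorAPI rules below.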
predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 4901 match(Set dst (AddReductionVF dst src)); 4902 match(Set dst (MulReductionVF dst src)); 4903 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4904 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4905 ins_encode %{ 4906 int opcode = this->ideal_Opcode(); 4907 int vlen = Matcher::vector_length(this, $src); 4908 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4909 %} 4910 ins_pipe( pipe_slow ); 4911 %} 4912 4913 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 4914 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src 4915 match(Set dst (AddReductionVF dst src)); 4916 match(Set dst (MulReductionVF dst src)); 4917 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4918 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4919 ins_encode %{ 4920 int opcode = this->ideal_Opcode(); 4921 int vlen = Matcher::vector_length(this, $src); 4922 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4923 %} 4924 ins_pipe( pipe_slow ); 4925 %} 4926 4927 4928 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{ 4929 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 4930 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 4931 // src1 contains reduction identity 4932 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 4933 match(Set dst (AddReductionVF src1 src2)); 4934 match(Set dst (MulReductionVF src1 src2)); 4935 effect(TEMP dst); 4936 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %} 4937 ins_encode %{ 4938 int opcode = this->ideal_Opcode(); 4939 int vlen = Matcher::vector_length(this, $src2); 4940 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 4941 %} 4942 ins_pipe( pipe_slow ); 4943 %} 4944 4945 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{ 4946 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 4947 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 4948 // src1 contains reduction identity 4949 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 4950 match(Set dst (AddReductionVF src1 src2)); 4951 match(Set dst (MulReductionVF src1 src2)); 4952 effect(TEMP dst, TEMP vtmp); 4953 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %} 4954 ins_encode %{ 4955 int opcode = this->ideal_Opcode(); 4956 int vlen = Matcher::vector_length(this, $src2); 4957 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 4958 %} 4959 ins_pipe( pipe_slow ); 4960 %} 4961 4962 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{ 4963 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 4964 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
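  // Because src1 is known to hold the identity, it is not passed to unordered_reduce_fp below;
  // only the lanes of src2 are folded, and they may be combined in whatever association order
  // is cheapest since no strict ordering is required.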
4965 // src1 contains reduction identity 4966 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 4967 match(Set dst (AddReductionVF src1 src2)); 4968 match(Set dst (MulReductionVF src1 src2)); 4969 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4970 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4971 ins_encode %{ 4972 int opcode = this->ideal_Opcode(); 4973 int vlen = Matcher::vector_length(this, $src2); 4974 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4975 %} 4976 ins_pipe( pipe_slow ); 4977 %} 4978 4979 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4980 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 4981 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 4982 // src1 contains reduction identity 4983 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2 4984 match(Set dst (AddReductionVF src1 src2)); 4985 match(Set dst (MulReductionVF src1 src2)); 4986 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4987 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4988 ins_encode %{ 4989 int opcode = this->ideal_Opcode(); 4990 int vlen = Matcher::vector_length(this, $src2); 4991 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4992 %} 4993 ins_pipe( pipe_slow ); 4994 %} 4995 4996 // =======================Double Reduction========================================== 4997 4998 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 4999 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src 5000 match(Set dst (AddReductionVD dst src)); 5001 match(Set dst (MulReductionVD dst src)); 5002 effect(TEMP dst, TEMP vtmp); 5003 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 5004 ins_encode %{ 5005 int opcode = this->ideal_Opcode(); 5006 int vlen = Matcher::vector_length(this, $src); 5007 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5008 %} 5009 ins_pipe( pipe_slow ); 5010 %} 5011 5012 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 5013 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src 5014 match(Set dst (AddReductionVD dst src)); 5015 match(Set dst (MulReductionVD dst src)); 5016 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5017 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5018 ins_encode %{ 5019 int opcode = this->ideal_Opcode(); 5020 int vlen = Matcher::vector_length(this, $src); 5021 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5022 %} 5023 ins_pipe( pipe_slow ); 5024 %} 5025 5026 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5027 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5028 match(Set dst (AddReductionVD dst src)); 5029 match(Set dst (MulReductionVD dst src)); 5030 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5031 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5032 ins_encode %{ 5033 int opcode = this->ideal_Opcode(); 5034 int vlen = Matcher::vector_length(this, $src); 5035 __ 
reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5036 %} 5037 ins_pipe( pipe_slow ); 5038 %} 5039 5040 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{ 5041 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5042 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5043 // src1 contains reduction identity 5044 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5045 match(Set dst (AddReductionVD src1 src2)); 5046 match(Set dst (MulReductionVD src1 src2)); 5047 effect(TEMP dst); 5048 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %} 5049 ins_encode %{ 5050 int opcode = this->ideal_Opcode(); 5051 int vlen = Matcher::vector_length(this, $src2); 5052 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5053 %} 5054 ins_pipe( pipe_slow ); 5055 %} 5056 5057 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{ 5058 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5059 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5060 // src1 contains reduction identity 5061 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5062 match(Set dst (AddReductionVD src1 src2)); 5063 match(Set dst (MulReductionVD src1 src2)); 5064 effect(TEMP dst, TEMP vtmp); 5065 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5066 ins_encode %{ 5067 int opcode = this->ideal_Opcode(); 5068 int vlen = Matcher::vector_length(this, $src2); 5069 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5070 %} 5071 ins_pipe( pipe_slow ); 5072 %} 5073 5074 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5075 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5076 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
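  // Same scheme as the float variants above: src1 only supplies the identity, and the two TEMP
  // vectors give unordered_reduce_fp scratch space for partial results while it narrows the
  // 512-bit source down to a single double.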
5077 // src1 contains reduction identity 5078 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5079 match(Set dst (AddReductionVD src1 src2)); 5080 match(Set dst (MulReductionVD src1 src2)); 5081 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5082 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5083 ins_encode %{ 5084 int opcode = this->ideal_Opcode(); 5085 int vlen = Matcher::vector_length(this, $src2); 5086 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5087 %} 5088 ins_pipe( pipe_slow ); 5089 %} 5090 5091 // =======================Byte Reduction========================================== 5092 5093 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5094 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 5095 match(Set dst (AddReductionVI src1 src2)); 5096 match(Set dst (AndReductionV src1 src2)); 5097 match(Set dst ( OrReductionV src1 src2)); 5098 match(Set dst (XorReductionV src1 src2)); 5099 match(Set dst (MinReductionV src1 src2)); 5100 match(Set dst (MaxReductionV src1 src2)); 5101 effect(TEMP vtmp1, TEMP vtmp2); 5102 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5103 ins_encode %{ 5104 int opcode = this->ideal_Opcode(); 5105 int vlen = Matcher::vector_length(this, $src2); 5106 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5107 %} 5108 ins_pipe( pipe_slow ); 5109 %} 5110 5111 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5112 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 5113 match(Set dst (AddReductionVI src1 src2)); 5114 match(Set dst (AndReductionV src1 src2)); 5115 match(Set dst ( OrReductionV src1 src2)); 5116 match(Set dst (XorReductionV src1 src2)); 5117 match(Set dst (MinReductionV src1 src2)); 5118 match(Set dst (MaxReductionV src1 src2)); 5119 effect(TEMP vtmp1, TEMP vtmp2); 5120 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5121 ins_encode %{ 5122 int opcode = this->ideal_Opcode(); 5123 int vlen = Matcher::vector_length(this, $src2); 5124 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5125 %} 5126 ins_pipe( pipe_slow ); 5127 %} 5128 5129 // =======================Short Reduction========================================== 5130 5131 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5132 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 5133 match(Set dst (AddReductionVI src1 src2)); 5134 match(Set dst (MulReductionVI src1 src2)); 5135 match(Set dst (AndReductionV src1 src2)); 5136 match(Set dst ( OrReductionV src1 src2)); 5137 match(Set dst (XorReductionV src1 src2)); 5138 match(Set dst (MinReductionV src1 src2)); 5139 match(Set dst (MaxReductionV src1 src2)); 5140 effect(TEMP vtmp1, TEMP vtmp2); 5141 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5142 ins_encode %{ 5143 int opcode = this->ideal_Opcode(); 5144 int vlen = Matcher::vector_length(this, $src2); 5145 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5146 %} 5147 ins_pipe( pipe_slow 
); 5148 %} 5149 5150 // =======================Mul Reduction========================================== 5151 5152 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5153 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5154 Matcher::vector_length(n->in(2)) <= 32); // src2 5155 match(Set dst (MulReductionVI src1 src2)); 5156 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5157 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5158 ins_encode %{ 5159 int opcode = this->ideal_Opcode(); 5160 int vlen = Matcher::vector_length(this, $src2); 5161 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5162 %} 5163 ins_pipe( pipe_slow ); 5164 %} 5165 5166 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5167 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5168 Matcher::vector_length(n->in(2)) == 64); // src2 5169 match(Set dst (MulReductionVI src1 src2)); 5170 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5171 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5172 ins_encode %{ 5173 int opcode = this->ideal_Opcode(); 5174 int vlen = Matcher::vector_length(this, $src2); 5175 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5176 %} 5177 ins_pipe( pipe_slow ); 5178 %} 5179 5180 //--------------------Min/Max Float Reduction -------------------- 5181 // Float Min Reduction 5182 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5183 legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5184 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5185 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5186 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5187 Matcher::vector_length(n->in(2)) == 2); 5188 match(Set dst (MinReductionV src1 src2)); 5189 match(Set dst (MaxReductionV src1 src2)); 5190 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5191 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5192 ins_encode %{ 5193 assert(UseAVX > 0, "sanity"); 5194 5195 int opcode = this->ideal_Opcode(); 5196 int vlen = Matcher::vector_length(this, $src2); 5197 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5198 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5199 %} 5200 ins_pipe( pipe_slow ); 5201 %} 5202 5203 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5204 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5205 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5206 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5207 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5208 Matcher::vector_length(n->in(2)) >= 4); 5209 match(Set dst (MinReductionV src1 src2)); 5210 match(Set dst (MaxReductionV src1 src2)); 5211 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5212 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5213 ins_encode %{ 5214 assert(UseAVX > 0, 
"sanity"); 5215 5216 int opcode = this->ideal_Opcode(); 5217 int vlen = Matcher::vector_length(this, $src2); 5218 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5219 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5220 %} 5221 ins_pipe( pipe_slow ); 5222 %} 5223 5224 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp, 5225 legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5226 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5227 Matcher::vector_length(n->in(2)) == 2); 5228 match(Set dst (MinReductionV dst src)); 5229 match(Set dst (MaxReductionV dst src)); 5230 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5231 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5232 ins_encode %{ 5233 assert(UseAVX > 0, "sanity"); 5234 5235 int opcode = this->ideal_Opcode(); 5236 int vlen = Matcher::vector_length(this, $src); 5237 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5238 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5239 %} 5240 ins_pipe( pipe_slow ); 5241 %} 5242 5243 5244 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp, 5245 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5246 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5247 Matcher::vector_length(n->in(2)) >= 4); 5248 match(Set dst (MinReductionV dst src)); 5249 match(Set dst (MaxReductionV dst src)); 5250 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5251 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5252 ins_encode %{ 5253 assert(UseAVX > 0, "sanity"); 5254 5255 int opcode = this->ideal_Opcode(); 5256 int vlen = Matcher::vector_length(this, $src); 5257 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5258 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5259 %} 5260 ins_pipe( pipe_slow ); 5261 %} 5262 5263 instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{ 5264 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5265 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5266 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5267 Matcher::vector_length(n->in(2)) == 2); 5268 match(Set dst (MinReductionV src1 src2)); 5269 match(Set dst (MaxReductionV src1 src2)); 5270 effect(TEMP dst, TEMP xtmp1); 5271 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %} 5272 ins_encode %{ 5273 int opcode = this->ideal_Opcode(); 5274 int vlen = Matcher::vector_length(this, $src2); 5275 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5276 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister); 5277 %} 5278 ins_pipe( pipe_slow ); 5279 %} 5280 5281 instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{ 5282 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5283 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5284 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == 
                       TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                         xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min/Max Double Reduction --------------------
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                            legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                           legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5356 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5357 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5358 Matcher::vector_length(n->in(2)) >= 4); 5359 match(Set dst (MinReductionV src1 src2)); 5360 match(Set dst (MaxReductionV src1 src2)); 5361 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5362 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5363 ins_encode %{ 5364 assert(UseAVX > 0, "sanity"); 5365 5366 int opcode = this->ideal_Opcode(); 5367 int vlen = Matcher::vector_length(this, $src2); 5368 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5369 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5370 %} 5371 ins_pipe( pipe_slow ); 5372 %} 5373 5374 5375 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, 5376 legVec tmp3, legVec tmp4, rFlagsReg cr) %{ 5377 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5378 Matcher::vector_length(n->in(2)) == 2); 5379 match(Set dst (MinReductionV dst src)); 5380 match(Set dst (MaxReductionV dst src)); 5381 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5382 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5383 ins_encode %{ 5384 assert(UseAVX > 0, "sanity"); 5385 5386 int opcode = this->ideal_Opcode(); 5387 int vlen = Matcher::vector_length(this, $src); 5388 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5389 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5390 %} 5391 ins_pipe( pipe_slow ); 5392 %} 5393 5394 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3, 5395 legVec tmp4, legVec tmp5, rFlagsReg cr) %{ 5396 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5397 Matcher::vector_length(n->in(2)) >= 4); 5398 match(Set dst (MinReductionV dst src)); 5399 match(Set dst (MaxReductionV dst src)); 5400 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5401 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5402 ins_encode %{ 5403 assert(UseAVX > 0, "sanity"); 5404 5405 int opcode = this->ideal_Opcode(); 5406 int vlen = Matcher::vector_length(this, $src); 5407 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5408 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5409 %} 5410 ins_pipe( pipe_slow ); 5411 %} 5412 5413 instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{ 5414 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5415 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5416 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5417 Matcher::vector_length(n->in(2)) == 2); 5418 match(Set dst (MinReductionV src1 src2)); 5419 match(Set dst (MaxReductionV src1 src2)); 5420 effect(TEMP dst, TEMP xtmp1); 5421 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %} 5422 ins_encode %{ 5423 int opcode = 
this->ideal_Opcode(); 5424 int vlen = Matcher::vector_length(this, $src2); 5425 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, 5426 xnoreg, xnoreg, $xtmp1$$XMMRegister); 5427 %} 5428 ins_pipe( pipe_slow ); 5429 %} 5430 5431 instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{ 5432 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5433 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5434 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5435 Matcher::vector_length(n->in(2)) >= 4); 5436 match(Set dst (MinReductionV src1 src2)); 5437 match(Set dst (MaxReductionV src1 src2)); 5438 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 5439 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %} 5440 ins_encode %{ 5441 int opcode = this->ideal_Opcode(); 5442 int vlen = Matcher::vector_length(this, $src2); 5443 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, 5444 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 5445 %} 5446 ins_pipe( pipe_slow ); 5447 %} 5448 5449 5450 instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{ 5451 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5452 Matcher::vector_length(n->in(2)) == 2); 5453 match(Set dst (MinReductionV dst src)); 5454 match(Set dst (MaxReductionV dst src)); 5455 effect(TEMP dst, TEMP xtmp1); 5456 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %} 5457 ins_encode %{ 5458 int opcode = this->ideal_Opcode(); 5459 int vlen = Matcher::vector_length(this, $src); 5460 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5461 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister); 5462 %} 5463 ins_pipe( pipe_slow ); 5464 %} 5465 5466 instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{ 5467 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5468 Matcher::vector_length(n->in(2)) >= 4); 5469 match(Set dst (MinReductionV dst src)); 5470 match(Set dst (MaxReductionV dst src)); 5471 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 5472 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %} 5473 ins_encode %{ 5474 int opcode = this->ideal_Opcode(); 5475 int vlen = Matcher::vector_length(this, $src); 5476 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5477 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 5478 %} 5479 ins_pipe( pipe_slow ); 5480 %} 5481 5482 // ====================VECTOR ARITHMETIC======================================= 5483 5484 // --------------------------------- ADD -------------------------------------- 5485 5486 // Bytes vector add 5487 instruct vaddB(vec dst, vec src) %{ 5488 predicate(UseAVX == 0); 5489 match(Set dst (AddVB dst src)); 5490 format %{ "paddb $dst,$src\t! add packedB" %} 5491 ins_encode %{ 5492 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5493 %} 5494 ins_pipe( pipe_slow ); 5495 %} 5496 5497 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5498 predicate(UseAVX > 0); 5499 match(Set dst (AddVB src1 src2)); 5500 format %{ "vpaddb $dst,$src1,$src2\t! 
add packedB" %} 5501 ins_encode %{ 5502 int vlen_enc = vector_length_encoding(this); 5503 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5504 %} 5505 ins_pipe( pipe_slow ); 5506 %} 5507 5508 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5509 predicate((UseAVX > 0) && 5510 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5511 match(Set dst (AddVB src (LoadVector mem))); 5512 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5513 ins_encode %{ 5514 int vlen_enc = vector_length_encoding(this); 5515 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5516 %} 5517 ins_pipe( pipe_slow ); 5518 %} 5519 5520 // Shorts/Chars vector add 5521 instruct vaddS(vec dst, vec src) %{ 5522 predicate(UseAVX == 0); 5523 match(Set dst (AddVS dst src)); 5524 format %{ "paddw $dst,$src\t! add packedS" %} 5525 ins_encode %{ 5526 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5527 %} 5528 ins_pipe( pipe_slow ); 5529 %} 5530 5531 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5532 predicate(UseAVX > 0); 5533 match(Set dst (AddVS src1 src2)); 5534 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5535 ins_encode %{ 5536 int vlen_enc = vector_length_encoding(this); 5537 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5538 %} 5539 ins_pipe( pipe_slow ); 5540 %} 5541 5542 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5543 predicate((UseAVX > 0) && 5544 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5545 match(Set dst (AddVS src (LoadVector mem))); 5546 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5547 ins_encode %{ 5548 int vlen_enc = vector_length_encoding(this); 5549 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5550 %} 5551 ins_pipe( pipe_slow ); 5552 %} 5553 5554 // Integers vector add 5555 instruct vaddI(vec dst, vec src) %{ 5556 predicate(UseAVX == 0); 5557 match(Set dst (AddVI dst src)); 5558 format %{ "paddd $dst,$src\t! add packedI" %} 5559 ins_encode %{ 5560 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5561 %} 5562 ins_pipe( pipe_slow ); 5563 %} 5564 5565 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5566 predicate(UseAVX > 0); 5567 match(Set dst (AddVI src1 src2)); 5568 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5569 ins_encode %{ 5570 int vlen_enc = vector_length_encoding(this); 5571 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5572 %} 5573 ins_pipe( pipe_slow ); 5574 %} 5575 5576 5577 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5578 predicate((UseAVX > 0) && 5579 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5580 match(Set dst (AddVI src (LoadVector mem))); 5581 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5582 ins_encode %{ 5583 int vlen_enc = vector_length_encoding(this); 5584 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5585 %} 5586 ins_pipe( pipe_slow ); 5587 %} 5588 5589 // Longs vector add 5590 instruct vaddL(vec dst, vec src) %{ 5591 predicate(UseAVX == 0); 5592 match(Set dst (AddVL dst src)); 5593 format %{ "paddq $dst,$src\t! add packedL" %} 5594 ins_encode %{ 5595 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5596 %} 5597 ins_pipe( pipe_slow ); 5598 %} 5599 5600 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5601 predicate(UseAVX > 0); 5602 match(Set dst (AddVL src1 src2)); 5603 format %{ "vpaddq $dst,$src1,$src2\t! 
add packedL" %} 5604 ins_encode %{ 5605 int vlen_enc = vector_length_encoding(this); 5606 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5607 %} 5608 ins_pipe( pipe_slow ); 5609 %} 5610 5611 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5612 predicate((UseAVX > 0) && 5613 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5614 match(Set dst (AddVL src (LoadVector mem))); 5615 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5616 ins_encode %{ 5617 int vlen_enc = vector_length_encoding(this); 5618 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5619 %} 5620 ins_pipe( pipe_slow ); 5621 %} 5622 5623 // Floats vector add 5624 instruct vaddF(vec dst, vec src) %{ 5625 predicate(UseAVX == 0); 5626 match(Set dst (AddVF dst src)); 5627 format %{ "addps $dst,$src\t! add packedF" %} 5628 ins_encode %{ 5629 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5630 %} 5631 ins_pipe( pipe_slow ); 5632 %} 5633 5634 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5635 predicate(UseAVX > 0); 5636 match(Set dst (AddVF src1 src2)); 5637 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5638 ins_encode %{ 5639 int vlen_enc = vector_length_encoding(this); 5640 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5641 %} 5642 ins_pipe( pipe_slow ); 5643 %} 5644 5645 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5646 predicate((UseAVX > 0) && 5647 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5648 match(Set dst (AddVF src (LoadVector mem))); 5649 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5650 ins_encode %{ 5651 int vlen_enc = vector_length_encoding(this); 5652 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5653 %} 5654 ins_pipe( pipe_slow ); 5655 %} 5656 5657 // Doubles vector add 5658 instruct vaddD(vec dst, vec src) %{ 5659 predicate(UseAVX == 0); 5660 match(Set dst (AddVD dst src)); 5661 format %{ "addpd $dst,$src\t! add packedD" %} 5662 ins_encode %{ 5663 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5664 %} 5665 ins_pipe( pipe_slow ); 5666 %} 5667 5668 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5669 predicate(UseAVX > 0); 5670 match(Set dst (AddVD src1 src2)); 5671 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5672 ins_encode %{ 5673 int vlen_enc = vector_length_encoding(this); 5674 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5675 %} 5676 ins_pipe( pipe_slow ); 5677 %} 5678 5679 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5680 predicate((UseAVX > 0) && 5681 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5682 match(Set dst (AddVD src (LoadVector mem))); 5683 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5684 ins_encode %{ 5685 int vlen_enc = vector_length_encoding(this); 5686 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5687 %} 5688 ins_pipe( pipe_slow ); 5689 %} 5690 5691 // --------------------------------- SUB -------------------------------------- 5692 5693 // Bytes vector sub 5694 instruct vsubB(vec dst, vec src) %{ 5695 predicate(UseAVX == 0); 5696 match(Set dst (SubVB dst src)); 5697 format %{ "psubb $dst,$src\t! sub packedB" %} 5698 ins_encode %{ 5699 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5700 %} 5701 ins_pipe( pipe_slow ); 5702 %} 5703 5704 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5705 predicate(UseAVX > 0); 5706 match(Set dst (SubVB src1 src2)); 5707 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packedB" %} 5708 ins_encode %{ 5709 int vlen_enc = vector_length_encoding(this); 5710 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5711 %} 5712 ins_pipe( pipe_slow ); 5713 %} 5714 5715 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5716 predicate((UseAVX > 0) && 5717 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5718 match(Set dst (SubVB src (LoadVector mem))); 5719 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5720 ins_encode %{ 5721 int vlen_enc = vector_length_encoding(this); 5722 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5723 %} 5724 ins_pipe( pipe_slow ); 5725 %} 5726 5727 // Shorts/Chars vector sub 5728 instruct vsubS(vec dst, vec src) %{ 5729 predicate(UseAVX == 0); 5730 match(Set dst (SubVS dst src)); 5731 format %{ "psubw $dst,$src\t! sub packedS" %} 5732 ins_encode %{ 5733 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5734 %} 5735 ins_pipe( pipe_slow ); 5736 %} 5737 5738 5739 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5740 predicate(UseAVX > 0); 5741 match(Set dst (SubVS src1 src2)); 5742 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5743 ins_encode %{ 5744 int vlen_enc = vector_length_encoding(this); 5745 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5746 %} 5747 ins_pipe( pipe_slow ); 5748 %} 5749 5750 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5751 predicate((UseAVX > 0) && 5752 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5753 match(Set dst (SubVS src (LoadVector mem))); 5754 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5755 ins_encode %{ 5756 int vlen_enc = vector_length_encoding(this); 5757 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5758 %} 5759 ins_pipe( pipe_slow ); 5760 %} 5761 5762 // Integers vector sub 5763 instruct vsubI(vec dst, vec src) %{ 5764 predicate(UseAVX == 0); 5765 match(Set dst (SubVI dst src)); 5766 format %{ "psubd $dst,$src\t! sub packedI" %} 5767 ins_encode %{ 5768 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5769 %} 5770 ins_pipe( pipe_slow ); 5771 %} 5772 5773 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5774 predicate(UseAVX > 0); 5775 match(Set dst (SubVI src1 src2)); 5776 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5777 ins_encode %{ 5778 int vlen_enc = vector_length_encoding(this); 5779 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5780 %} 5781 ins_pipe( pipe_slow ); 5782 %} 5783 5784 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5785 predicate((UseAVX > 0) && 5786 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5787 match(Set dst (SubVI src (LoadVector mem))); 5788 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5789 ins_encode %{ 5790 int vlen_enc = vector_length_encoding(this); 5791 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5792 %} 5793 ins_pipe( pipe_slow ); 5794 %} 5795 5796 // Longs vector sub 5797 instruct vsubL(vec dst, vec src) %{ 5798 predicate(UseAVX == 0); 5799 match(Set dst (SubVL dst src)); 5800 format %{ "psubq $dst,$src\t! sub packedL" %} 5801 ins_encode %{ 5802 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5803 %} 5804 ins_pipe( pipe_slow ); 5805 %} 5806 5807 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5808 predicate(UseAVX > 0); 5809 match(Set dst (SubVL src1 src2)); 5810 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packedL" %} 5811 ins_encode %{ 5812 int vlen_enc = vector_length_encoding(this); 5813 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5814 %} 5815 ins_pipe( pipe_slow ); 5816 %} 5817 5818 5819 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5820 predicate((UseAVX > 0) && 5821 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5822 match(Set dst (SubVL src (LoadVector mem))); 5823 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5824 ins_encode %{ 5825 int vlen_enc = vector_length_encoding(this); 5826 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5827 %} 5828 ins_pipe( pipe_slow ); 5829 %} 5830 5831 // Floats vector sub 5832 instruct vsubF(vec dst, vec src) %{ 5833 predicate(UseAVX == 0); 5834 match(Set dst (SubVF dst src)); 5835 format %{ "subps $dst,$src\t! sub packedF" %} 5836 ins_encode %{ 5837 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5838 %} 5839 ins_pipe( pipe_slow ); 5840 %} 5841 5842 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5843 predicate(UseAVX > 0); 5844 match(Set dst (SubVF src1 src2)); 5845 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5846 ins_encode %{ 5847 int vlen_enc = vector_length_encoding(this); 5848 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5849 %} 5850 ins_pipe( pipe_slow ); 5851 %} 5852 5853 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5854 predicate((UseAVX > 0) && 5855 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5856 match(Set dst (SubVF src (LoadVector mem))); 5857 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5858 ins_encode %{ 5859 int vlen_enc = vector_length_encoding(this); 5860 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5861 %} 5862 ins_pipe( pipe_slow ); 5863 %} 5864 5865 // Doubles vector sub 5866 instruct vsubD(vec dst, vec src) %{ 5867 predicate(UseAVX == 0); 5868 match(Set dst (SubVD dst src)); 5869 format %{ "subpd $dst,$src\t! sub packedD" %} 5870 ins_encode %{ 5871 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 5872 %} 5873 ins_pipe( pipe_slow ); 5874 %} 5875 5876 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 5877 predicate(UseAVX > 0); 5878 match(Set dst (SubVD src1 src2)); 5879 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 5880 ins_encode %{ 5881 int vlen_enc = vector_length_encoding(this); 5882 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5883 %} 5884 ins_pipe( pipe_slow ); 5885 %} 5886 5887 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 5888 predicate((UseAVX > 0) && 5889 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5890 match(Set dst (SubVD src (LoadVector mem))); 5891 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 5892 ins_encode %{ 5893 int vlen_enc = vector_length_encoding(this); 5894 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5895 %} 5896 ins_pipe( pipe_slow ); 5897 %} 5898 5899 // --------------------------------- MUL -------------------------------------- 5900 5901 // Byte vector mul 5902 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 5903 predicate(Matcher::vector_length_in_bytes(n) <= 8); 5904 match(Set dst (MulVB src1 src2)); 5905 effect(TEMP dst, TEMP xtmp); 5906 format %{ "mulVB $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 5907 ins_encode %{ 5908 assert(UseSSE > 3, "required"); 5909 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 5910 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 5911 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 5912 __ psllw($dst$$XMMRegister, 8); 5913 __ psrlw($dst$$XMMRegister, 8); 5914 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 5915 %} 5916 ins_pipe( pipe_slow ); 5917 %} 5918 5919 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 5920 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 5921 match(Set dst (MulVB src1 src2)); 5922 effect(TEMP dst, TEMP xtmp); 5923 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 5924 ins_encode %{ 5925 assert(UseSSE > 3, "required"); 5926 // Odd-index elements 5927 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 5928 __ psrlw($dst$$XMMRegister, 8); 5929 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 5930 __ psrlw($xtmp$$XMMRegister, 8); 5931 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 5932 __ psllw($dst$$XMMRegister, 8); 5933 // Even-index elements 5934 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 5935 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 5936 __ psllw($xtmp$$XMMRegister, 8); 5937 __ psrlw($xtmp$$XMMRegister, 8); 5938 // Combine 5939 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 5940 %} 5941 ins_pipe( pipe_slow ); 5942 %} 5943 5944 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 5945 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 5946 match(Set dst (MulVB src1 src2)); 5947 effect(TEMP xtmp1, TEMP xtmp2); 5948 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 5949 ins_encode %{ 5950 int vlen_enc = vector_length_encoding(this); 5951 // Odd-index elements 5952 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 5953 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 5954 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5955 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 5956 // Even-index elements 5957 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5958 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 5959 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 5960 // Combine 5961 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5962 %} 5963 ins_pipe( pipe_slow ); 5964 %} 5965 5966 // Shorts/Chars vector mul 5967 instruct vmulS(vec dst, vec src) %{ 5968 predicate(UseAVX == 0); 5969 match(Set dst (MulVS dst src)); 5970 format %{ "pmullw $dst,$src\t! mul packedS" %} 5971 ins_encode %{ 5972 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 5973 %} 5974 ins_pipe( pipe_slow ); 5975 %} 5976 5977 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 5978 predicate(UseAVX > 0); 5979 match(Set dst (MulVS src1 src2)); 5980 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 5981 ins_encode %{ 5982 int vlen_enc = vector_length_encoding(this); 5983 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5984 %} 5985 ins_pipe( pipe_slow ); 5986 %} 5987 5988 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 5989 predicate((UseAVX > 0) && 5990 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5991 match(Set dst (MulVS src (LoadVector mem))); 5992 format %{ "vpmullw $dst,$src,$mem\t! 
mul packedS" %} 5993 ins_encode %{ 5994 int vlen_enc = vector_length_encoding(this); 5995 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5996 %} 5997 ins_pipe( pipe_slow ); 5998 %} 5999 6000 // Integers vector mul 6001 instruct vmulI(vec dst, vec src) %{ 6002 predicate(UseAVX == 0); 6003 match(Set dst (MulVI dst src)); 6004 format %{ "pmulld $dst,$src\t! mul packedI" %} 6005 ins_encode %{ 6006 assert(UseSSE > 3, "required"); 6007 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6008 %} 6009 ins_pipe( pipe_slow ); 6010 %} 6011 6012 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 6013 predicate(UseAVX > 0); 6014 match(Set dst (MulVI src1 src2)); 6015 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 6016 ins_encode %{ 6017 int vlen_enc = vector_length_encoding(this); 6018 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6019 %} 6020 ins_pipe( pipe_slow ); 6021 %} 6022 6023 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 6024 predicate((UseAVX > 0) && 6025 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6026 match(Set dst (MulVI src (LoadVector mem))); 6027 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 6028 ins_encode %{ 6029 int vlen_enc = vector_length_encoding(this); 6030 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6031 %} 6032 ins_pipe( pipe_slow ); 6033 %} 6034 6035 // Longs vector mul 6036 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 6037 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6038 VM_Version::supports_avx512dq()) || 6039 VM_Version::supports_avx512vldq()); 6040 match(Set dst (MulVL src1 src2)); 6041 ins_cost(500); 6042 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %} 6043 ins_encode %{ 6044 assert(UseAVX > 2, "required"); 6045 int vlen_enc = vector_length_encoding(this); 6046 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6047 %} 6048 ins_pipe( pipe_slow ); 6049 %} 6050 6051 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 6052 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6053 VM_Version::supports_avx512dq()) || 6054 (Matcher::vector_length_in_bytes(n) > 8 && 6055 VM_Version::supports_avx512vldq())); 6056 match(Set dst (MulVL src (LoadVector mem))); 6057 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 6058 ins_cost(500); 6059 ins_encode %{ 6060 assert(UseAVX > 2, "required"); 6061 int vlen_enc = vector_length_encoding(this); 6062 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6063 %} 6064 ins_pipe( pipe_slow ); 6065 %} 6066 6067 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 6068 predicate(UseAVX == 0); 6069 match(Set dst (MulVL src1 src2)); 6070 ins_cost(500); 6071 effect(TEMP dst, TEMP xtmp); 6072 format %{ "mulVL $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 6073 ins_encode %{ 6074 assert(VM_Version::supports_sse4_1(), "required"); 6075 // Get the lo-hi cross products; only the lower 32 bits are of concern, since a*b = ((a_hi*b_lo + a_lo*b_hi) << 32) + a_lo*b_lo (mod 2^64) 6076 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1); 6077 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister); 6078 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1); 6079 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister); 6080 __ psllq($dst$$XMMRegister, 32); 6081 // Get the lo-lo products 6082 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6083 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister); 6084 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister); 6085 %} 6086 ins_pipe( pipe_slow ); 6087 %} 6088 6089 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6090 predicate(UseAVX > 0 && 6091 ((Matcher::vector_length_in_bytes(n) == 64 && 6092 !VM_Version::supports_avx512dq()) || 6093 (Matcher::vector_length_in_bytes(n) < 64 && 6094 !VM_Version::supports_avx512vldq()))); 6095 match(Set dst (MulVL src1 src2)); 6096 effect(TEMP xtmp1, TEMP xtmp2); 6097 ins_cost(500); 6098 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 6099 ins_encode %{ 6100 int vlen_enc = vector_length_encoding(this); 6101 // Get the lo-hi cross products; only the lower 32 bits are of concern 6102 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc); 6103 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 6104 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc); 6105 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 6106 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc); 6107 // Get the lo-lo products 6108 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6109 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6110 %} 6111 ins_pipe( pipe_slow ); 6112 %} 6113 6114 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{ 6115 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs()); 6116 match(Set dst (MulVL src1 src2)); 6117 ins_cost(100); 6118 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %} 6119 ins_encode %{ 6120 int vlen_enc = vector_length_encoding(this); 6121 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6122 %} 6123 ins_pipe( pipe_slow ); 6124 %} 6125 6126 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{ 6127 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs()); 6128 match(Set dst (MulVL src1 src2)); 6129 ins_cost(100); 6130 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %} 6131 ins_encode %{ 6132 int vlen_enc = vector_length_encoding(this); 6133 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6134 %} 6135 ins_pipe( pipe_slow ); 6136 %} 6137 6138 // Floats vector mul 6139 instruct vmulF(vec dst, vec src) %{ 6140 predicate(UseAVX == 0); 6141 match(Set dst (MulVF dst src)); 6142 format %{ "mulps $dst,$src\t! mul packedF" %} 6143 ins_encode %{ 6144 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 6145 %} 6146 ins_pipe( pipe_slow ); 6147 %} 6148 6149 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 6150 predicate(UseAVX > 0); 6151 match(Set dst (MulVF src1 src2)); 6152 format %{ "vmulps $dst,$src1,$src2\t!
mul packedF" %} 6153 ins_encode %{ 6154 int vlen_enc = vector_length_encoding(this); 6155 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6156 %} 6157 ins_pipe( pipe_slow ); 6158 %} 6159 6160 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 6161 predicate((UseAVX > 0) && 6162 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6163 match(Set dst (MulVF src (LoadVector mem))); 6164 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 6165 ins_encode %{ 6166 int vlen_enc = vector_length_encoding(this); 6167 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6168 %} 6169 ins_pipe( pipe_slow ); 6170 %} 6171 6172 // Doubles vector mul 6173 instruct vmulD(vec dst, vec src) %{ 6174 predicate(UseAVX == 0); 6175 match(Set dst (MulVD dst src)); 6176 format %{ "mulpd $dst,$src\t! mul packedD" %} 6177 ins_encode %{ 6178 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 6179 %} 6180 ins_pipe( pipe_slow ); 6181 %} 6182 6183 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 6184 predicate(UseAVX > 0); 6185 match(Set dst (MulVD src1 src2)); 6186 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} 6187 ins_encode %{ 6188 int vlen_enc = vector_length_encoding(this); 6189 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6190 %} 6191 ins_pipe( pipe_slow ); 6192 %} 6193 6194 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 6195 predicate((UseAVX > 0) && 6196 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6197 match(Set dst (MulVD src (LoadVector mem))); 6198 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 6199 ins_encode %{ 6200 int vlen_enc = vector_length_encoding(this); 6201 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6202 %} 6203 ins_pipe( pipe_slow ); 6204 %} 6205 6206 // --------------------------------- DIV -------------------------------------- 6207 6208 // Floats vector div 6209 instruct vdivF(vec dst, vec src) %{ 6210 predicate(UseAVX == 0); 6211 match(Set dst (DivVF dst src)); 6212 format %{ "divps $dst,$src\t! div packedF" %} 6213 ins_encode %{ 6214 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6215 %} 6216 ins_pipe( pipe_slow ); 6217 %} 6218 6219 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 6220 predicate(UseAVX > 0); 6221 match(Set dst (DivVF src1 src2)); 6222 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 6223 ins_encode %{ 6224 int vlen_enc = vector_length_encoding(this); 6225 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6226 %} 6227 ins_pipe( pipe_slow ); 6228 %} 6229 6230 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6231 predicate((UseAVX > 0) && 6232 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6233 match(Set dst (DivVF src (LoadVector mem))); 6234 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6235 ins_encode %{ 6236 int vlen_enc = vector_length_encoding(this); 6237 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6238 %} 6239 ins_pipe( pipe_slow ); 6240 %} 6241 6242 // Doubles vector div 6243 instruct vdivD(vec dst, vec src) %{ 6244 predicate(UseAVX == 0); 6245 match(Set dst (DivVD dst src)); 6246 format %{ "divpd $dst,$src\t! div packedD" %} 6247 ins_encode %{ 6248 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6249 %} 6250 ins_pipe( pipe_slow ); 6251 %} 6252 6253 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6254 predicate(UseAVX > 0); 6255 match(Set dst (DivVD src1 src2)); 6256 format %{ "vdivpd $dst,$src1,$src2\t! 
div packedD" %} 6257 ins_encode %{ 6258 int vlen_enc = vector_length_encoding(this); 6259 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6260 %} 6261 ins_pipe( pipe_slow ); 6262 %} 6263 6264 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6265 predicate((UseAVX > 0) && 6266 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6267 match(Set dst (DivVD src (LoadVector mem))); 6268 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 6269 ins_encode %{ 6270 int vlen_enc = vector_length_encoding(this); 6271 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6272 %} 6273 ins_pipe( pipe_slow ); 6274 %} 6275 6276 // ------------------------------ MinMax --------------------------------------- 6277 6278 // Byte, Short, Int vector Min/Max 6279 instruct minmax_reg_sse(vec dst, vec src) %{ 6280 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6281 UseAVX == 0); 6282 match(Set dst (MinV dst src)); 6283 match(Set dst (MaxV dst src)); 6284 format %{ "vector_minmax $dst,$src\t! " %} 6285 ins_encode %{ 6286 assert(UseSSE >= 4, "required"); 6287 6288 int opcode = this->ideal_Opcode(); 6289 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6290 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6291 %} 6292 ins_pipe( pipe_slow ); 6293 %} 6294 6295 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6296 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6297 UseAVX > 0); 6298 match(Set dst (MinV src1 src2)); 6299 match(Set dst (MaxV src1 src2)); 6300 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 6301 ins_encode %{ 6302 int opcode = this->ideal_Opcode(); 6303 int vlen_enc = vector_length_encoding(this); 6304 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6305 6306 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6307 %} 6308 ins_pipe( pipe_slow ); 6309 %} 6310 6311 // Long vector Min/Max 6312 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6313 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6314 UseAVX == 0); 6315 match(Set dst (MinV dst src)); 6316 match(Set dst (MaxV src dst)); 6317 effect(TEMP dst, TEMP tmp); 6318 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6319 ins_encode %{ 6320 assert(UseSSE >= 4, "required"); 6321 6322 int opcode = this->ideal_Opcode(); 6323 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6324 assert(elem_bt == T_LONG, "sanity"); 6325 6326 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6327 %} 6328 ins_pipe( pipe_slow ); 6329 %} 6330 6331 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6332 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6333 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6334 match(Set dst (MinV src1 src2)); 6335 match(Set dst (MaxV src1 src2)); 6336 effect(TEMP dst); 6337 format %{ "vector_minmaxL $dst,$src1,$src2\t! 
" %} 6338 ins_encode %{ 6339 int vlen_enc = vector_length_encoding(this); 6340 int opcode = this->ideal_Opcode(); 6341 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6342 assert(elem_bt == T_LONG, "sanity"); 6343 6344 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6345 %} 6346 ins_pipe( pipe_slow ); 6347 %} 6348 6349 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6350 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6351 Matcher::vector_element_basic_type(n) == T_LONG); 6352 match(Set dst (MinV src1 src2)); 6353 match(Set dst (MaxV src1 src2)); 6354 format %{ "vector_minmaxL $dst,$src1,src2\t! " %} 6355 ins_encode %{ 6356 assert(UseAVX > 2, "required"); 6357 6358 int vlen_enc = vector_length_encoding(this); 6359 int opcode = this->ideal_Opcode(); 6360 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6361 assert(elem_bt == T_LONG, "sanity"); 6362 6363 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6364 %} 6365 ins_pipe( pipe_slow ); 6366 %} 6367 6368 // Float/Double vector Min/Max 6369 instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{ 6370 predicate(VM_Version::supports_avx10_2() && 6371 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6372 match(Set dst (MinV a b)); 6373 match(Set dst (MaxV a b)); 6374 format %{ "vector_minmaxFP $dst, $a, $b" %} 6375 ins_encode %{ 6376 int vlen_enc = vector_length_encoding(this); 6377 int opcode = this->ideal_Opcode(); 6378 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6379 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); 6380 %} 6381 ins_pipe( pipe_slow ); 6382 %} 6383 6384 // Float/Double vector Min/Max 6385 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6386 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 && 6387 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6388 UseAVX > 0); 6389 match(Set dst (MinV a b)); 6390 match(Set dst (MaxV a b)); 6391 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6392 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6393 ins_encode %{ 6394 assert(UseAVX > 0, "required"); 6395 6396 int opcode = this->ideal_Opcode(); 6397 int vlen_enc = vector_length_encoding(this); 6398 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6399 6400 __ vminmax_fp(opcode, elem_bt, 6401 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6402 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6403 %} 6404 ins_pipe( pipe_slow ); 6405 %} 6406 6407 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6408 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 && 6409 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6410 match(Set dst (MinV a b)); 6411 match(Set dst (MaxV a b)); 6412 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6413 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6414 ins_encode %{ 6415 assert(UseAVX > 2, "required"); 6416 6417 int opcode = this->ideal_Opcode(); 6418 int vlen_enc = vector_length_encoding(this); 6419 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6420 6421 __ evminmax_fp(opcode, elem_bt, 
6422 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6423 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6424 %} 6425 ins_pipe( pipe_slow ); 6426 %} 6427 6428 // ------------------------------ Unsigned vector Min/Max ---------------------- 6429 6430 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{ 6431 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6432 match(Set dst (UMinV a b)); 6433 match(Set dst (UMaxV a b)); 6434 format %{ "vector_uminmax $dst,$a,$b\t!" %} 6435 ins_encode %{ 6436 int opcode = this->ideal_Opcode(); 6437 int vlen_enc = vector_length_encoding(this); 6438 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6439 assert(is_integral_type(elem_bt), ""); 6440 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); 6441 %} 6442 ins_pipe( pipe_slow ); 6443 %} 6444 6445 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{ 6446 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6447 match(Set dst (UMinV a (LoadVector b))); 6448 match(Set dst (UMaxV a (LoadVector b))); 6449 format %{ "vector_uminmax $dst,$a,$b\t!" %} 6450 ins_encode %{ 6451 int opcode = this->ideal_Opcode(); 6452 int vlen_enc = vector_length_encoding(this); 6453 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6454 assert(is_integral_type(elem_bt), ""); 6455 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc); 6456 %} 6457 ins_pipe( pipe_slow ); 6458 %} 6459 6460 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{ 6461 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG); 6462 match(Set dst (UMinV a b)); 6463 match(Set dst (UMaxV a b)); 6464 effect(TEMP xtmp1, TEMP xtmp2); 6465 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %} 6466 ins_encode %{ 6467 int opcode = this->ideal_Opcode(); 6468 int vlen_enc = vector_length_encoding(this); 6469 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6470 %} 6471 ins_pipe( pipe_slow ); 6472 %} 6473 6474 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{ 6475 match(Set dst (UMinV (Binary dst src2) mask)); 6476 match(Set dst (UMaxV (Binary dst src2) mask)); 6477 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %} 6478 ins_encode %{ 6479 int vlen_enc = vector_length_encoding(this); 6480 BasicType bt = Matcher::vector_element_basic_type(this); 6481 int opc = this->ideal_Opcode(); 6482 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 6483 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 6484 %} 6485 ins_pipe( pipe_slow ); 6486 %} 6487 6488 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{ 6489 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask)); 6490 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask)); 6491 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! 
umin/max masked operation" %} 6492 ins_encode %{ 6493 int vlen_enc = vector_length_encoding(this); 6494 BasicType bt = Matcher::vector_element_basic_type(this); 6495 int opc = this->ideal_Opcode(); 6496 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 6497 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 6498 %} 6499 ins_pipe( pipe_slow ); 6500 %} 6501 6502 // --------------------------------- Signum/CopySign --------------------------- 6503 6504 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{ 6505 match(Set dst (SignumF dst (Binary zero one))); 6506 effect(KILL cr); 6507 format %{ "signumF $dst, $dst" %} 6508 ins_encode %{ 6509 int opcode = this->ideal_Opcode(); 6510 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6511 %} 6512 ins_pipe( pipe_slow ); 6513 %} 6514 6515 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{ 6516 match(Set dst (SignumD dst (Binary zero one))); 6517 effect(KILL cr); 6518 format %{ "signumD $dst, $dst" %} 6519 ins_encode %{ 6520 int opcode = this->ideal_Opcode(); 6521 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6522 %} 6523 ins_pipe( pipe_slow ); 6524 %} 6525 6526 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{ 6527 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 6528 match(Set dst (SignumVF src (Binary zero one))); 6529 match(Set dst (SignumVD src (Binary zero one))); 6530 effect(TEMP dst, TEMP xtmp1); 6531 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %} 6532 ins_encode %{ 6533 int opcode = this->ideal_Opcode(); 6534 int vec_enc = vector_length_encoding(this); 6535 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6536 $xtmp1$$XMMRegister, vec_enc); 6537 %} 6538 ins_pipe( pipe_slow ); 6539 %} 6540 6541 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{ 6542 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 6543 match(Set dst (SignumVF src (Binary zero one))); 6544 match(Set dst (SignumVD src (Binary zero one))); 6545 effect(TEMP dst, TEMP ktmp1); 6546 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %} 6547 ins_encode %{ 6548 int opcode = this->ideal_Opcode(); 6549 int vec_enc = vector_length_encoding(this); 6550 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6551 $ktmp1$$KRegister, vec_enc); 6552 %} 6553 ins_pipe( pipe_slow ); 6554 %} 6555 6556 // --------------------------------------- 6557 // For copySign use 0xE4 as writemask for vpternlog 6558 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit 6559 // C (xmm2) is set to 0x7FFFFFFF 6560 // Wherever xmm2 is 0, we want to pick from B (sign) 6561 // Wherever xmm2 is 1, we want to pick from A (src) 6562 // 6563 // A B C Result 6564 // 0 0 0 0 6565 // 0 0 1 0 6566 // 0 1 0 1 6567 // 0 1 1 0 6568 // 1 0 0 0 6569 // 1 0 1 1 6570 // 1 1 0 1 6571 // 1 1 1 1 6572 // 6573 // Result going from high bit to low bit is 0b11100100 = 0xe4 6574 // --------------------------------------- 6575 6576 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{ 6577 match(Set dst (CopySignF dst src)); 6578 effect(TEMP tmp1, TEMP tmp2); 6579 format %{ "CopySignF $dst, $src\t!
using $tmp1 and $tmp2 as TEMP" %} 6580 ins_encode %{ 6581 __ movl($tmp2$$Register, 0x7FFFFFFF); 6582 __ movdl($tmp1$$XMMRegister, $tmp2$$Register); 6583 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6584 %} 6585 ins_pipe( pipe_slow ); 6586 %} 6587 6588 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ 6589 match(Set dst (CopySignD dst (Binary src zero))); 6590 ins_cost(100); 6591 effect(TEMP tmp1, TEMP tmp2); 6592 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6593 ins_encode %{ 6594 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); 6595 __ movq($tmp1$$XMMRegister, $tmp2$$Register); 6596 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6597 %} 6598 ins_pipe( pipe_slow ); 6599 %} 6600 6601 //----------------------------- CompressBits/ExpandBits ------------------------ 6602 6603 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6604 predicate(n->bottom_type()->isa_int()); 6605 match(Set dst (CompressBits src mask)); 6606 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6607 ins_encode %{ 6608 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 6609 %} 6610 ins_pipe( pipe_slow ); 6611 %} 6612 6613 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6614 predicate(n->bottom_type()->isa_int()); 6615 match(Set dst (ExpandBits src mask)); 6616 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6617 ins_encode %{ 6618 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 6619 %} 6620 ins_pipe( pipe_slow ); 6621 %} 6622 6623 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6624 predicate(n->bottom_type()->isa_int()); 6625 match(Set dst (CompressBits src (LoadI mask))); 6626 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6627 ins_encode %{ 6628 __ pextl($dst$$Register, $src$$Register, $mask$$Address); 6629 %} 6630 ins_pipe( pipe_slow ); 6631 %} 6632 6633 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6634 predicate(n->bottom_type()->isa_int()); 6635 match(Set dst (ExpandBits src (LoadI mask))); 6636 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6637 ins_encode %{ 6638 __ pdepl($dst$$Register, $src$$Register, $mask$$Address); 6639 %} 6640 ins_pipe( pipe_slow ); 6641 %} 6642 6643 // --------------------------------- Sqrt -------------------------------------- 6644 6645 instruct vsqrtF_reg(vec dst, vec src) %{ 6646 match(Set dst (SqrtVF src)); 6647 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6648 ins_encode %{ 6649 assert(UseAVX > 0, "required"); 6650 int vlen_enc = vector_length_encoding(this); 6651 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6652 %} 6653 ins_pipe( pipe_slow ); 6654 %} 6655 6656 instruct vsqrtF_mem(vec dst, memory mem) %{ 6657 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6658 match(Set dst (SqrtVF (LoadVector mem))); 6659 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6660 ins_encode %{ 6661 assert(UseAVX > 0, "required"); 6662 int vlen_enc = vector_length_encoding(this); 6663 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6664 %} 6665 ins_pipe( pipe_slow ); 6666 %} 6667 6668 // Floating point vector sqrt 6669 instruct vsqrtD_reg(vec dst, vec src) %{ 6670 match(Set dst (SqrtVD src)); 6671 format %{ "vsqrtpd $dst,$src\t! 
sqrt packedD" %} 6672 ins_encode %{ 6673 assert(UseAVX > 0, "required"); 6674 int vlen_enc = vector_length_encoding(this); 6675 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6676 %} 6677 ins_pipe( pipe_slow ); 6678 %} 6679 6680 instruct vsqrtD_mem(vec dst, memory mem) %{ 6681 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6682 match(Set dst (SqrtVD (LoadVector mem))); 6683 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6684 ins_encode %{ 6685 assert(UseAVX > 0, "required"); 6686 int vlen_enc = vector_length_encoding(this); 6687 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6688 %} 6689 ins_pipe( pipe_slow ); 6690 %} 6691 6692 // ------------------------------ Shift --------------------------------------- 6693 6694 // Left and right shift count vectors are the same on x86 6695 // (only lowest bits of xmm reg are used for count). 6696 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6697 match(Set dst (LShiftCntV cnt)); 6698 match(Set dst (RShiftCntV cnt)); 6699 format %{ "movdl $dst,$cnt\t! load shift count" %} 6700 ins_encode %{ 6701 __ movdl($dst$$XMMRegister, $cnt$$Register); 6702 %} 6703 ins_pipe( pipe_slow ); 6704 %} 6705 6706 // Byte vector shift 6707 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6708 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6709 match(Set dst ( LShiftVB src shift)); 6710 match(Set dst ( RShiftVB src shift)); 6711 match(Set dst (URShiftVB src shift)); 6712 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6713 format %{"vector_byte_shift $dst,$src,$shift" %} 6714 ins_encode %{ 6715 assert(UseSSE > 3, "required"); 6716 int opcode = this->ideal_Opcode(); 6717 bool sign = (opcode != Op_URShiftVB); 6718 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6719 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6720 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6721 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6722 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6723 %} 6724 ins_pipe( pipe_slow ); 6725 %} 6726 6727 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6728 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6729 UseAVX <= 1); 6730 match(Set dst ( LShiftVB src shift)); 6731 match(Set dst ( RShiftVB src shift)); 6732 match(Set dst (URShiftVB src shift)); 6733 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6734 format %{"vector_byte_shift $dst,$src,$shift" %} 6735 ins_encode %{ 6736 assert(UseSSE > 3, "required"); 6737 int opcode = this->ideal_Opcode(); 6738 bool sign = (opcode != Op_URShiftVB); 6739 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6740 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6741 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6742 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6743 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6744 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6745 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6746 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6747 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6748 %} 6749 ins_pipe( pipe_slow ); 6750 %} 6751 6752 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6753 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6754 UseAVX > 1); 6755 match(Set dst ( LShiftVB src shift)); 6756 match(Set dst ( RShiftVB src shift)); 6757 match(Set 
dst (URShiftVB src shift)); 6758 effect(TEMP dst, TEMP tmp); 6759 format %{"vector_byte_shift $dst,$src,$shift" %} 6760 ins_encode %{ 6761 int opcode = this->ideal_Opcode(); 6762 bool sign = (opcode != Op_URShiftVB); 6763 int vlen_enc = Assembler::AVX_256bit; 6764 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6765 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6766 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6767 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6768 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6769 %} 6770 ins_pipe( pipe_slow ); 6771 %} 6772 6773 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6774 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6775 match(Set dst ( LShiftVB src shift)); 6776 match(Set dst ( RShiftVB src shift)); 6777 match(Set dst (URShiftVB src shift)); 6778 effect(TEMP dst, TEMP tmp); 6779 format %{"vector_byte_shift $dst,$src,$shift" %} 6780 ins_encode %{ 6781 assert(UseAVX > 1, "required"); 6782 int opcode = this->ideal_Opcode(); 6783 bool sign = (opcode != Op_URShiftVB); 6784 int vlen_enc = Assembler::AVX_256bit; 6785 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 6786 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6787 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6788 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6789 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6790 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6791 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6792 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6793 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6794 %} 6795 ins_pipe( pipe_slow ); 6796 %} 6797 6798 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6799 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift()); 6800 match(Set dst ( LShiftVB src shift)); 6801 match(Set dst (RShiftVB src shift)); 6802 match(Set dst (URShiftVB src shift)); 6803 effect(TEMP dst, TEMP tmp1, TEMP tmp2); 6804 format %{"vector_byte_shift $dst,$src,$shift" %} 6805 ins_encode %{ 6806 assert(UseAVX > 2, "required"); 6807 int opcode = this->ideal_Opcode(); 6808 bool sign = (opcode != Op_URShiftVB); 6809 int vlen_enc = Assembler::AVX_512bit; 6810 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 6811 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 6812 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6813 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6814 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6815 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6816 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6817 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6818 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6819 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 6820 __ evmovdquq($tmp2$$XMMRegister, 
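                 // tmp2 receives the qword permutation indices that the vpermq below uses to
                 // restore element order after the lane-wise vpackuswb above.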
ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg); 6821 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6822 %} 6823 ins_pipe( pipe_slow ); 6824 %} 6825 6826 // Shorts vector logical right shift produces an incorrect Java result 6827 // for negative data because Java code converts short values into ints with 6828 // sign extension before a shift. But char vectors are fine since chars are 6829 // unsigned values. 6830 // Shorts/Chars vector shift 6831 instruct vshiftS(vec dst, vec src, vec shift) %{ 6832 predicate(!n->as_ShiftV()->is_var_shift()); 6833 match(Set dst ( LShiftVS src shift)); 6834 match(Set dst ( RShiftVS src shift)); 6835 match(Set dst (URShiftVS src shift)); 6836 effect(TEMP dst, USE src, USE shift); 6837 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %} 6838 ins_encode %{ 6839 int opcode = this->ideal_Opcode(); 6840 if (UseAVX > 0) { 6841 int vlen_enc = vector_length_encoding(this); 6842 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6843 } else { 6844 int vlen = Matcher::vector_length(this); 6845 if (vlen == 2) { 6846 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6847 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6848 } else if (vlen == 4) { 6849 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6850 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6851 } else { 6852 assert (vlen == 8, "sanity"); 6853 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6854 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6855 } 6856 } 6857 %} 6858 ins_pipe( pipe_slow ); 6859 %} 6860 6861 // Integers vector shift 6862 instruct vshiftI(vec dst, vec src, vec shift) %{ 6863 predicate(!n->as_ShiftV()->is_var_shift()); 6864 match(Set dst ( LShiftVI src shift)); 6865 match(Set dst ( RShiftVI src shift)); 6866 match(Set dst (URShiftVI src shift)); 6867 effect(TEMP dst, USE src, USE shift); 6868 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 6869 ins_encode %{ 6870 int opcode = this->ideal_Opcode(); 6871 if (UseAVX > 0) { 6872 int vlen_enc = vector_length_encoding(this); 6873 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6874 } else { 6875 int vlen = Matcher::vector_length(this); 6876 if (vlen == 2) { 6877 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6878 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6879 } else { 6880 assert(vlen == 4, "sanity"); 6881 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6882 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6883 } 6884 } 6885 %} 6886 ins_pipe( pipe_slow ); 6887 %} 6888 6889 // Integers vector constant shift 6890 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ 6891 match(Set dst (LShiftVI src (LShiftCntV shift))); 6892 match(Set dst (RShiftVI src (RShiftCntV shift))); 6893 match(Set dst (URShiftVI src (RShiftCntV shift))); 6894 format %{ "vshiftd_imm $dst,$src,$shift\t!
shift packedI" %} 6895 ins_encode %{ 6896 int opcode = this->ideal_Opcode(); 6897 if (UseAVX > 0) { 6898 int vector_len = vector_length_encoding(this); 6899 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6900 } else { 6901 int vlen = Matcher::vector_length(this); 6902 if (vlen == 2) { 6903 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6904 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6905 } else { 6906 assert(vlen == 4, "sanity"); 6907 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6908 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6909 } 6910 } 6911 %} 6912 ins_pipe( pipe_slow ); 6913 %} 6914 6915 // Longs vector shift 6916 instruct vshiftL(vec dst, vec src, vec shift) %{ 6917 predicate(!n->as_ShiftV()->is_var_shift()); 6918 match(Set dst ( LShiftVL src shift)); 6919 match(Set dst (URShiftVL src shift)); 6920 effect(TEMP dst, USE src, USE shift); 6921 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %} 6922 ins_encode %{ 6923 int opcode = this->ideal_Opcode(); 6924 if (UseAVX > 0) { 6925 int vlen_enc = vector_length_encoding(this); 6926 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6927 } else { 6928 assert(Matcher::vector_length(this) == 2, ""); 6929 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6930 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6931 } 6932 %} 6933 ins_pipe( pipe_slow ); 6934 %} 6935 6936 // Longs vector constant shift 6937 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 6938 match(Set dst (LShiftVL src (LShiftCntV shift))); 6939 match(Set dst (URShiftVL src (RShiftCntV shift))); 6940 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 6941 ins_encode %{ 6942 int opcode = this->ideal_Opcode(); 6943 if (UseAVX > 0) { 6944 int vector_len = vector_length_encoding(this); 6945 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6946 } else { 6947 assert(Matcher::vector_length(this) == 2, ""); 6948 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6949 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6950 } 6951 %} 6952 ins_pipe( pipe_slow ); 6953 %} 6954 6955 // -------------------ArithmeticRightShift ----------------------------------- 6956 // Long vector arithmetic right shift 6957 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 6958 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 6959 match(Set dst (RShiftVL src shift)); 6960 effect(TEMP dst, TEMP tmp); 6961 format %{ "vshiftq $dst,$src,$shift" %} 6962 ins_encode %{ 6963 uint vlen = Matcher::vector_length(this); 6964 if (vlen == 2) { 6965 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6966 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6967 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6968 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6969 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6970 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 6971 } else { 6972 assert(vlen == 4, "sanity"); 6973 assert(UseAVX > 1, "required"); 6974 int vlen_enc = Assembler::AVX_256bit; 6975 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6976 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6977 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6978 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6979 __ vpsubq($dst$$XMMRegister, 
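        // Final vpsubq completes the ((x >>> s) ^ m) - m identity (m = sign-bit mask >>> s),
        // which sign-extends the logically shifted value to give an arithmetic shift.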
$dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6980 } 6981 %} 6982 ins_pipe( pipe_slow ); 6983 %} 6984 6985 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 6986 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 6987 match(Set dst (RShiftVL src shift)); 6988 format %{ "vshiftq $dst,$src,$shift" %} 6989 ins_encode %{ 6990 int vlen_enc = vector_length_encoding(this); 6991 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6992 %} 6993 ins_pipe( pipe_slow ); 6994 %} 6995 6996 // ------------------- Variable Shift ----------------------------- 6997 // Byte variable shift 6998 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 6999 predicate(Matcher::vector_length(n) <= 8 && 7000 n->as_ShiftV()->is_var_shift() && 7001 !VM_Version::supports_avx512bw()); 7002 match(Set dst ( LShiftVB src shift)); 7003 match(Set dst ( RShiftVB src shift)); 7004 match(Set dst (URShiftVB src shift)); 7005 effect(TEMP dst, TEMP vtmp); 7006 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7007 ins_encode %{ 7008 assert(UseAVX >= 2, "required"); 7009 7010 int opcode = this->ideal_Opcode(); 7011 int vlen_enc = Assembler::AVX_128bit; 7012 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7013 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7014 %} 7015 ins_pipe( pipe_slow ); 7016 %} 7017 7018 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7019 predicate(Matcher::vector_length(n) == 16 && 7020 n->as_ShiftV()->is_var_shift() && 7021 !VM_Version::supports_avx512bw()); 7022 match(Set dst ( LShiftVB src shift)); 7023 match(Set dst ( RShiftVB src shift)); 7024 match(Set dst (URShiftVB src shift)); 7025 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7026 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp1, $vtmp2 as TEMP" %} 7027 ins_encode %{ 7028 assert(UseAVX >= 2, "required"); 7029 7030 int opcode = this->ideal_Opcode(); 7031 int vlen_enc = Assembler::AVX_128bit; 7032 // Shift lower half and get word result in dst 7033 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7034 7035 // Shift upper half and get word result in vtmp1 7036 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7037 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7038 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7039 7040 // Merge and down convert the two word results to byte in dst 7041 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7042 %} 7043 ins_pipe( pipe_slow ); 7044 %} 7045 7046 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 7047 predicate(Matcher::vector_length(n) == 32 && 7048 n->as_ShiftV()->is_var_shift() && 7049 !VM_Version::supports_avx512bw()); 7050 match(Set dst ( LShiftVB src shift)); 7051 match(Set dst ( RShiftVB src shift)); 7052 match(Set dst (URShiftVB src shift)); 7053 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 7054 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 7055 ins_encode %{ 7056 assert(UseAVX >= 2, "required"); 7057 7058 int opcode = this->ideal_Opcode(); 7059 int vlen_enc = Assembler::AVX_128bit; 7060 // Process lower 128 bits and get result in dst 7061 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7062 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7063 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7064 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7065 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7066 7067 // Process higher 128 bits and get result in vtmp3 7068 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7069 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7070 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 7071 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 7072 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 7073 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7074 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 7075 7076 // Merge the two results in dst 7077 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7078 %} 7079 ins_pipe( pipe_slow ); 7080 %} 7081 7082 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ 7083 predicate(Matcher::vector_length(n) <= 32 && 7084 n->as_ShiftV()->is_var_shift() && 7085 VM_Version::supports_avx512bw()); 7086 match(Set dst ( LShiftVB src shift)); 7087 match(Set dst ( RShiftVB src shift)); 7088 match(Set dst (URShiftVB src shift)); 7089 effect(TEMP dst, TEMP vtmp); 7090 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp as TEMP" %} 7091 ins_encode %{ 7092 assert(UseAVX > 2, "required"); 7093 7094 int opcode = this->ideal_Opcode(); 7095 int vlen_enc = vector_length_encoding(this); 7096 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7097 %} 7098 ins_pipe( pipe_slow ); 7099 %} 7100 7101 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7102 predicate(Matcher::vector_length(n) == 64 && 7103 n->as_ShiftV()->is_var_shift() && 7104 VM_Version::supports_avx512bw()); 7105 match(Set dst ( LShiftVB src shift)); 7106 match(Set dst ( RShiftVB src shift)); 7107 match(Set dst (URShiftVB src shift)); 7108 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7109 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7110 ins_encode %{ 7111 assert(UseAVX > 2, "required"); 7112 7113 int opcode = this->ideal_Opcode(); 7114 int vlen_enc = Assembler::AVX_256bit; 7115 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7116 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7117 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7118 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7119 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7120 %} 7121 ins_pipe( pipe_slow ); 7122 %} 7123 7124 // Short variable shift 7125 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7126 predicate(Matcher::vector_length(n) <= 8 && 7127 n->as_ShiftV()->is_var_shift() && 7128 !VM_Version::supports_avx512bw()); 7129 match(Set dst ( LShiftVS src shift)); 7130 match(Set dst ( RShiftVS src shift)); 7131 match(Set dst (URShiftVS src shift)); 7132 effect(TEMP dst, TEMP vtmp); 7133 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7134 ins_encode %{ 7135 assert(UseAVX >= 2, "required"); 7136 7137 int opcode = this->ideal_Opcode(); 7138 bool sign = (opcode != Op_URShiftVS); 7139 int vlen_enc = Assembler::AVX_256bit; 7140 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 7141 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 7142 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 7143 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7144 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 7145 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7146 %} 7147 ins_pipe( pipe_slow ); 7148 %} 7149 7150 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7151 predicate(Matcher::vector_length(n) == 16 && 7152 n->as_ShiftV()->is_var_shift() && 7153 !VM_Version::supports_avx512bw()); 7154 match(Set dst ( LShiftVS src shift)); 7155 match(Set dst ( RShiftVS src shift)); 7156 match(Set dst (URShiftVS src shift)); 7157 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7158 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7159 ins_encode %{ 7160 assert(UseAVX >= 2, "required"); 7161 7162 int opcode = this->ideal_Opcode(); 7163 bool sign = (opcode != Op_URShiftVS); 7164 int vlen_enc = Assembler::AVX_256bit; 7165 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP 7166 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 7167 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7168 __ 
varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7169 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7170 7171 // Shift upper half, with result in dst using vtmp1 as TEMP 7172 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 7173 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 7174 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7175 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7176 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7177 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7178 7179 // Merge lower and upper half result into dst 7180 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7181 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 7182 %} 7183 ins_pipe( pipe_slow ); 7184 %} 7185 7186 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 7187 predicate(n->as_ShiftV()->is_var_shift() && 7188 VM_Version::supports_avx512bw()); 7189 match(Set dst ( LShiftVS src shift)); 7190 match(Set dst ( RShiftVS src shift)); 7191 match(Set dst (URShiftVS src shift)); 7192 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 7193 ins_encode %{ 7194 assert(UseAVX > 2, "required"); 7195 7196 int opcode = this->ideal_Opcode(); 7197 int vlen_enc = vector_length_encoding(this); 7198 if (!VM_Version::supports_avx512vl()) { 7199 vlen_enc = Assembler::AVX_512bit; 7200 } 7201 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7202 %} 7203 ins_pipe( pipe_slow ); 7204 %} 7205 7206 //Integer variable shift 7207 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 7208 predicate(n->as_ShiftV()->is_var_shift()); 7209 match(Set dst ( LShiftVI src shift)); 7210 match(Set dst ( RShiftVI src shift)); 7211 match(Set dst (URShiftVI src shift)); 7212 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 7213 ins_encode %{ 7214 assert(UseAVX >= 2, "required"); 7215 7216 int opcode = this->ideal_Opcode(); 7217 int vlen_enc = vector_length_encoding(this); 7218 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7219 %} 7220 ins_pipe( pipe_slow ); 7221 %} 7222 7223 //Long variable shift 7224 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 7225 predicate(n->as_ShiftV()->is_var_shift()); 7226 match(Set dst ( LShiftVL src shift)); 7227 match(Set dst (URShiftVL src shift)); 7228 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 7229 ins_encode %{ 7230 assert(UseAVX >= 2, "required"); 7231 7232 int opcode = this->ideal_Opcode(); 7233 int vlen_enc = vector_length_encoding(this); 7234 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7235 %} 7236 ins_pipe( pipe_slow ); 7237 %} 7238 7239 //Long variable right shift arithmetic 7240 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 7241 predicate(Matcher::vector_length(n) <= 4 && 7242 n->as_ShiftV()->is_var_shift() && 7243 UseAVX == 2); 7244 match(Set dst (RShiftVL src shift)); 7245 effect(TEMP dst, TEMP vtmp); 7246 format %{ "vector_varshift_long $dst,$src,$shift\n\t! 
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
                 $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- AND --------------------------------------

instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! 
xor vectors" %} 7345 ins_encode %{ 7346 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7347 %} 7348 ins_pipe( pipe_slow ); 7349 %} 7350 7351 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7352 predicate(UseAVX > 0); 7353 match(Set dst (XorV src1 src2)); 7354 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7355 ins_encode %{ 7356 int vlen_enc = vector_length_encoding(this); 7357 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7358 %} 7359 ins_pipe( pipe_slow ); 7360 %} 7361 7362 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7363 predicate((UseAVX > 0) && 7364 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7365 match(Set dst (XorV src (LoadVector mem))); 7366 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7367 ins_encode %{ 7368 int vlen_enc = vector_length_encoding(this); 7369 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7370 %} 7371 ins_pipe( pipe_slow ); 7372 %} 7373 7374 // --------------------------------- VectorCast -------------------------------------- 7375 7376 instruct vcastBtoX(vec dst, vec src) %{ 7377 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE); 7378 match(Set dst (VectorCastB2X src)); 7379 format %{ "vector_cast_b2x $dst,$src\t!" %} 7380 ins_encode %{ 7381 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7382 int vlen_enc = vector_length_encoding(this); 7383 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7384 %} 7385 ins_pipe( pipe_slow ); 7386 %} 7387 7388 instruct vcastBtoD(legVec dst, legVec src) %{ 7389 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE); 7390 match(Set dst (VectorCastB2X src)); 7391 format %{ "vector_cast_b2x $dst,$src\t!" %} 7392 ins_encode %{ 7393 int vlen_enc = vector_length_encoding(this); 7394 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7395 %} 7396 ins_pipe( pipe_slow ); 7397 %} 7398 7399 instruct castStoX(vec dst, vec src) %{ 7400 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7401 Matcher::vector_length(n->in(1)) <= 8 && // src 7402 Matcher::vector_element_basic_type(n) == T_BYTE); 7403 match(Set dst (VectorCastS2X src)); 7404 format %{ "vector_cast_s2x $dst,$src" %} 7405 ins_encode %{ 7406 assert(UseAVX > 0, "required"); 7407 7408 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7409 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7410 %} 7411 ins_pipe( pipe_slow ); 7412 %} 7413 7414 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7415 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7416 Matcher::vector_length(n->in(1)) == 16 && // src 7417 Matcher::vector_element_basic_type(n) == T_BYTE); 7418 effect(TEMP dst, TEMP vtmp); 7419 match(Set dst (VectorCastS2X src)); 7420 format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp as TEMP" %} 7421 ins_encode %{ 7422 assert(UseAVX > 0, "required"); 7423 7424 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7425 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7426 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7427 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7428 %} 7429 ins_pipe( pipe_slow ); 7430 %} 7431 7432 instruct vcastStoX_evex(vec dst, vec src) %{ 7433 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7434 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7435 match(Set dst (VectorCastS2X src)); 7436 format %{ "vector_cast_s2x $dst,$src\t!" %} 7437 ins_encode %{ 7438 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7439 int src_vlen_enc = vector_length_encoding(this, $src); 7440 int vlen_enc = vector_length_encoding(this); 7441 switch (to_elem_bt) { 7442 case T_BYTE: 7443 if (!VM_Version::supports_avx512vl()) { 7444 vlen_enc = Assembler::AVX_512bit; 7445 } 7446 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7447 break; 7448 case T_INT: 7449 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7450 break; 7451 case T_FLOAT: 7452 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7453 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7454 break; 7455 case T_LONG: 7456 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7457 break; 7458 case T_DOUBLE: { 7459 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7460 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7461 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7462 break; 7463 } 7464 default: 7465 ShouldNotReachHere(); 7466 } 7467 %} 7468 ins_pipe( pipe_slow ); 7469 %} 7470 7471 instruct castItoX(vec dst, vec src) %{ 7472 predicate(UseAVX <= 2 && 7473 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7474 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7475 match(Set dst (VectorCastI2X src)); 7476 format %{ "vector_cast_i2x $dst,$src" %} 7477 ins_encode %{ 7478 assert(UseAVX > 0, "required"); 7479 7480 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7481 int vlen_enc = vector_length_encoding(this, $src); 7482 7483 if (to_elem_bt == T_BYTE) { 7484 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7485 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7486 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7487 } else { 7488 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7489 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7490 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7491 } 7492 %} 7493 ins_pipe( pipe_slow ); 7494 %} 7495 7496 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7497 predicate(UseAVX <= 2 && 7498 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7499 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7500 match(Set dst (VectorCastI2X src)); 7501 format %{ "vector_cast_i2x $dst,$src\t! 
using $vtmp as TEMP" %} 7502 effect(TEMP dst, TEMP vtmp); 7503 ins_encode %{ 7504 assert(UseAVX > 0, "required"); 7505 7506 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7507 int vlen_enc = vector_length_encoding(this, $src); 7508 7509 if (to_elem_bt == T_BYTE) { 7510 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7511 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7512 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7513 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7514 } else { 7515 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7516 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7517 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7518 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7519 } 7520 %} 7521 ins_pipe( pipe_slow ); 7522 %} 7523 7524 instruct vcastItoX_evex(vec dst, vec src) %{ 7525 predicate(UseAVX > 2 || 7526 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7527 match(Set dst (VectorCastI2X src)); 7528 format %{ "vector_cast_i2x $dst,$src\t!" %} 7529 ins_encode %{ 7530 assert(UseAVX > 0, "required"); 7531 7532 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7533 int src_vlen_enc = vector_length_encoding(this, $src); 7534 int dst_vlen_enc = vector_length_encoding(this); 7535 switch (dst_elem_bt) { 7536 case T_BYTE: 7537 if (!VM_Version::supports_avx512vl()) { 7538 src_vlen_enc = Assembler::AVX_512bit; 7539 } 7540 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7541 break; 7542 case T_SHORT: 7543 if (!VM_Version::supports_avx512vl()) { 7544 src_vlen_enc = Assembler::AVX_512bit; 7545 } 7546 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7547 break; 7548 case T_FLOAT: 7549 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7550 break; 7551 case T_LONG: 7552 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7553 break; 7554 case T_DOUBLE: 7555 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7556 break; 7557 default: 7558 ShouldNotReachHere(); 7559 } 7560 %} 7561 ins_pipe( pipe_slow ); 7562 %} 7563 7564 instruct vcastLtoBS(vec dst, vec src) %{ 7565 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7566 UseAVX <= 2); 7567 match(Set dst (VectorCastL2X src)); 7568 format %{ "vector_cast_l2x $dst,$src" %} 7569 ins_encode %{ 7570 assert(UseAVX > 0, "required"); 7571 7572 int vlen = Matcher::vector_length_in_bytes(this, $src); 7573 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7574 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? 
ExternalAddress(vector_int_to_byte_mask()) 7575 : ExternalAddress(vector_int_to_short_mask()); 7576 if (vlen <= 16) { 7577 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7578 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7579 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7580 } else { 7581 assert(vlen <= 32, "required"); 7582 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7583 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7584 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7585 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7586 } 7587 if (to_elem_bt == T_BYTE) { 7588 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7589 } 7590 %} 7591 ins_pipe( pipe_slow ); 7592 %} 7593 7594 instruct vcastLtoX_evex(vec dst, vec src) %{ 7595 predicate(UseAVX > 2 || 7596 (Matcher::vector_element_basic_type(n) == T_INT || 7597 Matcher::vector_element_basic_type(n) == T_FLOAT || 7598 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7599 match(Set dst (VectorCastL2X src)); 7600 format %{ "vector_cast_l2x $dst,$src\t!" %} 7601 ins_encode %{ 7602 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7603 int vlen = Matcher::vector_length_in_bytes(this, $src); 7604 int vlen_enc = vector_length_encoding(this, $src); 7605 switch (to_elem_bt) { 7606 case T_BYTE: 7607 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7608 vlen_enc = Assembler::AVX_512bit; 7609 } 7610 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7611 break; 7612 case T_SHORT: 7613 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7614 vlen_enc = Assembler::AVX_512bit; 7615 } 7616 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7617 break; 7618 case T_INT: 7619 if (vlen == 8) { 7620 if ($dst$$XMMRegister != $src$$XMMRegister) { 7621 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7622 } 7623 } else if (vlen == 16) { 7624 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7625 } else if (vlen == 32) { 7626 if (UseAVX > 2) { 7627 if (!VM_Version::supports_avx512vl()) { 7628 vlen_enc = Assembler::AVX_512bit; 7629 } 7630 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7631 } else { 7632 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7633 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7634 } 7635 } else { // vlen == 64 7636 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7637 } 7638 break; 7639 case T_FLOAT: 7640 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7641 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7642 break; 7643 case T_DOUBLE: 7644 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7645 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7646 break; 7647 7648 default: assert(false, "%s", type2name(to_elem_bt)); 7649 } 7650 %} 7651 ins_pipe( pipe_slow ); 7652 %} 7653 7654 instruct vcastFtoD_reg(vec dst, vec src) %{ 7655 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7656 match(Set dst (VectorCastF2X src)); 7657 format %{ "vector_cast_f2d $dst,$src\t!" 
%} 7658 ins_encode %{ 7659 int vlen_enc = vector_length_encoding(this); 7660 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7661 %} 7662 ins_pipe( pipe_slow ); 7663 %} 7664 7665 7666 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7667 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7668 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4); 7669 match(Set dst (VectorCastF2X src)); 7670 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7671 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 7672 ins_encode %{ 7673 int vlen_enc = vector_length_encoding(this, $src); 7674 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7675 // JDK-8292878 removed the need for an explicit scratch register needed to load greater than 7676 // 32 bit addresses for register indirect addressing mode since stub constants 7677 // are part of code cache and there is a cap of 2G on ReservedCodeCacheSize currently. 7678 // However, targets are free to increase this limit, but having a large code cache size 7679 // greater than 2G looks unreasonable in practical scenario, on the hind side with given 7680 // cap we save a temporary register allocation which in limiting case can prevent 7681 // spilling in high register pressure blocks. 7682 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7683 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 7684 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7685 %} 7686 ins_pipe( pipe_slow ); 7687 %} 7688 7689 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7690 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7691 is_integral_type(Matcher::vector_element_basic_type(n))); 7692 match(Set dst (VectorCastF2X src)); 7693 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7694 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7695 ins_encode %{ 7696 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7697 if (to_elem_bt == T_LONG) { 7698 int vlen_enc = vector_length_encoding(this); 7699 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7700 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7701 ExternalAddress(vector_double_signflip()), noreg, vlen_enc); 7702 } else { 7703 int vlen_enc = vector_length_encoding(this, $src); 7704 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7705 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7706 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7707 } 7708 %} 7709 ins_pipe( pipe_slow ); 7710 %} 7711 7712 instruct vcastDtoF_reg(vec dst, vec src) %{ 7713 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 7714 match(Set dst (VectorCastD2X src)); 7715 format %{ "vector_cast_d2x $dst,$src\t!" 
%} 7716 ins_encode %{ 7717 int vlen_enc = vector_length_encoding(this, $src); 7718 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7719 %} 7720 ins_pipe( pipe_slow ); 7721 %} 7722 7723 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{ 7724 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7725 is_integral_type(Matcher::vector_element_basic_type(n))); 7726 match(Set dst (VectorCastD2X src)); 7727 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr); 7728 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %} 7729 ins_encode %{ 7730 int vlen_enc = vector_length_encoding(this, $src); 7731 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7732 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7733 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister, 7734 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7735 %} 7736 ins_pipe( pipe_slow ); 7737 %} 7738 7739 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7740 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7741 is_integral_type(Matcher::vector_element_basic_type(n))); 7742 match(Set dst (VectorCastD2X src)); 7743 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7744 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7745 ins_encode %{ 7746 int vlen_enc = vector_length_encoding(this, $src); 7747 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7748 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) : 7749 ExternalAddress(vector_float_signflip()); 7750 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7751 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc); 7752 %} 7753 ins_pipe( pipe_slow ); 7754 %} 7755 7756 instruct vucast(vec dst, vec src) %{ 7757 match(Set dst (VectorUCastB2X src)); 7758 match(Set dst (VectorUCastS2X src)); 7759 match(Set dst (VectorUCastI2X src)); 7760 format %{ "vector_ucast $dst,$src\t!" %} 7761 ins_encode %{ 7762 assert(UseAVX > 0, "required"); 7763 7764 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7765 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7766 int vlen_enc = vector_length_encoding(this); 7767 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7768 %} 7769 ins_pipe( pipe_slow ); 7770 %} 7771 7772 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7773 predicate(!VM_Version::supports_avx512vl() && 7774 Matcher::vector_length_in_bytes(n) < 64 && 7775 Matcher::vector_element_basic_type(n) == T_INT); 7776 match(Set dst (RoundVF src)); 7777 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7778 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %} 7779 ins_encode %{ 7780 int vlen_enc = vector_length_encoding(this); 7781 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 
0x3FBF : 0x3F80)); 7782 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, 7783 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7784 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister); 7785 %} 7786 ins_pipe( pipe_slow ); 7787 %} 7788 7789 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7790 predicate((VM_Version::supports_avx512vl() || 7791 Matcher::vector_length_in_bytes(n) == 64) && 7792 Matcher::vector_element_basic_type(n) == T_INT); 7793 match(Set dst (RoundVF src)); 7794 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7795 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7796 ins_encode %{ 7797 int vlen_enc = vector_length_encoding(this); 7798 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7799 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, 7800 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7801 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7802 %} 7803 ins_pipe( pipe_slow ); 7804 %} 7805 7806 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7807 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7808 match(Set dst (RoundVD src)); 7809 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7810 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7811 ins_encode %{ 7812 int vlen_enc = vector_length_encoding(this); 7813 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7814 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, 7815 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc, 7816 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7817 %} 7818 ins_pipe( pipe_slow ); 7819 %} 7820 7821 // --------------------------------- VectorMaskCmp -------------------------------------- 7822 7823 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7824 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7825 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 7826 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7827 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7828 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7829 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 7830 ins_encode %{ 7831 int vlen_enc = vector_length_encoding(this, $src1); 7832 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7833 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7834 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7835 } else { 7836 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7837 } 7838 %} 7839 ins_pipe( pipe_slow ); 7840 %} 7841 7842 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7843 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 7844 n->bottom_type()->isa_vectmask() == nullptr && 7845 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7846 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7847 effect(TEMP ktmp); 7848 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7849 ins_encode %{ 7850 int vlen_enc = Assembler::AVX_512bit; 7851 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7852 KRegister mask = k0; // The comparison itself is not being masked. 7853 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7854 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7855 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7856 } else { 7857 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7858 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7859 } 7860 %} 7861 ins_pipe( pipe_slow ); 7862 %} 7863 7864 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 7865 predicate(n->bottom_type()->isa_vectmask() && 7866 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7867 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7868 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 7869 ins_encode %{ 7870 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 7871 int vlen_enc = vector_length_encoding(this, $src1); 7872 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7873 KRegister mask = k0; // The comparison itself is not being masked. 7874 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7875 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7876 } else { 7877 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7878 } 7879 %} 7880 ins_pipe( pipe_slow ); 7881 %} 7882 7883 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7884 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7885 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7886 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7887 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7888 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7889 (n->in(2)->get_int() == BoolTest::eq || 7890 n->in(2)->get_int() == BoolTest::lt || 7891 n->in(2)->get_int() == BoolTest::gt)); // cond 7892 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7893 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
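// Note: AVX/AVX2 only provide signed packed integer compares, so the unsigned
// case below biases both operands by the most significant bit of the element
// (x ^ MIN_VALUE), after which
//   a <u b  <==>  (a ^ MIN_VALUE) <s (b ^ MIN_VALUE)
// high_bit_set() supplies the per-element-size bias constant that is
// broadcast into the temporary.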
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));

    if (vlen_enc == Assembler::AVX_128bit) {
      __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    } else {
      __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    }
    __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate((n->bottom_type()->isa_vectmask() == nullptr &&
             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    bool merge = false;
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    switch (src1_elem_bt) {
      case T_INT: {
        __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      case T_LONG: {
        __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
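// Note: when the ideal type of the result is TypeVectMask (the target keeps
// vector masks in predicate registers), the comparison below writes an
// AVX-512 opmask register directly instead of expanding the result into an
// all-ones/all-zeroes vector the way the patterns above do.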
instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Comparison is dispatched on the source element type.
    switch (src1_elem_bt) {
      case T_BYTE: {
        __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

// Extract

instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8); // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t! 
using $vtmp as TEMP" %} 8078 ins_encode %{ 8079 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8080 8081 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8082 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 8083 %} 8084 ins_pipe( pipe_slow ); 8085 %} 8086 8087 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8088 predicate(Matcher::vector_length(n->in(1)) <= 4); 8089 match(Set dst (ExtractF src idx)); 8090 effect(TEMP dst, TEMP vtmp); 8091 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8092 ins_encode %{ 8093 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8094 8095 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 8096 %} 8097 ins_pipe( pipe_slow ); 8098 %} 8099 8100 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8101 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 8102 Matcher::vector_length(n->in(1)/*src*/) == 16); 8103 match(Set dst (ExtractF src idx)); 8104 effect(TEMP vtmp); 8105 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8106 ins_encode %{ 8107 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8108 8109 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8110 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 8111 %} 8112 ins_pipe( pipe_slow ); 8113 %} 8114 8115 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 8116 predicate(Matcher::vector_length(n->in(1)) == 2); // src 8117 match(Set dst (ExtractD src idx)); 8118 format %{ "extractD $dst,$src,$idx\t!" %} 8119 ins_encode %{ 8120 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8121 8122 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8123 %} 8124 ins_pipe( pipe_slow ); 8125 %} 8126 8127 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 8128 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8129 Matcher::vector_length(n->in(1)) == 8); // src 8130 match(Set dst (ExtractD src idx)); 8131 effect(TEMP vtmp); 8132 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 8133 ins_encode %{ 8134 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8135 8136 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8137 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 8138 %} 8139 ins_pipe( pipe_slow ); 8140 %} 8141 8142 // --------------------------------- Vector Blend -------------------------------------- 8143 8144 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 8145 predicate(UseAVX == 0); 8146 match(Set dst (VectorBlend (Binary dst src) mask)); 8147 format %{ "vector_blend $dst,$src,$mask\t! 
using $tmp as TEMP" %} 8148 effect(TEMP tmp); 8149 ins_encode %{ 8150 assert(UseSSE >= 4, "required"); 8151 8152 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 8153 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 8154 } 8155 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 8156 %} 8157 ins_pipe( pipe_slow ); 8158 %} 8159 8160 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8161 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8162 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8163 Matcher::vector_length_in_bytes(n) <= 32 && 8164 is_integral_type(Matcher::vector_element_basic_type(n))); 8165 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8166 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8167 ins_encode %{ 8168 int vlen_enc = vector_length_encoding(this); 8169 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8170 %} 8171 ins_pipe( pipe_slow ); 8172 %} 8173 8174 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8175 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8176 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8177 Matcher::vector_length_in_bytes(n) <= 32 && 8178 !is_integral_type(Matcher::vector_element_basic_type(n))); 8179 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8180 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8181 ins_encode %{ 8182 int vlen_enc = vector_length_encoding(this); 8183 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8184 %} 8185 ins_pipe( pipe_slow ); 8186 %} 8187 8188 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{ 8189 predicate(UseAVX > 0 && EnableX86ECoreOpts && 8190 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8191 Matcher::vector_length_in_bytes(n) <= 32); 8192 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8193 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %} 8194 effect(TEMP vtmp, TEMP dst); 8195 ins_encode %{ 8196 int vlen_enc = vector_length_encoding(this); 8197 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8198 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8199 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8200 %} 8201 ins_pipe( pipe_slow ); 8202 %} 8203 8204 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 8205 predicate(Matcher::vector_length_in_bytes(n) == 64 && 8206 n->in(2)->bottom_type()->isa_vectmask() == nullptr); 8207 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8208 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using k2 as TEMP" %} 8209 effect(TEMP ktmp); 8210 ins_encode %{ 8211 int vlen_enc = Assembler::AVX_512bit; 8212 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8213 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 8214 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8215 %} 8216 ins_pipe( pipe_slow ); 8217 %} 8218 8219 8220 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 8221 predicate(n->in(2)->bottom_type()->isa_vectmask() && 8222 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 8223 VM_Version::supports_avx512bw())); 8224 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8225 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 8226 ins_encode %{ 8227 int vlen_enc = vector_length_encoding(this); 8228 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8229 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8230 %} 8231 ins_pipe( pipe_slow ); 8232 %} 8233 8234 // --------------------------------- ABS -------------------------------------- 8235 // a = |a| 8236 instruct vabsB_reg(vec dst, vec src) %{ 8237 match(Set dst (AbsVB src)); 8238 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 8239 ins_encode %{ 8240 uint vlen = Matcher::vector_length(this); 8241 if (vlen <= 16) { 8242 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8243 } else { 8244 int vlen_enc = vector_length_encoding(this); 8245 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8246 } 8247 %} 8248 ins_pipe( pipe_slow ); 8249 %} 8250 8251 instruct vabsS_reg(vec dst, vec src) %{ 8252 match(Set dst (AbsVS src)); 8253 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 8254 ins_encode %{ 8255 uint vlen = Matcher::vector_length(this); 8256 if (vlen <= 8) { 8257 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8258 } else { 8259 int vlen_enc = vector_length_encoding(this); 8260 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8261 } 8262 %} 8263 ins_pipe( pipe_slow ); 8264 %} 8265 8266 instruct vabsI_reg(vec dst, vec src) %{ 8267 match(Set dst (AbsVI src)); 8268 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 8269 ins_encode %{ 8270 uint vlen = Matcher::vector_length(this); 8271 if (vlen <= 4) { 8272 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8273 } else { 8274 int vlen_enc = vector_length_encoding(this); 8275 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8276 } 8277 %} 8278 ins_pipe( pipe_slow ); 8279 %} 8280 8281 instruct vabsL_reg(vec dst, vec src) %{ 8282 match(Set dst (AbsVL src)); 8283 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 8284 ins_encode %{ 8285 assert(UseAVX > 2, "required"); 8286 int vlen_enc = vector_length_encoding(this); 8287 if (!VM_Version::supports_avx512vl()) { 8288 vlen_enc = Assembler::AVX_512bit; 8289 } 8290 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8291 %} 8292 ins_pipe( pipe_slow ); 8293 %} 8294 8295 // --------------------------------- ABSNEG -------------------------------------- 8296 8297 instruct vabsnegF(vec dst, vec src) %{ 8298 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 8299 match(Set dst (AbsVF src)); 8300 match(Set dst (NegVF src)); 8301 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 8302 ins_cost(150); 8303 ins_encode %{ 8304 int opcode = 
this->ideal_Opcode(); 8305 int vlen = Matcher::vector_length(this); 8306 if (vlen == 2) { 8307 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8308 } else { 8309 assert(vlen == 8 || vlen == 16, "required"); 8310 int vlen_enc = vector_length_encoding(this); 8311 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8312 } 8313 %} 8314 ins_pipe( pipe_slow ); 8315 %} 8316 8317 instruct vabsneg4F(vec dst) %{ 8318 predicate(Matcher::vector_length(n) == 4); 8319 match(Set dst (AbsVF dst)); 8320 match(Set dst (NegVF dst)); 8321 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 8322 ins_cost(150); 8323 ins_encode %{ 8324 int opcode = this->ideal_Opcode(); 8325 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 8326 %} 8327 ins_pipe( pipe_slow ); 8328 %} 8329 8330 instruct vabsnegD(vec dst, vec src) %{ 8331 match(Set dst (AbsVD src)); 8332 match(Set dst (NegVD src)); 8333 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 8334 ins_encode %{ 8335 int opcode = this->ideal_Opcode(); 8336 uint vlen = Matcher::vector_length(this); 8337 if (vlen == 2) { 8338 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8339 } else { 8340 int vlen_enc = vector_length_encoding(this); 8341 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8342 } 8343 %} 8344 ins_pipe( pipe_slow ); 8345 %} 8346 8347 //------------------------------------- VectorTest -------------------------------------------- 8348 8349 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ 8350 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); 8351 match(Set cr (VectorTest src1 src2)); 8352 effect(TEMP vtmp); 8353 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %} 8354 ins_encode %{ 8355 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8356 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8357 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen); 8358 %} 8359 ins_pipe( pipe_slow ); 8360 %} 8361 8362 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{ 8363 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16); 8364 match(Set cr (VectorTest src1 src2)); 8365 format %{ "vptest_ge16 $src1, $src2\n\t" %} 8366 ins_encode %{ 8367 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8368 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8369 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen); 8370 %} 8371 ins_pipe( pipe_slow ); 8372 %} 8373 8374 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8375 predicate((Matcher::vector_length(n->in(1)) < 8 || 8376 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8377 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 8378 match(Set cr (VectorTest src1 src2)); 8379 effect(TEMP tmp); 8380 format %{ "ktest_alltrue_le8 $src1, $src2\t! 
using $tmp as TEMP" %} 8381 ins_encode %{ 8382 uint masklen = Matcher::vector_length(this, $src1); 8383 __ kmovwl($tmp$$Register, $src1$$KRegister); 8384 __ andl($tmp$$Register, (1 << masklen) - 1); 8385 __ cmpl($tmp$$Register, (1 << masklen) - 1); 8386 %} 8387 ins_pipe( pipe_slow ); 8388 %} 8389 8390 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8391 predicate((Matcher::vector_length(n->in(1)) < 8 || 8392 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8393 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 8394 match(Set cr (VectorTest src1 src2)); 8395 effect(TEMP tmp); 8396 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %} 8397 ins_encode %{ 8398 uint masklen = Matcher::vector_length(this, $src1); 8399 __ kmovwl($tmp$$Register, $src1$$KRegister); 8400 __ andl($tmp$$Register, (1 << masklen) - 1); 8401 %} 8402 ins_pipe( pipe_slow ); 8403 %} 8404 8405 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ 8406 predicate(Matcher::vector_length(n->in(1)) >= 16 || 8407 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq())); 8408 match(Set cr (VectorTest src1 src2)); 8409 format %{ "ktest_ge8 $src1, $src2\n\t" %} 8410 ins_encode %{ 8411 uint masklen = Matcher::vector_length(this, $src1); 8412 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister); 8413 %} 8414 ins_pipe( pipe_slow ); 8415 %} 8416 8417 //------------------------------------- LoadMask -------------------------------------------- 8418 8419 instruct loadMask(legVec dst, legVec src) %{ 8420 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw()); 8421 match(Set dst (VectorLoadMask src)); 8422 effect(TEMP dst); 8423 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8424 ins_encode %{ 8425 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8426 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8427 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8428 %} 8429 ins_pipe( pipe_slow ); 8430 %} 8431 8432 instruct loadMask64(kReg dst, vec src, vec xtmp) %{ 8433 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8434 match(Set dst (VectorLoadMask src)); 8435 effect(TEMP xtmp); 8436 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} 8437 ins_encode %{ 8438 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8439 true, Assembler::AVX_512bit); 8440 %} 8441 ins_pipe( pipe_slow ); 8442 %} 8443 8444 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8445 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8446 match(Set dst (VectorLoadMask src)); 8447 effect(TEMP xtmp); 8448 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8449 ins_encode %{ 8450 int vlen_enc = vector_length_encoding(in(1)); 8451 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8452 false, vlen_enc); 8453 %} 8454 ins_pipe( pipe_slow ); 8455 %} 8456 8457 //------------------------------------- StoreMask -------------------------------------------- 8458 8459 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8460 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8461 match(Set dst (VectorStoreMask src size)); 8462 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8463 ins_encode %{ 8464 int vlen = Matcher::vector_length(this); 8465 if (vlen <= 16 && UseAVX <= 2) { 8466 assert(UseSSE >= 3, "required"); 8467 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8468 } else { 8469 assert(UseAVX > 0, "required"); 8470 int src_vlen_enc = vector_length_encoding(this, $src); 8471 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8472 } 8473 %} 8474 ins_pipe( pipe_slow ); 8475 %} 8476 8477 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8478 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8479 match(Set dst (VectorStoreMask src size)); 8480 effect(TEMP_DEF dst, TEMP xtmp); 8481 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8482 ins_encode %{ 8483 int vlen_enc = Assembler::AVX_128bit; 8484 int vlen = Matcher::vector_length(this); 8485 if (vlen <= 8) { 8486 assert(UseSSE >= 3, "required"); 8487 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8488 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8489 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8490 } else { 8491 assert(UseAVX > 0, "required"); 8492 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8493 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8494 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8495 } 8496 %} 8497 ins_pipe( pipe_slow ); 8498 %} 8499 8500 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8501 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8502 match(Set dst (VectorStoreMask src size)); 8503 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8504 effect(TEMP_DEF dst, TEMP xtmp); 8505 ins_encode %{ 8506 int vlen_enc = Assembler::AVX_128bit; 8507 int vlen = Matcher::vector_length(this); 8508 if (vlen <= 4) { 8509 assert(UseSSE >= 3, "required"); 8510 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8511 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8512 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8513 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8514 } else { 8515 assert(UseAVX > 0, "required"); 8516 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8517 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8518 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8519 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8520 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8521 } 8522 %} 8523 ins_pipe( pipe_slow ); 8524 %} 8525 8526 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8527 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8528 match(Set dst (VectorStoreMask src size)); 8529 effect(TEMP_DEF dst, TEMP xtmp); 8530 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8531 ins_encode %{ 8532 assert(UseSSE >= 3, "required"); 8533 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8534 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8535 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8536 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8537 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8538 %} 8539 ins_pipe( pipe_slow ); 8540 %} 8541 8542 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8543 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8544 match(Set dst (VectorStoreMask src size)); 8545 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %} 8546 effect(TEMP_DEF dst, TEMP vtmp); 8547 ins_encode %{ 8548 int vlen_enc = Assembler::AVX_128bit; 8549 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8550 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8551 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8552 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8553 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8554 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8555 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8556 %} 8557 ins_pipe( pipe_slow ); 8558 %} 8559 8560 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8561 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8562 match(Set dst (VectorStoreMask src size)); 8563 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8564 ins_encode %{ 8565 int src_vlen_enc = vector_length_encoding(this, $src); 8566 int dst_vlen_enc = vector_length_encoding(this); 8567 if (!VM_Version::supports_avx512vl()) { 8568 src_vlen_enc = Assembler::AVX_512bit; 8569 } 8570 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8571 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8572 %} 8573 ins_pipe( pipe_slow ); 8574 %} 8575 8576 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8577 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8578 match(Set dst (VectorStoreMask src size)); 8579 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8580 ins_encode %{ 8581 int src_vlen_enc = vector_length_encoding(this, $src); 8582 int dst_vlen_enc = vector_length_encoding(this); 8583 if (!VM_Version::supports_avx512vl()) { 8584 src_vlen_enc = Assembler::AVX_512bit; 8585 } 8586 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8587 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8588 %} 8589 ins_pipe( pipe_slow ); 8590 %} 8591 8592 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{ 8593 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8594 match(Set dst (VectorStoreMask mask size)); 8595 effect(TEMP_DEF dst); 8596 format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %} 8597 ins_encode %{ 8598 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8599 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8600 false, Assembler::AVX_512bit, noreg); 8601 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8602 %} 8603 ins_pipe( pipe_slow ); 8604 %} 8605 8606 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8607 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8608 match(Set dst (VectorStoreMask mask size)); 8609 effect(TEMP_DEF dst); 8610 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8611 ins_encode %{ 8612 int dst_vlen_enc = vector_length_encoding(this); 8613 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8614 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8615 %} 8616 ins_pipe( pipe_slow ); 8617 %} 8618 8619 instruct vmaskcast_evex(kReg dst) %{ 8620 match(Set dst (VectorMaskCast dst)); 8621 ins_cost(0); 8622 format %{ "vector_mask_cast $dst" %} 8623 ins_encode %{ 8624 // empty 8625 %} 8626 ins_pipe(empty); 8627 %} 8628 8629 instruct vmaskcast(vec dst) %{ 8630 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); 8631 match(Set dst (VectorMaskCast dst)); 8632 ins_cost(0); 8633 format %{ "vector_mask_cast $dst" %} 8634 ins_encode %{ 8635 // empty 8636 %} 8637 ins_pipe(empty); 8638 %} 8639 8640 instruct vmaskcast_avx(vec dst, vec src) %{ 8641 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1))); 8642 match(Set dst (VectorMaskCast src)); 8643 format %{ "vector_mask_cast $dst, $src" %} 8644 ins_encode %{ 8645 int vlen = Matcher::vector_length(this); 8646 BasicType src_bt = Matcher::vector_element_basic_type(this, $src); 8647 BasicType dst_bt = Matcher::vector_element_basic_type(this); 8648 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen); 8649 %} 8650 ins_pipe(pipe_slow); 8651 %} 8652 8653 //-------------------------------- Load Iota Indices ---------------------------------- 8654 8655 instruct loadIotaIndices(vec dst, immI_0 src) %{ 8656 match(Set dst (VectorLoadConst src)); 8657 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8658 ins_encode %{ 8659 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8660 BasicType bt = Matcher::vector_element_basic_type(this); 8661 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt); 8662 %} 8663 ins_pipe( pipe_slow ); 8664 %} 8665 8666 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ 8667 match(Set dst (PopulateIndex src1 src2)); 8668 effect(TEMP dst, TEMP vtmp); 8669 format %{ "vector_populate_index $dst $src1 $src2\t! 
using $vtmp as TEMP" %} 8670 ins_encode %{ 8671 assert($src2$$constant == 1, "required"); 8672 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8673 int vlen_enc = vector_length_encoding(this); 8674 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8675 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8676 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8677 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8678 %} 8679 ins_pipe( pipe_slow ); 8680 %} 8681 8682 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ 8683 match(Set dst (PopulateIndex src1 src2)); 8684 effect(TEMP dst, TEMP vtmp); 8685 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} 8686 ins_encode %{ 8687 assert($src2$$constant == 1, "required"); 8688 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8689 int vlen_enc = vector_length_encoding(this); 8690 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8691 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8692 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8693 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8694 %} 8695 ins_pipe( pipe_slow ); 8696 %} 8697 8698 //-------------------------------- Rearrange ---------------------------------- 8699 8700 // LoadShuffle/Rearrange for Byte 8701 instruct rearrangeB(vec dst, vec shuffle) %{ 8702 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8703 Matcher::vector_length(n) < 32); 8704 match(Set dst (VectorRearrange dst shuffle)); 8705 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8706 ins_encode %{ 8707 assert(UseSSE >= 4, "required"); 8708 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8709 %} 8710 ins_pipe( pipe_slow ); 8711 %} 8712 8713 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8714 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8715 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8716 match(Set dst (VectorRearrange src shuffle)); 8717 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8718 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8719 ins_encode %{ 8720 assert(UseAVX >= 2, "required"); 8721 // Swap src into vtmp1 8722 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8723 // Shuffle swapped src to get entries from other 128 bit lane 8724 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8725 // Shuffle original src to get entries from self 128 bit lane 8726 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8727 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8728 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8729 // Perform the blend 8730 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8731 %} 8732 ins_pipe( pipe_slow ); 8733 %} 8734 8735 8736 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{ 8737 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8738 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi()); 8739 match(Set dst (VectorRearrange src shuffle)); 8740 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 8741 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %} 8742 ins_encode %{ 8743 int vlen_enc = vector_length_encoding(this); 8744 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, 8745 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, 8746 $rtmp$$Register, $ktmp$$KRegister, vlen_enc); 8747 %} 8748 ins_pipe( pipe_slow ); 8749 %} 8750 8751 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{ 8752 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8753 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); 8754 match(Set dst (VectorRearrange src shuffle)); 8755 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8756 ins_encode %{ 8757 int vlen_enc = vector_length_encoding(this); 8758 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8759 %} 8760 ins_pipe( pipe_slow ); 8761 %} 8762 8763 // LoadShuffle/Rearrange for Short 8764 8765 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{ 8766 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8767 !VM_Version::supports_avx512bw()); 8768 match(Set dst (VectorLoadShuffle src)); 8769 effect(TEMP dst, TEMP vtmp); 8770 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8771 ins_encode %{ 8772 // Create a byte shuffle mask from short shuffle mask 8773 // only byte shuffle instruction available on these platforms 8774 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8775 if (UseAVX == 0) { 8776 assert(vlen_in_bytes <= 16, "required"); 8777 // Multiply each shuffle by two to get byte index 8778 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister); 8779 __ psllw($vtmp$$XMMRegister, 1); 8780 8781 // Duplicate to create 2 copies of byte index 8782 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8783 __ psllw($dst$$XMMRegister, 8); 8784 __ por($dst$$XMMRegister, $vtmp$$XMMRegister); 8785 8786 // Add one to get alternate byte index 8787 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg); 8788 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8789 } else { 8790 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); 8791 int vlen_enc = vector_length_encoding(this); 8792 // Multiply each shuffle by two to get byte index 8793 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc); 8794 8795 // Duplicate to create 2 copies of byte index 8796 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); 8797 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8798 8799 // Add one to get alternate byte index 8800 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg); 8801 } 8802 %} 8803 ins_pipe( pipe_slow ); 8804 %} 8805 8806 instruct rearrangeS(vec dst, vec shuffle) %{ 8807 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8808 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); 8809 match(Set dst (VectorRearrange dst shuffle)); 8810 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8811 ins_encode %{ 8812 assert(UseSSE >= 4, "required"); 8813 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8814 %} 8815 ins_pipe( pipe_slow ); 8816 %} 8817 8818 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8819 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8820 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 8821 match(Set dst (VectorRearrange src shuffle)); 8822 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8823 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8824 ins_encode %{ 8825 assert(UseAVX >= 2, "required"); 8826 // Swap src into vtmp1 8827 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8828 // Shuffle swapped src to get entries from other 128 bit lane 8829 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8830 // Shuffle original src to get entries from self 128 bit lane 8831 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8832 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8833 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8834 // Perform the blend 8835 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8836 %} 8837 ins_pipe( pipe_slow ); 8838 %} 8839 8840 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 8841 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8842 VM_Version::supports_avx512bw()); 8843 match(Set dst (VectorRearrange src shuffle)); 8844 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8845 ins_encode %{ 8846 int vlen_enc = vector_length_encoding(this); 8847 if (!VM_Version::supports_avx512vl()) { 8848 vlen_enc = Assembler::AVX_512bit; 8849 } 8850 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8851 %} 8852 ins_pipe( pipe_slow ); 8853 %} 8854 8855 // LoadShuffle/Rearrange for Integer and Float 8856 8857 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{ 8858 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8859 Matcher::vector_length(n) == 4 && UseAVX == 0); 8860 match(Set dst (VectorLoadShuffle src)); 8861 effect(TEMP dst, TEMP vtmp); 8862 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8863 ins_encode %{ 8864 assert(UseSSE >= 4, "required"); 8865 8866 // Create a byte shuffle mask from int shuffle mask 8867 // only byte shuffle instruction available on these platforms 8868 8869 // Duplicate and multiply each shuffle by 4 8870 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister); 8871 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8872 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8873 __ psllw($vtmp$$XMMRegister, 2); 8874 8875 // Duplicate again to create 4 copies of byte index 8876 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8877 __ psllw($dst$$XMMRegister, 8); 8878 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 8879 8880 // Add 3,2,1,0 to get alternate byte index 8881 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg); 8882 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8883 %} 8884 ins_pipe( pipe_slow ); 8885 %} 8886 8887 instruct rearrangeI(vec dst, vec shuffle) %{ 8888 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8889 UseAVX == 0); 8890 match(Set dst (VectorRearrange dst shuffle)); 8891 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8892 ins_encode %{ 8893 assert(UseSSE >= 4, "required"); 8894 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8895 %} 8896 ins_pipe( pipe_slow ); 8897 %} 8898 8899 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 8900 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8901 UseAVX > 0); 8902 match(Set dst (VectorRearrange src shuffle)); 8903 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8904 ins_encode %{ 8905 int vlen_enc = vector_length_encoding(this); 8906 BasicType bt = Matcher::vector_element_basic_type(this); 8907 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8908 %} 8909 ins_pipe( pipe_slow ); 8910 %} 8911 8912 // LoadShuffle/Rearrange for Long and Double 8913 8914 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{ 8915 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8916 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8917 match(Set dst (VectorLoadShuffle src)); 8918 effect(TEMP dst, TEMP vtmp); 8919 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8920 ins_encode %{ 8921 assert(UseAVX >= 2, "required"); 8922 8923 int vlen_enc = vector_length_encoding(this); 8924 // Create a double word shuffle mask from long shuffle mask 8925 // only double word shuffle instruction available on these platforms 8926 8927 // Multiply each shuffle by two to get double word index 8928 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc); 8929 8930 // Duplicate each double word shuffle 8931 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 8932 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8933 8934 // Add one to get alternate double word index 8935 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg); 8936 %} 8937 ins_pipe( pipe_slow ); 8938 %} 8939 8940 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 8941 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8942 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8943 match(Set dst (VectorRearrange src shuffle)); 8944 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8945 ins_encode %{ 8946 assert(UseAVX >= 2, "required"); 8947 8948 int vlen_enc = vector_length_encoding(this); 8949 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8950 %} 8951 ins_pipe( pipe_slow ); 8952 %} 8953 8954 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 8955 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8956 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 8957 match(Set dst (VectorRearrange src shuffle)); 8958 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8959 ins_encode %{ 8960 assert(UseAVX > 2, "required"); 8961 8962 int vlen_enc = vector_length_encoding(this); 8963 if (vlen_enc == Assembler::AVX_128bit) { 8964 vlen_enc = Assembler::AVX_256bit; 8965 } 8966 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8967 %} 8968 ins_pipe( pipe_slow ); 8969 %} 8970 8971 // --------------------------------- FMA -------------------------------------- 8972 // a * b + c 8973 8974 instruct vfmaF_reg(vec a, vec b, vec c) %{ 8975 match(Set c (FmaVF c (Binary a b))); 8976 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8977 ins_cost(150); 8978 ins_encode %{ 8979 assert(UseFMA, "not enabled"); 8980 int vlen_enc = vector_length_encoding(this); 8981 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8982 %} 8983 ins_pipe( pipe_slow ); 8984 %} 8985 8986 instruct vfmaF_mem(vec a, memory b, vec c) %{ 8987 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8988 match(Set c (FmaVF c (Binary a (LoadVector b)))); 8989 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8990 ins_cost(150); 8991 ins_encode %{ 8992 assert(UseFMA, "not enabled"); 8993 int vlen_enc = vector_length_encoding(this); 8994 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8995 %} 8996 ins_pipe( pipe_slow ); 8997 %} 8998 8999 instruct vfmaD_reg(vec a, vec b, vec c) %{ 9000 match(Set c (FmaVD c (Binary a b))); 9001 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9002 ins_cost(150); 9003 ins_encode %{ 9004 assert(UseFMA, "not enabled"); 9005 int vlen_enc = vector_length_encoding(this); 9006 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9007 %} 9008 ins_pipe( pipe_slow ); 9009 %} 
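// Note on the FmaVF/FmaVD rules in this section: C2 only creates FmaV nodes
// when UseFMA is enabled (hence the asserts in the encodings above), and each
// lane is computed with a single rounding step. A minimal per-lane sketch of
// the intended semantics (illustration only, not the actual encoding; 'vlen'
// here just stands for the number of lanes):
//
//   for (int i = 0; i < vlen; i++) {
//     c[i] = a[i] * b[i] + c[i];   // fused: rounded once per lane
//   }
//
// The *_mem variants fold the load of the second multiplicand into the FMA
// instruction; their predicates restrict this to vectors wider than 8 bytes.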
9010 9011 instruct vfmaD_mem(vec a, memory b, vec c) %{ 9012 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9013 match(Set c (FmaVD c (Binary a (LoadVector b)))); 9014 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9015 ins_cost(150); 9016 ins_encode %{ 9017 assert(UseFMA, "not enabled"); 9018 int vlen_enc = vector_length_encoding(this); 9019 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9020 %} 9021 ins_pipe( pipe_slow ); 9022 %} 9023 9024 // --------------------------------- Vector Multiply Add -------------------------------------- 9025 9026 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 9027 predicate(UseAVX == 0); 9028 match(Set dst (MulAddVS2VI dst src1)); 9029 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 9030 ins_encode %{ 9031 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 9032 %} 9033 ins_pipe( pipe_slow ); 9034 %} 9035 9036 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 9037 predicate(UseAVX > 0); 9038 match(Set dst (MulAddVS2VI src1 src2)); 9039 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 9040 ins_encode %{ 9041 int vlen_enc = vector_length_encoding(this); 9042 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9043 %} 9044 ins_pipe( pipe_slow ); 9045 %} 9046 9047 // --------------------------------- Vector Multiply Add Add ---------------------------------- 9048 9049 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 9050 predicate(VM_Version::supports_avx512_vnni()); 9051 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 9052 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %} 9053 ins_encode %{ 9054 assert(UseAVX > 2, "required"); 9055 int vlen_enc = vector_length_encoding(this); 9056 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9057 %} 9058 ins_pipe( pipe_slow ); 9059 ins_cost(10); 9060 %} 9061 9062 // --------------------------------- PopCount -------------------------------------- 9063 9064 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 9065 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9066 match(Set dst (PopCountVI src)); 9067 match(Set dst (PopCountVL src)); 9068 format %{ "vector_popcount_integral $dst, $src" %} 9069 ins_encode %{ 9070 int opcode = this->ideal_Opcode(); 9071 int vlen_enc = vector_length_encoding(this, $src); 9072 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9073 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 9074 %} 9075 ins_pipe( pipe_slow ); 9076 %} 9077 9078 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9079 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9080 match(Set dst (PopCountVI src mask)); 9081 match(Set dst (PopCountVL src mask)); 9082 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 9083 ins_encode %{ 9084 int vlen_enc = vector_length_encoding(this, $src); 9085 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9086 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9087 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 9088 %} 9089 ins_pipe( pipe_slow ); 9090 %} 9091 9092 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 9093 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9094 
match(Set dst (PopCountVI src)); 9095 match(Set dst (PopCountVL src)); 9096 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9097 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 9098 ins_encode %{ 9099 int opcode = this->ideal_Opcode(); 9100 int vlen_enc = vector_length_encoding(this, $src); 9101 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9102 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9103 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 9104 %} 9105 ins_pipe( pipe_slow ); 9106 %} 9107 9108 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 9109 9110 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 9111 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9112 Matcher::vector_length_in_bytes(n->in(1)))); 9113 match(Set dst (CountTrailingZerosV src)); 9114 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 9115 ins_cost(400); 9116 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 9117 ins_encode %{ 9118 int vlen_enc = vector_length_encoding(this, $src); 9119 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9120 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9121 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9122 %} 9123 ins_pipe( pipe_slow ); 9124 %} 9125 9126 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9127 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9128 VM_Version::supports_avx512cd() && 9129 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9130 match(Set dst (CountTrailingZerosV src)); 9131 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9132 ins_cost(400); 9133 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 9134 ins_encode %{ 9135 int vlen_enc = vector_length_encoding(this, $src); 9136 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9137 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9138 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9139 %} 9140 ins_pipe( pipe_slow ); 9141 %} 9142 9143 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{ 9144 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9145 match(Set dst (CountTrailingZerosV src)); 9146 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp); 9147 ins_cost(400); 9148 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %} 9149 ins_encode %{ 9150 int vlen_enc = vector_length_encoding(this, $src); 9151 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9152 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9153 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 9154 $ktmp$$KRegister, $rtmp$$Register, vlen_enc); 9155 %} 9156 ins_pipe( pipe_slow ); 9157 %} 9158 9159 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9160 
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9161 match(Set dst (CountTrailingZerosV src)); 9162 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9163 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9164 ins_encode %{ 9165 int vlen_enc = vector_length_encoding(this, $src); 9166 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9167 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9168 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9169 %} 9170 ins_pipe( pipe_slow ); 9171 %} 9172 9173 9174 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 9175 9176 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 9177 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 9178 effect(TEMP dst); 9179 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9180 ins_encode %{ 9181 int vector_len = vector_length_encoding(this); 9182 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 9183 %} 9184 ins_pipe( pipe_slow ); 9185 %} 9186 9187 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 9188 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 9189 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 9190 effect(TEMP dst); 9191 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9192 ins_encode %{ 9193 int vector_len = vector_length_encoding(this); 9194 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 9195 %} 9196 ins_pipe( pipe_slow ); 9197 %} 9198 9199 // --------------------------------- Rotation Operations ---------------------------------- 9200 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 9201 match(Set dst (RotateLeftV src shift)); 9202 match(Set dst (RotateRightV src shift)); 9203 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 9204 ins_encode %{ 9205 int opcode = this->ideal_Opcode(); 9206 int vector_len = vector_length_encoding(this); 9207 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9208 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 9209 %} 9210 ins_pipe( pipe_slow ); 9211 %} 9212 9213 instruct vprorate(vec dst, vec src, vec shift) %{ 9214 match(Set dst (RotateLeftV src shift)); 9215 match(Set dst (RotateRightV src shift)); 9216 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 9217 ins_encode %{ 9218 int opcode = this->ideal_Opcode(); 9219 int vector_len = vector_length_encoding(this); 9220 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9221 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9222 %} 9223 ins_pipe( pipe_slow ); 9224 %} 9225 9226 // ---------------------------------- Masked Operations ------------------------------------ 9227 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{ 9228 predicate(!n->in(3)->bottom_type()->isa_vectmask()); 9229 match(Set dst (LoadVectorMasked mem mask)); 9230 format %{ "vector_masked_load $dst, $mem, $mask \t! 
vector masked copy" %} 9231 ins_encode %{ 9232 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9233 int vlen_enc = vector_length_encoding(this); 9234 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc); 9235 %} 9236 ins_pipe( pipe_slow ); 9237 %} 9238 9239 9240 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{ 9241 predicate(n->in(3)->bottom_type()->isa_vectmask()); 9242 match(Set dst (LoadVectorMasked mem mask)); 9243 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 9244 ins_encode %{ 9245 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9246 int vector_len = vector_length_encoding(this); 9247 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len); 9248 %} 9249 ins_pipe( pipe_slow ); 9250 %} 9251 9252 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{ 9253 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9254 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9255 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9256 ins_encode %{ 9257 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9258 int vlen_enc = vector_length_encoding(src_node); 9259 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9260 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc); 9261 %} 9262 ins_pipe( pipe_slow ); 9263 %} 9264 9265 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{ 9266 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9267 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9268 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9269 ins_encode %{ 9270 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9271 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9272 int vlen_enc = vector_length_encoding(src_node); 9273 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc); 9274 %} 9275 ins_pipe( pipe_slow ); 9276 %} 9277 9278 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{ 9279 match(Set addr (VerifyVectorAlignment addr mask)); 9280 effect(KILL cr); 9281 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %} 9282 ins_encode %{ 9283 Label Lskip; 9284 // check if masked bits of addr are zero 9285 __ testq($addr$$Register, $mask$$constant); 9286 __ jccb(Assembler::equal, Lskip); 9287 __ stop("verify_vector_alignment found a misaligned vector memory access"); 9288 __ bind(Lskip); 9289 %} 9290 ins_pipe(pipe_slow); 9291 %} 9292 9293 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 9294 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 9295 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 9296 format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
vector mask comparison" %} 9297 ins_encode %{ 9298 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 9299 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 9300 9301 Label DONE; 9302 int vlen_enc = vector_length_encoding(this, $src1); 9303 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 9304 9305 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 9306 __ mov64($dst$$Register, -1L); 9307 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 9308 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 9309 __ jccb(Assembler::carrySet, DONE); 9310 __ kmovql($dst$$Register, $ktmp1$$KRegister); 9311 __ notq($dst$$Register); 9312 __ tzcntq($dst$$Register, $dst$$Register); 9313 __ bind(DONE); 9314 %} 9315 ins_pipe( pipe_slow ); 9316 %} 9317 9318 9319 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{ 9320 match(Set dst (VectorMaskGen len)); 9321 effect(TEMP temp, KILL cr); 9322 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9323 ins_encode %{ 9324 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9325 %} 9326 ins_pipe( pipe_slow ); 9327 %} 9328 9329 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9330 match(Set dst (VectorMaskGen len)); 9331 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9332 effect(TEMP temp); 9333 ins_encode %{ 9334 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9335 __ kmovql($dst$$KRegister, $temp$$Register); 9336 %} 9337 ins_pipe( pipe_slow ); 9338 %} 9339 9340 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9341 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9342 match(Set dst (VectorMaskToLong mask)); 9343 effect(TEMP dst, KILL cr); 9344 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9345 ins_encode %{ 9346 int opcode = this->ideal_Opcode(); 9347 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9348 int mask_len = Matcher::vector_length(this, $mask); 9349 int mask_size = mask_len * type2aelembytes(mbt); 9350 int vlen_enc = vector_length_encoding(this, $mask); 9351 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9352 $dst$$Register, mask_len, mask_size, vlen_enc); 9353 %} 9354 ins_pipe( pipe_slow ); 9355 %} 9356 9357 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9358 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9359 match(Set dst (VectorMaskToLong mask)); 9360 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9361 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9362 ins_encode %{ 9363 int opcode = this->ideal_Opcode(); 9364 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9365 int mask_len = Matcher::vector_length(this, $mask); 9366 int vlen_enc = vector_length_encoding(this, $mask); 9367 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9368 $dst$$Register, mask_len, mbt, vlen_enc); 9369 %} 9370 ins_pipe( pipe_slow ); 9371 %} 9372 9373 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9374 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9375 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9376 format %{ "vector_tolong_avx $dst, $mask \t! 
using $xtmp as TEMP" %} 9377 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9378 ins_encode %{ 9379 int opcode = this->ideal_Opcode(); 9380 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9381 int mask_len = Matcher::vector_length(this, $mask); 9382 int vlen_enc = vector_length_encoding(this, $mask); 9383 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9384 $dst$$Register, mask_len, mbt, vlen_enc); 9385 %} 9386 ins_pipe( pipe_slow ); 9387 %} 9388 9389 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9390 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9391 match(Set dst (VectorMaskTrueCount mask)); 9392 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9393 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9394 ins_encode %{ 9395 int opcode = this->ideal_Opcode(); 9396 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9397 int mask_len = Matcher::vector_length(this, $mask); 9398 int mask_size = mask_len * type2aelembytes(mbt); 9399 int vlen_enc = vector_length_encoding(this, $mask); 9400 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9401 $tmp$$Register, mask_len, mask_size, vlen_enc); 9402 %} 9403 ins_pipe( pipe_slow ); 9404 %} 9405 9406 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9407 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9408 match(Set dst (VectorMaskTrueCount mask)); 9409 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9410 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9411 ins_encode %{ 9412 int opcode = this->ideal_Opcode(); 9413 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9414 int mask_len = Matcher::vector_length(this, $mask); 9415 int vlen_enc = vector_length_encoding(this, $mask); 9416 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9417 $tmp$$Register, mask_len, mbt, vlen_enc); 9418 %} 9419 ins_pipe( pipe_slow ); 9420 %} 9421 9422 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9423 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9424 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9425 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9426 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9427 ins_encode %{ 9428 int opcode = this->ideal_Opcode(); 9429 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9430 int mask_len = Matcher::vector_length(this, $mask); 9431 int vlen_enc = vector_length_encoding(this, $mask); 9432 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9433 $tmp$$Register, mask_len, mbt, vlen_enc); 9434 %} 9435 ins_pipe( pipe_slow ); 9436 %} 9437 9438 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9439 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9440 match(Set dst (VectorMaskFirstTrue mask)); 9441 match(Set dst (VectorMaskLastTrue mask)); 9442 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9443 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! 
using $tmp as TEMP" %} 9444 ins_encode %{ 9445 int opcode = this->ideal_Opcode(); 9446 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9447 int mask_len = Matcher::vector_length(this, $mask); 9448 int mask_size = mask_len * type2aelembytes(mbt); 9449 int vlen_enc = vector_length_encoding(this, $mask); 9450 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9451 $tmp$$Register, mask_len, mask_size, vlen_enc); 9452 %} 9453 ins_pipe( pipe_slow ); 9454 %} 9455 9456 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9457 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9458 match(Set dst (VectorMaskFirstTrue mask)); 9459 match(Set dst (VectorMaskLastTrue mask)); 9460 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9461 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9462 ins_encode %{ 9463 int opcode = this->ideal_Opcode(); 9464 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9465 int mask_len = Matcher::vector_length(this, $mask); 9466 int vlen_enc = vector_length_encoding(this, $mask); 9467 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9468 $tmp$$Register, mask_len, mbt, vlen_enc); 9469 %} 9470 ins_pipe( pipe_slow ); 9471 %} 9472 9473 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9474 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9475 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9476 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9477 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9478 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! 
using $tmp, $xtmp as TEMP" %} 9479 ins_encode %{ 9480 int opcode = this->ideal_Opcode(); 9481 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9482 int mask_len = Matcher::vector_length(this, $mask); 9483 int vlen_enc = vector_length_encoding(this, $mask); 9484 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9485 $tmp$$Register, mask_len, mbt, vlen_enc); 9486 %} 9487 ins_pipe( pipe_slow ); 9488 %} 9489 9490 // --------------------------------- Compress/Expand Operations --------------------------- 9491 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{ 9492 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 9493 match(Set dst (CompressV src mask)); 9494 match(Set dst (ExpandV src mask)); 9495 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr); 9496 format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %} 9497 ins_encode %{ 9498 int opcode = this->ideal_Opcode(); 9499 int vlen_enc = vector_length_encoding(this); 9500 BasicType bt = Matcher::vector_element_basic_type(this); 9501 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register, 9502 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc); 9503 %} 9504 ins_pipe( pipe_slow ); 9505 %} 9506 9507 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9508 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 9509 match(Set dst (CompressV src mask)); 9510 match(Set dst (ExpandV src mask)); 9511 format %{ "vector_compress_expand $dst, $src, $mask" %} 9512 ins_encode %{ 9513 int opcode = this->ideal_Opcode(); 9514 int vector_len = vector_length_encoding(this); 9515 BasicType bt = Matcher::vector_element_basic_type(this); 9516 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9517 %} 9518 ins_pipe( pipe_slow ); 9519 %} 9520 9521 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9522 match(Set dst (CompressM mask)); 9523 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9524 format %{ "mask_compress_evex $dst, $mask\t! 
using $rtmp1 and $rtmp2 as TEMP" %} 9525 ins_encode %{ 9526 assert(this->in(1)->bottom_type()->isa_vectmask(), ""); 9527 int mask_len = Matcher::vector_length(this); 9528 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len); 9529 %} 9530 ins_pipe( pipe_slow ); 9531 %} 9532 9533 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ 9534 9535 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9536 predicate(!VM_Version::supports_gfni()); 9537 match(Set dst (ReverseV src)); 9538 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9539 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9540 ins_encode %{ 9541 int vec_enc = vector_length_encoding(this); 9542 BasicType bt = Matcher::vector_element_basic_type(this); 9543 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9544 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9545 %} 9546 ins_pipe( pipe_slow ); 9547 %} 9548 9549 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{ 9550 predicate(VM_Version::supports_gfni()); 9551 match(Set dst (ReverseV src)); 9552 effect(TEMP dst, TEMP xtmp); 9553 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %} 9554 ins_encode %{ 9555 int vec_enc = vector_length_encoding(this); 9556 BasicType bt = Matcher::vector_element_basic_type(this); 9557 InternalAddress addr = $constantaddress(jlong(0x8040201008040201)); 9558 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc, 9559 $xtmp$$XMMRegister); 9560 %} 9561 ins_pipe( pipe_slow ); 9562 %} 9563 9564 instruct vreverse_byte_reg(vec dst, vec src) %{ 9565 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); 9566 match(Set dst (ReverseBytesV src)); 9567 effect(TEMP dst); 9568 format %{ "vector_reverse_byte $dst, $src" %} 9569 ins_encode %{ 9570 int vec_enc = vector_length_encoding(this); 9571 BasicType bt = Matcher::vector_element_basic_type(this); 9572 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc); 9573 %} 9574 ins_pipe( pipe_slow ); 9575 %} 9576 9577 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9578 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64); 9579 match(Set dst (ReverseBytesV src)); 9580 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9581 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9582 ins_encode %{ 9583 int vec_enc = vector_length_encoding(this); 9584 BasicType bt = Matcher::vector_element_basic_type(this); 9585 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9586 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9587 %} 9588 ins_pipe( pipe_slow ); 9589 %} 9590 9591 // ---------------------------------- Vector Count Leading Zeros ----------------------------------- 9592 9593 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ 9594 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9595 Matcher::vector_length_in_bytes(n->in(1)))); 9596 match(Set dst (CountLeadingZerosV src)); 9597 format %{ "vector_count_leading_zeros $dst, $src" %} 9598 ins_encode %{ 9599 int vlen_enc = vector_length_encoding(this, $src); 9600 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9601 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, 
$src$$XMMRegister, xnoreg, 9602 xnoreg, xnoreg, k0, noreg, true, vlen_enc); 9603 %} 9604 ins_pipe( pipe_slow ); 9605 %} 9606 9607 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9608 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9609 Matcher::vector_length_in_bytes(n->in(1)))); 9610 match(Set dst (CountLeadingZerosV src mask)); 9611 format %{ "vector_count_leading_zeros $dst, $src, $mask" %} 9612 ins_encode %{ 9613 int vlen_enc = vector_length_encoding(this, $src); 9614 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9615 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9616 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, 9617 xnoreg, $mask$$KRegister, noreg, true, vlen_enc); 9618 %} 9619 ins_pipe( pipe_slow ); 9620 %} 9621 9622 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{ 9623 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9624 VM_Version::supports_avx512cd() && 9625 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9626 match(Set dst (CountLeadingZerosV src)); 9627 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 9628 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %} 9629 ins_encode %{ 9630 int vlen_enc = vector_length_encoding(this, $src); 9631 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9632 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9633 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc); 9634 %} 9635 ins_pipe( pipe_slow ); 9636 %} 9637 9638 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{ 9639 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9640 match(Set dst (CountLeadingZerosV src)); 9641 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 9642 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %} 9643 ins_encode %{ 9644 int vlen_enc = vector_length_encoding(this, $src); 9645 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9646 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9647 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister, 9648 $rtmp$$Register, true, vlen_enc); 9649 %} 9650 ins_pipe( pipe_slow ); 9651 %} 9652 9653 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{ 9654 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT && 9655 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9656 match(Set dst (CountLeadingZerosV src)); 9657 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 9658 format %{ "vector_count_leading_zeros $dst, $src\t! 
using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 9659 ins_encode %{ 9660 int vlen_enc = vector_length_encoding(this, $src); 9661 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9662 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9663 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc); 9664 %} 9665 ins_pipe( pipe_slow ); 9666 %} 9667 9668 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9669 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT && 9670 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9671 match(Set dst (CountLeadingZerosV src)); 9672 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9673 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9674 ins_encode %{ 9675 int vlen_enc = vector_length_encoding(this, $src); 9676 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9677 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9678 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9679 %} 9680 ins_pipe( pipe_slow ); 9681 %} 9682 9683 // ---------------------------------- Vector Masked Operations ------------------------------------ 9684 9685 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ 9686 match(Set dst (AddVB (Binary dst src2) mask)); 9687 match(Set dst (AddVS (Binary dst src2) mask)); 9688 match(Set dst (AddVI (Binary dst src2) mask)); 9689 match(Set dst (AddVL (Binary dst src2) mask)); 9690 match(Set dst (AddVF (Binary dst src2) mask)); 9691 match(Set dst (AddVD (Binary dst src2) mask)); 9692 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9693 ins_encode %{ 9694 int vlen_enc = vector_length_encoding(this); 9695 BasicType bt = Matcher::vector_element_basic_type(this); 9696 int opc = this->ideal_Opcode(); 9697 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9698 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9699 %} 9700 ins_pipe( pipe_slow ); 9701 %} 9702 9703 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ 9704 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); 9705 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); 9706 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); 9707 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); 9708 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); 9709 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); 9710 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9711 ins_encode %{ 9712 int vlen_enc = vector_length_encoding(this); 9713 BasicType bt = Matcher::vector_element_basic_type(this); 9714 int opc = this->ideal_Opcode(); 9715 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9716 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9717 %} 9718 ins_pipe( pipe_slow ); 9719 %} 9720 9721 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ 9722 match(Set dst (XorV (Binary dst src2) mask)); 9723 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! 
xor masked operation" %} 9724 ins_encode %{ 9725 int vlen_enc = vector_length_encoding(this); 9726 BasicType bt = Matcher::vector_element_basic_type(this); 9727 int opc = this->ideal_Opcode(); 9728 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9729 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9730 %} 9731 ins_pipe( pipe_slow ); 9732 %} 9733 9734 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9735 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9736 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9737 ins_encode %{ 9738 int vlen_enc = vector_length_encoding(this); 9739 BasicType bt = Matcher::vector_element_basic_type(this); 9740 int opc = this->ideal_Opcode(); 9741 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9742 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9743 %} 9744 ins_pipe( pipe_slow ); 9745 %} 9746 9747 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9748 match(Set dst (OrV (Binary dst src2) mask)); 9749 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9750 ins_encode %{ 9751 int vlen_enc = vector_length_encoding(this); 9752 BasicType bt = Matcher::vector_element_basic_type(this); 9753 int opc = this->ideal_Opcode(); 9754 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9755 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9756 %} 9757 ins_pipe( pipe_slow ); 9758 %} 9759 9760 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9761 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9762 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9763 ins_encode %{ 9764 int vlen_enc = vector_length_encoding(this); 9765 BasicType bt = Matcher::vector_element_basic_type(this); 9766 int opc = this->ideal_Opcode(); 9767 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9768 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9769 %} 9770 ins_pipe( pipe_slow ); 9771 %} 9772 9773 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9774 match(Set dst (AndV (Binary dst src2) mask)); 9775 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9776 ins_encode %{ 9777 int vlen_enc = vector_length_encoding(this); 9778 BasicType bt = Matcher::vector_element_basic_type(this); 9779 int opc = this->ideal_Opcode(); 9780 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9781 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9782 %} 9783 ins_pipe( pipe_slow ); 9784 %} 9785 9786 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 9787 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 9788 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9789 ins_encode %{ 9790 int vlen_enc = vector_length_encoding(this); 9791 BasicType bt = Matcher::vector_element_basic_type(this); 9792 int opc = this->ideal_Opcode(); 9793 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9794 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9795 %} 9796 ins_pipe( pipe_slow ); 9797 %} 9798 9799 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 9800 match(Set dst (SubVB (Binary dst src2) mask)); 9801 match(Set dst (SubVS (Binary dst src2) mask)); 9802 match(Set dst (SubVI (Binary dst src2) mask)); 9803 match(Set dst (SubVL (Binary dst src2) mask)); 9804 match(Set dst (SubVF (Binary dst src2) mask)); 9805 match(Set dst (SubVD (Binary dst src2) mask)); 9806 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! 
sub masked operation" %} 9807 ins_encode %{ 9808 int vlen_enc = vector_length_encoding(this); 9809 BasicType bt = Matcher::vector_element_basic_type(this); 9810 int opc = this->ideal_Opcode(); 9811 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9812 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9813 %} 9814 ins_pipe( pipe_slow ); 9815 %} 9816 9817 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{ 9818 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask)); 9819 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask)); 9820 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask)); 9821 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask)); 9822 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask)); 9823 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask)); 9824 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 9825 ins_encode %{ 9826 int vlen_enc = vector_length_encoding(this); 9827 BasicType bt = Matcher::vector_element_basic_type(this); 9828 int opc = this->ideal_Opcode(); 9829 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9830 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9831 %} 9832 ins_pipe( pipe_slow ); 9833 %} 9834 9835 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{ 9836 match(Set dst (MulVS (Binary dst src2) mask)); 9837 match(Set dst (MulVI (Binary dst src2) mask)); 9838 match(Set dst (MulVL (Binary dst src2) mask)); 9839 match(Set dst (MulVF (Binary dst src2) mask)); 9840 match(Set dst (MulVD (Binary dst src2) mask)); 9841 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9842 ins_encode %{ 9843 int vlen_enc = vector_length_encoding(this); 9844 BasicType bt = Matcher::vector_element_basic_type(this); 9845 int opc = this->ideal_Opcode(); 9846 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9847 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9848 %} 9849 ins_pipe( pipe_slow ); 9850 %} 9851 9852 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{ 9853 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask)); 9854 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask)); 9855 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask)); 9856 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask)); 9857 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask)); 9858 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9859 ins_encode %{ 9860 int vlen_enc = vector_length_encoding(this); 9861 BasicType bt = Matcher::vector_element_basic_type(this); 9862 int opc = this->ideal_Opcode(); 9863 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9864 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9865 %} 9866 ins_pipe( pipe_slow ); 9867 %} 9868 9869 instruct vsqrt_reg_masked(vec dst, kReg mask) %{ 9870 match(Set dst (SqrtVF dst mask)); 9871 match(Set dst (SqrtVD dst mask)); 9872 format %{ "vpsqrt_masked $dst, $mask\t! 
instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // The masked comparison is dispatched on the element type of src1.
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}
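// Opmask (k-register) manipulation: MaskAll broadcast, mask negation via
// XorVMask with an all-ones MaskAll, and long-to-mask conversions for both
// AVX (vector mask) and EVEX (k-register) forms.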
"%s", type2name(src1_elem_bt)); break; 10237 } 10238 %} 10239 ins_pipe( pipe_slow ); 10240 %} 10241 10242 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{ 10243 predicate(Matcher::vector_length(n) <= 32); 10244 match(Set dst (MaskAll src)); 10245 format %{ "mask_all_evexI_LE32 $dst, $src \t" %} 10246 ins_encode %{ 10247 int mask_len = Matcher::vector_length(this); 10248 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); 10249 %} 10250 ins_pipe( pipe_slow ); 10251 %} 10252 10253 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ 10254 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); 10255 match(Set dst (XorVMask src (MaskAll cnt))); 10256 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp); 10257 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %} 10258 ins_encode %{ 10259 uint masklen = Matcher::vector_length(this); 10260 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register); 10261 %} 10262 ins_pipe( pipe_slow ); 10263 %} 10264 10265 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{ 10266 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) || 10267 (Matcher::vector_length(n) == 16) || 10268 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw())); 10269 match(Set dst (XorVMask src (MaskAll cnt))); 10270 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %} 10271 ins_encode %{ 10272 uint masklen = Matcher::vector_length(this); 10273 __ knot(masklen, $dst$$KRegister, $src$$KRegister); 10274 %} 10275 ins_pipe( pipe_slow ); 10276 %} 10277 10278 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{ 10279 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8); 10280 match(Set dst (VectorLongToMask src)); 10281 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp); 10282 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %} 10283 ins_encode %{ 10284 int mask_len = Matcher::vector_length(this); 10285 int vec_enc = vector_length_encoding(mask_len); 10286 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10287 $rtmp2$$Register, xnoreg, mask_len, vec_enc); 10288 %} 10289 ins_pipe( pipe_slow ); 10290 %} 10291 10292 10293 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{ 10294 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8); 10295 match(Set dst (VectorLongToMask src)); 10296 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr); 10297 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %} 10298 ins_encode %{ 10299 int mask_len = Matcher::vector_length(this); 10300 assert(mask_len <= 32, "invalid mask length"); 10301 int vec_enc = vector_length_encoding(mask_len); 10302 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10303 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc); 10304 %} 10305 ins_pipe( pipe_slow ); 10306 %} 10307 10308 instruct long_to_mask_evex(kReg dst, rRegL src) %{ 10309 predicate(n->bottom_type()->isa_vectmask()); 10310 match(Set dst (VectorLongToMask src)); 10311 format %{ "long_to_mask_evex $dst, $src\t!" 
instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
    uint masklen = Matcher::vector_length(this);
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
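// The cast patterns below are size(0) with empty encodings: they exist only
// to satisfy the matcher and emit no machine code.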
instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                                        $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                       $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                              $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
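// Memory-operand and mask-predicated variants of the saturating subword
// add/sub patterns above.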
instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
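// Scalar half-precision (Float16) patterns: short<->HF reinterpretation,
// float<->HF conversion, sqrt, basic arithmetic, min/max and fma.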
instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int function = this->ideal_Opcode() == Op_MinHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
    __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
  %}
  ins_pipe( pipe_slow );
%}
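// Fallback scalar FP16 min/max for targets without the AVX10.2 MINMAX instructions.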
instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_sqrt_HF_reg(vec dst, vec src)
%{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sqrt_HF_mem(vec dst, memory src)
%{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
%{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
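// Memory-operand FP16 FMA and vector FP16 min/max, with AVX10.2 MINMAX and
// non-AVX10.2 fallback forms.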
instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}