1 // 2 // Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Common Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // architecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 42 // 43 // SOC = Save-On-Call: The register allocator assumes that these registers 44 // can be used without saving upon entry to the method, 45 // but that they must be saved at call sites. 46 // 47 // SOE = Save-On-Entry: The register allocator assumes that these registers 48 // must be saved before using them upon entry to the 49 // method, but they do not need to be saved at call 50 // sites. 51 // 52 // AS = Always-Save: The register allocator assumes that these registers 53 // must be saved before using them upon entry to the 54 // method, & that they must be saved at call sites. 55 // 56 // Ideal Register Type is used to determine how to save & restore a 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59 // 60 // The encoding number is the actual bit-pattern placed into the opcodes. 61 62 // XMM registers. 512-bit registers or 8 words each, labeled (a)-p. 63 // Word a in each register holds a Float, words ab hold a Double. 64 // The whole registers are used in SSE4.2 version intrinsics, 65 // array copy stubs and superword operations (see UseSSE42Intrinsics, 66 // UseXMMForArrayCopy and UseSuperword flags). 67 // For pre EVEX enabled architectures: 68 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX) 69 // For EVEX enabled architectures: 70 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX). 
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM15 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, 
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 214 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 215 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 216 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 217 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 218 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 219 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 220 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 221 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 222 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 223 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 224 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 225 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 226 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 227 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 228 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 229 230 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 231 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 232 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 233 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 234 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 235 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 236 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 237 reg_def XMM9h( SOC, SOC, Op_RegF, 9, 
xmm9->as_VMReg()->next(7)); 238 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 239 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 240 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 241 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 242 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 243 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 244 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 245 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 246 247 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 248 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 249 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 250 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 251 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 252 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 253 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 254 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 255 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 256 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 257 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 258 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 259 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 260 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 261 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 262 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 263 264 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 265 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 266 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 267 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 268 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 269 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 270 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 271 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 272 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 273 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 274 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 275 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 276 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 277 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 278 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 279 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 280 281 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 282 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 283 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 284 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 285 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 286 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 287 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 288 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 289 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 290 reg_def XMM12j( 
SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 291 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 292 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 293 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 294 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 295 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 296 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 297 298 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 299 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 300 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 301 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 302 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 303 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 304 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 305 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 306 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 307 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 308 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 309 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 310 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 311 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 312 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 313 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 314 315 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 316 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 317 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 318 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 319 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 320 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 321 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 322 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 323 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 324 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 325 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 326 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 327 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 328 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 329 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 330 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 331 332 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 333 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 334 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 335 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 336 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 337 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 338 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 339 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 340 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 341 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 342 reg_def XMM15k( SOC, SOC, Op_RegF, 15, 
xmm15->as_VMReg()->next(10)); 343 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 344 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 345 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 346 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 347 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 348 349 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 350 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 351 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 352 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 353 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 354 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 355 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 356 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 357 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 358 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 359 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 360 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 361 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 362 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 363 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 364 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 365 366 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 367 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 368 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 369 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 370 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 371 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 372 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 373 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 374 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 375 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 376 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 377 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 378 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 379 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 380 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 381 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 382 383 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 384 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 385 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 386 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 387 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 388 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 389 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 390 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 391 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 392 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 393 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 394 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 395 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 396 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 397 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 398 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 399 400 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 401 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 402 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 403 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 404 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 405 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 406 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 407 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 408 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 409 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 410 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 411 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 412 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 413 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 414 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 415 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 416 417 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 418 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 419 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 420 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 421 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 422 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 423 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 424 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 425 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 426 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 427 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 428 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 429 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 430 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 431 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 432 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 433 434 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 435 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 436 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 437 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 438 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 439 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 440 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 441 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 442 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 443 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 444 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 445 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 446 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 447 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 448 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 449 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 450 451 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 452 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 453 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 454 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 455 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 456 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 457 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 458 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 459 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 460 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 461 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 462 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 463 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 464 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 465 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 466 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 467 468 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 469 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 470 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 471 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 472 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 473 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 474 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 475 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 476 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 477 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 478 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 479 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 480 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 481 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 482 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 483 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 484 485 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 486 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 487 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 488 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 489 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 490 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 491 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 492 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 493 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 494 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 495 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 496 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 497 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 498 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 499 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 500 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 501 502 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 503 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 504 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 505 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 506 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 507 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 508 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 509 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 510 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 511 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 512 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 513 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 514 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 515 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 516 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 517 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 518 519 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 520 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 521 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 522 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 523 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 524 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 525 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 526 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 527 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 528 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 529 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 530 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 531 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 532 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 533 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 534 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 535 536 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 537 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 538 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 539 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 540 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 541 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 542 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 543 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 544 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 545 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 546 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 547 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 548 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 549 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 550 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 551 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 552 553 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 554 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 555 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 556 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 557 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 558 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 559 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 560 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 561 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 562 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 563 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 564 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 565 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 566 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 567 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 568 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 569 570 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 571 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 572 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 573 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 574 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 575 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 576 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 577 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 578 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 579 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 580 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 581 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 582 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 583 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 584 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 585 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 586 587 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 588 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 589 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 590 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 591 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 592 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 593 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 594 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 595 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 596 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 597 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 598 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 599 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 600 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 601 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 602 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(15)); 603 604 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 605 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 606 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 607 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 608 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 609 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 610 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 611 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 612 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 613 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 614 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 615 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 616 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 617 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 618 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 619 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 620 621 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 622 623 // AVX3 Mask Registers. 624 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg()); 625 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next()); 626 627 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg()); 628 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next()); 629 630 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg()); 631 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next()); 632 633 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg()); 634 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next()); 635 636 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg()); 637 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next()); 638 639 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg()); 640 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next()); 641 642 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg()); 643 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next()); 644 645 646 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 647 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 648 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 649 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 650 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 651 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 652 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 653 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 654 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 655 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 656 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 657 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, 
XMM11n, XMM11o, XMM11p, 658 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 659 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 660 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 661 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p, 662 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 663 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 664 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 665 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 666 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 667 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 668 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 669 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 670 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 671 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 672 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 673 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 674 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 675 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 676 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 677 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); 678 679 alloc_class chunk2(K7, K7_H, 680 K6, K6_H, 681 K5, K5_H, 682 K4, K4_H, 683 K3, K3_H, 684 K2, K2_H, 685 K1, K1_H); 686 687 reg_class vectmask_reg(K1, K1_H, 688 K2, K2_H, 689 K3, K3_H, 690 K4, K4_H, 691 K5, K5_H, 692 K6, K6_H, 693 K7, K7_H); 694 695 reg_class vectmask_reg_K1(K1, K1_H); 696 reg_class vectmask_reg_K2(K2, K2_H); 697 reg_class vectmask_reg_K3(K3, K3_H); 698 reg_class vectmask_reg_K4(K4, K4_H); 699 reg_class vectmask_reg_K5(K5, K5_H); 700 reg_class vectmask_reg_K6(K6, K6_H); 701 reg_class vectmask_reg_K7(K7, K7_H); 702 703 // flags allocation class should be last. 
704 alloc_class chunk3(RFLAGS); 705 706 707 // Singleton class for condition codes 708 reg_class int_flags(RFLAGS); 709 710 // Class for pre evex float registers 711 reg_class float_reg_legacy(XMM0, 712 XMM1, 713 XMM2, 714 XMM3, 715 XMM4, 716 XMM5, 717 XMM6, 718 XMM7, 719 XMM8, 720 XMM9, 721 XMM10, 722 XMM11, 723 XMM12, 724 XMM13, 725 XMM14, 726 XMM15); 727 728 // Class for evex float registers 729 reg_class float_reg_evex(XMM0, 730 XMM1, 731 XMM2, 732 XMM3, 733 XMM4, 734 XMM5, 735 XMM6, 736 XMM7, 737 XMM8, 738 XMM9, 739 XMM10, 740 XMM11, 741 XMM12, 742 XMM13, 743 XMM14, 744 XMM15, 745 XMM16, 746 XMM17, 747 XMM18, 748 XMM19, 749 XMM20, 750 XMM21, 751 XMM22, 752 XMM23, 753 XMM24, 754 XMM25, 755 XMM26, 756 XMM27, 757 XMM28, 758 XMM29, 759 XMM30, 760 XMM31); 761 762 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 763 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 764 765 // Class for pre evex double registers 766 reg_class double_reg_legacy(XMM0, XMM0b, 767 XMM1, XMM1b, 768 XMM2, XMM2b, 769 XMM3, XMM3b, 770 XMM4, XMM4b, 771 XMM5, XMM5b, 772 XMM6, XMM6b, 773 XMM7, XMM7b, 774 XMM8, XMM8b, 775 XMM9, XMM9b, 776 XMM10, XMM10b, 777 XMM11, XMM11b, 778 XMM12, XMM12b, 779 XMM13, XMM13b, 780 XMM14, XMM14b, 781 XMM15, XMM15b); 782 783 // Class for evex double registers 784 reg_class double_reg_evex(XMM0, XMM0b, 785 XMM1, XMM1b, 786 XMM2, XMM2b, 787 XMM3, XMM3b, 788 XMM4, XMM4b, 789 XMM5, XMM5b, 790 XMM6, XMM6b, 791 XMM7, XMM7b, 792 XMM8, XMM8b, 793 XMM9, XMM9b, 794 XMM10, XMM10b, 795 XMM11, XMM11b, 796 XMM12, XMM12b, 797 XMM13, XMM13b, 798 XMM14, XMM14b, 799 XMM15, XMM15b, 800 XMM16, XMM16b, 801 XMM17, XMM17b, 802 XMM18, XMM18b, 803 XMM19, XMM19b, 804 XMM20, XMM20b, 805 XMM21, XMM21b, 806 XMM22, XMM22b, 807 XMM23, XMM23b, 808 XMM24, XMM24b, 809 XMM25, XMM25b, 810 XMM26, XMM26b, 811 XMM27, XMM27b, 812 XMM28, XMM28b, 813 XMM29, XMM29b, 814 XMM30, XMM30b, 815 XMM31, XMM31b); 816 817 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 818 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 819 820 // Class for pre evex 32bit vector registers 821 reg_class vectors_reg_legacy(XMM0, 822 XMM1, 823 XMM2, 824 XMM3, 825 XMM4, 826 XMM5, 827 XMM6, 828 XMM7, 829 XMM8, 830 XMM9, 831 XMM10, 832 XMM11, 833 XMM12, 834 XMM13, 835 XMM14, 836 XMM15); 837 838 // Class for evex 32bit vector registers 839 reg_class vectors_reg_evex(XMM0, 840 XMM1, 841 XMM2, 842 XMM3, 843 XMM4, 844 XMM5, 845 XMM6, 846 XMM7, 847 XMM8, 848 XMM9, 849 XMM10, 850 XMM11, 851 XMM12, 852 XMM13, 853 XMM14, 854 XMM15, 855 XMM16, 856 XMM17, 857 XMM18, 858 XMM19, 859 XMM20, 860 XMM21, 861 XMM22, 862 XMM23, 863 XMM24, 864 XMM25, 865 XMM26, 866 XMM27, 867 XMM28, 868 XMM29, 869 XMM30, 870 XMM31); 871 872 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 873 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 874 875 // Class for all 64bit vector registers 876 reg_class vectord_reg_legacy(XMM0, XMM0b, 877 XMM1, XMM1b, 878 XMM2, XMM2b, 879 XMM3, XMM3b, 880 XMM4, XMM4b, 881 XMM5, XMM5b, 882 XMM6, XMM6b, 883 XMM7, XMM7b, 884 XMM8, XMM8b, 885 XMM9, XMM9b, 886 XMM10, XMM10b, 887 XMM11, XMM11b, 888 XMM12, XMM12b, 889 XMM13, XMM13b, 890 XMM14, XMM14b, 891 XMM15, XMM15b); 
892 893 // Class for all 64bit vector registers 894 reg_class vectord_reg_evex(XMM0, XMM0b, 895 XMM1, XMM1b, 896 XMM2, XMM2b, 897 XMM3, XMM3b, 898 XMM4, XMM4b, 899 XMM5, XMM5b, 900 XMM6, XMM6b, 901 XMM7, XMM7b, 902 XMM8, XMM8b, 903 XMM9, XMM9b, 904 XMM10, XMM10b, 905 XMM11, XMM11b, 906 XMM12, XMM12b, 907 XMM13, XMM13b, 908 XMM14, XMM14b, 909 XMM15, XMM15b, 910 XMM16, XMM16b, 911 XMM17, XMM17b, 912 XMM18, XMM18b, 913 XMM19, XMM19b, 914 XMM20, XMM20b, 915 XMM21, XMM21b, 916 XMM22, XMM22b, 917 XMM23, XMM23b, 918 XMM24, XMM24b, 919 XMM25, XMM25b, 920 XMM26, XMM26b, 921 XMM27, XMM27b, 922 XMM28, XMM28b, 923 XMM29, XMM29b, 924 XMM30, XMM30b, 925 XMM31, XMM31b); 926 927 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 928 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 929 930 // Class for all 128bit vector registers 931 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 932 XMM1, XMM1b, XMM1c, XMM1d, 933 XMM2, XMM2b, XMM2c, XMM2d, 934 XMM3, XMM3b, XMM3c, XMM3d, 935 XMM4, XMM4b, XMM4c, XMM4d, 936 XMM5, XMM5b, XMM5c, XMM5d, 937 XMM6, XMM6b, XMM6c, XMM6d, 938 XMM7, XMM7b, XMM7c, XMM7d, 939 XMM8, XMM8b, XMM8c, XMM8d, 940 XMM9, XMM9b, XMM9c, XMM9d, 941 XMM10, XMM10b, XMM10c, XMM10d, 942 XMM11, XMM11b, XMM11c, XMM11d, 943 XMM12, XMM12b, XMM12c, XMM12d, 944 XMM13, XMM13b, XMM13c, XMM13d, 945 XMM14, XMM14b, XMM14c, XMM14d, 946 XMM15, XMM15b, XMM15c, XMM15d); 947 948 // Class for all 128bit vector registers 949 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 950 XMM1, XMM1b, XMM1c, XMM1d, 951 XMM2, XMM2b, XMM2c, XMM2d, 952 XMM3, XMM3b, XMM3c, XMM3d, 953 XMM4, XMM4b, XMM4c, XMM4d, 954 XMM5, XMM5b, XMM5c, XMM5d, 955 XMM6, XMM6b, XMM6c, XMM6d, 956 XMM7, XMM7b, XMM7c, XMM7d, 957 XMM8, XMM8b, XMM8c, XMM8d, 958 XMM9, XMM9b, XMM9c, XMM9d, 959 XMM10, XMM10b, XMM10c, XMM10d, 960 XMM11, XMM11b, XMM11c, XMM11d, 961 XMM12, XMM12b, XMM12c, XMM12d, 962 XMM13, XMM13b, XMM13c, XMM13d, 963 XMM14, XMM14b, XMM14c, XMM14d, 964 XMM15, XMM15b, XMM15c, XMM15d, 965 XMM16, XMM16b, XMM16c, XMM16d, 966 XMM17, XMM17b, XMM17c, XMM17d, 967 XMM18, XMM18b, XMM18c, XMM18d, 968 XMM19, XMM19b, XMM19c, XMM19d, 969 XMM20, XMM20b, XMM20c, XMM20d, 970 XMM21, XMM21b, XMM21c, XMM21d, 971 XMM22, XMM22b, XMM22c, XMM22d, 972 XMM23, XMM23b, XMM23c, XMM23d, 973 XMM24, XMM24b, XMM24c, XMM24d, 974 XMM25, XMM25b, XMM25c, XMM25d, 975 XMM26, XMM26b, XMM26c, XMM26d, 976 XMM27, XMM27b, XMM27c, XMM27d, 977 XMM28, XMM28b, XMM28c, XMM28d, 978 XMM29, XMM29b, XMM29c, XMM29d, 979 XMM30, XMM30b, XMM30c, XMM30d, 980 XMM31, XMM31b, XMM31c, XMM31d); 981 982 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 983 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 984 985 // Class for all 256bit vector registers 986 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 987 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 988 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 989 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 990 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 991 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 992 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 993 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, 994 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 995 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, 
XMM9h, 996 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 997 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 998 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 999 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1000 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1001 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); 1002 1003 // Class for all 256bit vector registers 1004 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1005 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1006 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1007 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1008 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1009 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1010 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1011 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, 1012 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1013 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1014 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1015 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1016 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1017 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1018 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1019 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1020 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1021 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1022 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1023 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1024 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1025 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1026 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1027 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1028 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1029 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1030 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1031 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1032 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1033 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1034 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1035 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h); 1036 1037 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1038 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1039 1040 // Class for all 512bit vector registers 1041 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1042 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1043 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1044 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1045 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1046 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, 
XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1047 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1048 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 1049 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1050 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1051 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1052 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1053 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1054 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1055 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1056 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p, 1057 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1058 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1059 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1060 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1061 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1062 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1063 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1064 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1065 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1066 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1067 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1068 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1069 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1070 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); 1073 1074 // Class for restricted 512bit vector registers 1075 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, 
XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1076 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1077 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1078 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1079 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1080 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1081 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1082 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 1083 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1084 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1085 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1086 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1087 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1088 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1089 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1090 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1091 1092 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1093 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1094 1095 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1096 %} 1097 1098 1099 //----------SOURCE BLOCK------------------------------------------------------- 1100 // This is a block of C++ code which provides values, functions, and 1101 // definitions necessary in the rest of the architecture description 1102 1103 source_hpp %{ 1104 // Header information of the source block. 1105 // Method declarations/definitions which are used outside 1106 // the ad-scope can conveniently be defined here. 1107 // 1108 // To keep related declarations/definitions/uses close together, 1109 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1110 1111 #include "runtime/vm_version.hpp" 1112 1113 class NativeJump; 1114 1115 class CallStubImpl { 1116 1117 //-------------------------------------------------------------- 1118 //---< Used for optimization in Compile::shorten_branches >--- 1119 //-------------------------------------------------------------- 1120 1121 public: 1122 // Size of call trampoline stub. 
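  // (Background note, roughly: x86 calls are emitted with rel32 displacements
  //  that can reach the entire code cache, so this platform needs no call
  //  trampoline stubs at all; both accessors below therefore return 0.)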
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(C2_MacroAssembler *masm);
  static int emit_deopt_handler(C2_MacroAssembler* masm);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

  static uint size_deopt_handler() {
    // three 5 byte instructions plus one move for unreachable address.
    return 15+3;
  }
};

inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch(bytes) {
    case  4: // fall-through
    case  8: // fall-through
    case 16: return Assembler::AVX_128bit;
    case 32: return Assembler::AVX_256bit;
    case 64: return Assembler::AVX_512bit;

    default: {
      ShouldNotReachHere();
      return Assembler::AVX_NoVec;
    }
  }
}

static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
  return vector_length_encoding(Matcher::vector_length_in_bytes(n));
}

static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return vector_length_encoding(def);
}

static inline bool is_vector_popcount_predicate(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}

static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
  return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
         (VM_Version::supports_avx512vl() || vlen_bytes == 64);
}

class Node::PD {
public:
  enum NodeFlags {
    Flag_intel_jcc_erratum    = Node::_last_flag << 1,
    Flag_sets_carry_flag      = Node::_last_flag << 2,
    Flag_sets_parity_flag     = Node::_last_flag << 3,
    Flag_sets_zero_flag       = Node::_last_flag << 4,
    Flag_sets_overflow_flag   = Node::_last_flag << 5,
    Flag_sets_sign_flag       = Node::_last_flag << 6,
    Flag_clears_carry_flag    = Node::_last_flag << 7,
    Flag_clears_parity_flag   = Node::_last_flag << 8,
    Flag_clears_zero_flag     = Node::_last_flag << 9,
    Flag_clears_overflow_flag = Node::_last_flag << 10,
    Flag_clears_sign_flag     = Node::_last_flag << 11,
    _last_flag                = Flag_clears_sign_flag
  };
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"
#include "c2_intelJccErratum_x86.hpp"

void PhaseOutput::pd_perform_mach_node_analysis() {
  if (VM_Version::has_intel_jcc_erratum()) {
    int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
    _buf_sizes._code += extra_padding;
  }
}

int MachNode::pd_alignment_required() const {
  if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
    // Conservatively add worst case
padding. We assume that relocInfo::addr_unit() is 1 on x86. 1225 return IntelJccErratum::largest_jcc_size() + 1; 1226 } else { 1227 return 1; 1228 } 1229 } 1230 1231 int MachNode::compute_padding(int current_offset) const { 1232 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1233 Compile* C = Compile::current(); 1234 PhaseOutput* output = C->output(); 1235 Block* block = output->block(); 1236 int index = output->index(); 1237 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1238 } else { 1239 return 0; 1240 } 1241 } 1242 1243 // Emit exception handler code. 1244 // Stuff framesize into a register and call a VM stub routine. 1245 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) { 1246 1247 // Note that the code buffer's insts_mark is always relative to insts. 1248 // That's why we must use the macroassembler to generate a handler. 1249 address base = __ start_a_stub(size_exception_handler()); 1250 if (base == nullptr) { 1251 ciEnv::current()->record_failure("CodeCache is full"); 1252 return 0; // CodeBuffer::expand failed 1253 } 1254 int offset = __ offset(); 1255 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1256 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1257 __ end_a_stub(); 1258 return offset; 1259 } 1260 1261 // Emit deopt handler code. 1262 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { 1263 1264 // Note that the code buffer's insts_mark is always relative to insts. 1265 // That's why we must use the macroassembler to generate a handler. 1266 address base = __ start_a_stub(size_deopt_handler()); 1267 if (base == nullptr) { 1268 ciEnv::current()->record_failure("CodeCache is full"); 1269 return 0; // CodeBuffer::expand failed 1270 } 1271 int offset = __ offset(); 1272 1273 address the_pc = (address) __ pc(); 1274 Label next; 1275 // push a "the_pc" on the stack without destroying any registers 1276 // as they all may be live. 1277 1278 // push address of "next" 1279 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1280 __ bind(next); 1281 // adjust it so it matches "the_pc" 1282 __ subptr(Address(rsp, 0), __ offset() - offset); 1283 1284 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1285 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1286 __ end_a_stub(); 1287 return offset; 1288 } 1289 1290 static Assembler::Width widthForType(BasicType bt) { 1291 if (bt == T_BYTE) { 1292 return Assembler::B; 1293 } else if (bt == T_SHORT) { 1294 return Assembler::W; 1295 } else if (bt == T_INT) { 1296 return Assembler::D; 1297 } else { 1298 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1299 return Assembler::Q; 1300 } 1301 } 1302 1303 //============================================================================= 1304 1305 // Float masks come from different places depending on platform. 
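// (Illustrative note, assuming the usual stub contents: the sign-mask stubs are
//  expected to hold constants along the lines of 0x7FFFFFFF... (mask) and
//  0x80000000... (flip) replicated across the register, so Abs/Neg of
//  Float/Double can be emitted as a single andps/xorps against the constant;
//  the accessors below simply forward to StubRoutines::x86.)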
1306 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1307 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1308 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1309 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1310 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1311 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1312 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1313 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1314 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1315 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1316 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1317 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1318 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1319 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1320 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1321 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1322 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1323 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1324 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1325 1326 //============================================================================= 1327 bool Matcher::match_rule_supported(int opcode) { 1328 if (!has_match_rule(opcode)) { 1329 return false; // no match rule present 1330 } 1331 switch (opcode) { 1332 case Op_AbsVL: 1333 case Op_StoreVectorScatter: 1334 if (UseAVX < 3) { 1335 return false; 1336 } 1337 break; 1338 case Op_PopCountI: 1339 case Op_PopCountL: 1340 if (!UsePopCountInstruction) { 1341 return false; 1342 } 1343 break; 1344 case Op_PopCountVI: 1345 if (UseAVX < 2) { 1346 return false; 1347 } 1348 break; 1349 case Op_CompressV: 1350 case Op_ExpandV: 1351 case Op_PopCountVL: 1352 if (UseAVX < 2) { 1353 return false; 1354 } 1355 break; 1356 case Op_MulVI: 1357 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1358 return false; 1359 } 1360 break; 1361 case Op_MulVL: 1362 if (UseSSE < 4) { // only with SSE4_1 or AVX 1363 return false; 1364 } 1365 break; 1366 case Op_MulReductionVL: 1367 if (VM_Version::supports_avx512dq() == false) { 1368 return false; 1369 } 1370 break; 1371 case Op_AbsVB: 1372 case Op_AbsVS: 1373 case Op_AbsVI: 1374 case Op_AddReductionVI: 1375 case Op_AndReductionV: 1376 case Op_OrReductionV: 1377 case Op_XorReductionV: 1378 if (UseSSE < 3) { // requires at least SSSE3 1379 return false; 1380 } 1381 break; 1382 case Op_MaxHF: 1383 case Op_MinHF: 1384 if (!VM_Version::supports_avx512vlbw()) { 1385 return false; 1386 } // fallthrough 1387 case Op_AddHF: 1388 case Op_DivHF: 1389 case Op_FmaHF: 1390 case Op_MulHF: 1391 case Op_ReinterpretS2HF: 1392 case Op_ReinterpretHF2S: 1393 case Op_SubHF: 1394 case Op_SqrtHF: 1395 if (!VM_Version::supports_avx512_fp16()) { 1396 return 
false; 1397 } 1398 break; 1399 case Op_VectorLoadShuffle: 1400 case Op_VectorRearrange: 1401 case Op_MulReductionVI: 1402 if (UseSSE < 4) { // requires at least SSE4 1403 return false; 1404 } 1405 break; 1406 case Op_IsInfiniteF: 1407 case Op_IsInfiniteD: 1408 if (!VM_Version::supports_avx512dq()) { 1409 return false; 1410 } 1411 break; 1412 case Op_SqrtVD: 1413 case Op_SqrtVF: 1414 case Op_VectorMaskCmp: 1415 case Op_VectorCastB2X: 1416 case Op_VectorCastS2X: 1417 case Op_VectorCastI2X: 1418 case Op_VectorCastL2X: 1419 case Op_VectorCastF2X: 1420 case Op_VectorCastD2X: 1421 case Op_VectorUCastB2X: 1422 case Op_VectorUCastS2X: 1423 case Op_VectorUCastI2X: 1424 case Op_VectorMaskCast: 1425 if (UseAVX < 1) { // enabled for AVX only 1426 return false; 1427 } 1428 break; 1429 case Op_PopulateIndex: 1430 if (UseAVX < 2) { 1431 return false; 1432 } 1433 break; 1434 case Op_RoundVF: 1435 if (UseAVX < 2) { // enabled for AVX2 only 1436 return false; 1437 } 1438 break; 1439 case Op_RoundVD: 1440 if (UseAVX < 3) { 1441 return false; // enabled for AVX3 only 1442 } 1443 break; 1444 case Op_CompareAndSwapL: 1445 case Op_CompareAndSwapP: 1446 break; 1447 case Op_StrIndexOf: 1448 if (!UseSSE42Intrinsics) { 1449 return false; 1450 } 1451 break; 1452 case Op_StrIndexOfChar: 1453 if (!UseSSE42Intrinsics) { 1454 return false; 1455 } 1456 break; 1457 case Op_OnSpinWait: 1458 if (VM_Version::supports_on_spin_wait() == false) { 1459 return false; 1460 } 1461 break; 1462 case Op_MulVB: 1463 case Op_LShiftVB: 1464 case Op_RShiftVB: 1465 case Op_URShiftVB: 1466 case Op_VectorInsert: 1467 case Op_VectorLoadMask: 1468 case Op_VectorStoreMask: 1469 case Op_VectorBlend: 1470 if (UseSSE < 4) { 1471 return false; 1472 } 1473 break; 1474 case Op_MaxD: 1475 case Op_MaxF: 1476 case Op_MinD: 1477 case Op_MinF: 1478 if (UseAVX < 1) { // enabled for AVX only 1479 return false; 1480 } 1481 break; 1482 case Op_CacheWB: 1483 case Op_CacheWBPreSync: 1484 case Op_CacheWBPostSync: 1485 if (!VM_Version::supports_data_cache_line_flush()) { 1486 return false; 1487 } 1488 break; 1489 case Op_ExtractB: 1490 case Op_ExtractL: 1491 case Op_ExtractI: 1492 case Op_RoundDoubleMode: 1493 if (UseSSE < 4) { 1494 return false; 1495 } 1496 break; 1497 case Op_RoundDoubleModeV: 1498 if (VM_Version::supports_avx() == false) { 1499 return false; // 128bit vroundpd is not available 1500 } 1501 break; 1502 case Op_LoadVectorGather: 1503 case Op_LoadVectorGatherMasked: 1504 if (UseAVX < 2) { 1505 return false; 1506 } 1507 break; 1508 case Op_FmaF: 1509 case Op_FmaD: 1510 case Op_FmaVD: 1511 case Op_FmaVF: 1512 if (!UseFMA) { 1513 return false; 1514 } 1515 break; 1516 case Op_MacroLogicV: 1517 if (UseAVX < 3 || !UseVectorMacroLogic) { 1518 return false; 1519 } 1520 break; 1521 1522 case Op_VectorCmpMasked: 1523 case Op_VectorMaskGen: 1524 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1525 return false; 1526 } 1527 break; 1528 case Op_VectorMaskFirstTrue: 1529 case Op_VectorMaskLastTrue: 1530 case Op_VectorMaskTrueCount: 1531 case Op_VectorMaskToLong: 1532 if (UseAVX < 1) { 1533 return false; 1534 } 1535 break; 1536 case Op_RoundF: 1537 case Op_RoundD: 1538 break; 1539 case Op_CopySignD: 1540 case Op_CopySignF: 1541 if (UseAVX < 3) { 1542 return false; 1543 } 1544 if (!VM_Version::supports_avx512vl()) { 1545 return false; 1546 } 1547 break; 1548 case Op_CompressBits: 1549 case Op_ExpandBits: 1550 if (!VM_Version::supports_bmi2()) { 1551 return false; 1552 } 1553 break; 1554 case Op_CompressM: 1555 if (!VM_Version::supports_avx512vl() || 
!VM_Version::supports_bmi2()) { 1556 return false; 1557 } 1558 break; 1559 case Op_ConvF2HF: 1560 case Op_ConvHF2F: 1561 if (!VM_Version::supports_float16()) { 1562 return false; 1563 } 1564 break; 1565 case Op_VectorCastF2HF: 1566 case Op_VectorCastHF2F: 1567 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1568 return false; 1569 } 1570 break; 1571 } 1572 return true; // Match rules are supported by default. 1573 } 1574 1575 //------------------------------------------------------------------------ 1576 1577 static inline bool is_pop_count_instr_target(BasicType bt) { 1578 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1579 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1580 } 1581 1582 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) { 1583 return match_rule_supported_vector(opcode, vlen, bt); 1584 } 1585 1586 // Identify extra cases that we might want to provide match rules for vector nodes and 1587 // other intrinsics guarded with vector length (vlen) and element type (bt). 1588 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1589 if (!match_rule_supported(opcode)) { 1590 return false; 1591 } 1592 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1593 // * SSE2 supports 128bit vectors for all types; 1594 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1595 // * AVX2 supports 256bit vectors for all types; 1596 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1597 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1598 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1599 // And MaxVectorSize is taken into account as well. 
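  // Worked example (illustrative): bt == T_INT with vlen == 8 is a 256-bit
  // vector, so it passes the check below only on AVX2-or-better hardware and
  // only when MaxVectorSize >= 32; a 512-bit request (vlen == 16) additionally
  // needs AVX512F and MaxVectorSize >= 64. Anything rejected here never reaches
  // the opcode-specific cases further down.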
1600 if (!vector_size_supported(bt, vlen)) { 1601 return false; 1602 } 1603 // Special cases which require vector length follow: 1604 // * implementation limitations 1605 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1606 // * 128bit vroundpd instruction is present only in AVX1 1607 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1608 switch (opcode) { 1609 case Op_MaxVHF: 1610 case Op_MinVHF: 1611 if (!VM_Version::supports_avx512bw()) { 1612 return false; 1613 } 1614 case Op_AddVHF: 1615 case Op_DivVHF: 1616 case Op_FmaVHF: 1617 case Op_MulVHF: 1618 case Op_SubVHF: 1619 case Op_SqrtVHF: 1620 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1621 return false; 1622 } 1623 if (!VM_Version::supports_avx512_fp16()) { 1624 return false; 1625 } 1626 break; 1627 case Op_AbsVF: 1628 case Op_NegVF: 1629 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1630 return false; // 512bit vandps and vxorps are not available 1631 } 1632 break; 1633 case Op_AbsVD: 1634 case Op_NegVD: 1635 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1636 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1637 } 1638 break; 1639 case Op_RotateRightV: 1640 case Op_RotateLeftV: 1641 if (bt != T_INT && bt != T_LONG) { 1642 return false; 1643 } // fallthrough 1644 case Op_MacroLogicV: 1645 if (!VM_Version::supports_evex() || 1646 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1647 return false; 1648 } 1649 break; 1650 case Op_ClearArray: 1651 case Op_VectorMaskGen: 1652 case Op_VectorCmpMasked: 1653 if (!VM_Version::supports_avx512bw()) { 1654 return false; 1655 } 1656 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1657 return false; 1658 } 1659 break; 1660 case Op_LoadVectorMasked: 1661 case Op_StoreVectorMasked: 1662 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1663 return false; 1664 } 1665 break; 1666 case Op_UMinV: 1667 case Op_UMaxV: 1668 if (UseAVX == 0) { 1669 return false; 1670 } 1671 break; 1672 case Op_MaxV: 1673 case Op_MinV: 1674 if (UseSSE < 4 && is_integral_type(bt)) { 1675 return false; 1676 } 1677 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1678 // Float/Double intrinsics are enabled for AVX family currently. 1679 if (UseAVX == 0) { 1680 return false; 1681 } 1682 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1683 return false; 1684 } 1685 } 1686 break; 1687 case Op_CallLeafVector: 1688 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1689 return false; 1690 } 1691 break; 1692 case Op_AddReductionVI: 1693 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1694 return false; 1695 } 1696 // fallthrough 1697 case Op_AndReductionV: 1698 case Op_OrReductionV: 1699 case Op_XorReductionV: 1700 if (is_subword_type(bt) && (UseSSE < 4)) { 1701 return false; 1702 } 1703 break; 1704 case Op_MinReductionV: 1705 case Op_MaxReductionV: 1706 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1707 return false; 1708 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1709 return false; 1710 } 1711 // Float/Double intrinsics enabled for AVX family. 
1712 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1713 return false; 1714 } 1715 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1716 return false; 1717 } 1718 break; 1719 case Op_VectorTest: 1720 if (UseSSE < 4) { 1721 return false; // Implementation limitation 1722 } else if (size_in_bits < 32) { 1723 return false; // Implementation limitation 1724 } 1725 break; 1726 case Op_VectorLoadShuffle: 1727 case Op_VectorRearrange: 1728 if(vlen == 2) { 1729 return false; // Implementation limitation due to how shuffle is loaded 1730 } else if (size_in_bits == 256 && UseAVX < 2) { 1731 return false; // Implementation limitation 1732 } 1733 break; 1734 case Op_VectorLoadMask: 1735 case Op_VectorMaskCast: 1736 if (size_in_bits == 256 && UseAVX < 2) { 1737 return false; // Implementation limitation 1738 } 1739 // fallthrough 1740 case Op_VectorStoreMask: 1741 if (vlen == 2) { 1742 return false; // Implementation limitation 1743 } 1744 break; 1745 case Op_PopulateIndex: 1746 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1747 return false; 1748 } 1749 break; 1750 case Op_VectorCastB2X: 1751 case Op_VectorCastS2X: 1752 case Op_VectorCastI2X: 1753 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1754 return false; 1755 } 1756 break; 1757 case Op_VectorCastL2X: 1758 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1759 return false; 1760 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1761 return false; 1762 } 1763 break; 1764 case Op_VectorCastF2X: { 1765 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1766 // happen after intermediate conversion to integer and special handling 1767 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 
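      // Example (for illustration): casting 8 floats down to bytes gives
      // src_size_in_bits = 4 * 8 * 8 = 256, so AVX2 is required even though the
      // narrowed 8-byte result would fit comfortably in an XMM register; the
      // limiting factor is the width of the float source, not of the result.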
1768 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1769 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1770 return false; 1771 } 1772 } 1773 // fallthrough 1774 case Op_VectorCastD2X: 1775 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1776 return false; 1777 } 1778 break; 1779 case Op_VectorCastF2HF: 1780 case Op_VectorCastHF2F: 1781 if (!VM_Version::supports_f16c() && 1782 ((!VM_Version::supports_evex() || 1783 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1784 return false; 1785 } 1786 break; 1787 case Op_RoundVD: 1788 if (!VM_Version::supports_avx512dq()) { 1789 return false; 1790 } 1791 break; 1792 case Op_MulReductionVI: 1793 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1794 return false; 1795 } 1796 break; 1797 case Op_LoadVectorGatherMasked: 1798 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1799 return false; 1800 } 1801 if (is_subword_type(bt) && 1802 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) || 1803 (size_in_bits < 64) || 1804 (bt == T_SHORT && !VM_Version::supports_bmi2()))) { 1805 return false; 1806 } 1807 break; 1808 case Op_StoreVectorScatterMasked: 1809 case Op_StoreVectorScatter: 1810 if (is_subword_type(bt)) { 1811 return false; 1812 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1813 return false; 1814 } 1815 // fallthrough 1816 case Op_LoadVectorGather: 1817 if (!is_subword_type(bt) && size_in_bits == 64) { 1818 return false; 1819 } 1820 if (is_subword_type(bt) && size_in_bits < 64) { 1821 return false; 1822 } 1823 break; 1824 case Op_SaturatingAddV: 1825 case Op_SaturatingSubV: 1826 if (UseAVX < 1) { 1827 return false; // Implementation limitation 1828 } 1829 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1830 return false; 1831 } 1832 break; 1833 case Op_SelectFromTwoVector: 1834 if (size_in_bits < 128) { 1835 return false; 1836 } 1837 if ((size_in_bits < 512 && !VM_Version::supports_avx512vl())) { 1838 return false; 1839 } 1840 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 1841 return false; 1842 } 1843 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 1844 return false; 1845 } 1846 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) { 1847 return false; 1848 } 1849 break; 1850 case Op_MaskAll: 1851 if (!VM_Version::supports_evex()) { 1852 return false; 1853 } 1854 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1855 return false; 1856 } 1857 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1858 return false; 1859 } 1860 break; 1861 case Op_VectorMaskCmp: 1862 if (vlen < 2 || size_in_bits < 32) { 1863 return false; 1864 } 1865 break; 1866 case Op_CompressM: 1867 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1868 return false; 1869 } 1870 break; 1871 case Op_CompressV: 1872 case Op_ExpandV: 1873 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1874 return false; 1875 } 1876 if (size_in_bits < 128 ) { 1877 return false; 1878 } 1879 case Op_VectorLongToMask: 1880 if (UseAVX < 1) { 1881 return false; 1882 } 1883 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1884 return false; 1885 } 1886 break; 1887 case Op_SignumVD: 1888 case Op_SignumVF: 1889 if (UseAVX < 1) { 1890 return false; 1891 } 1892 break; 1893 case Op_PopCountVI: 1894 case Op_PopCountVL: { 1895 if (!is_pop_count_instr_target(bt) && 1896 (size_in_bits == 512) && 
!VM_Version::supports_avx512bw()) { 1897 return false; 1898 } 1899 } 1900 break; 1901 case Op_ReverseV: 1902 case Op_ReverseBytesV: 1903 if (UseAVX < 2) { 1904 return false; 1905 } 1906 break; 1907 case Op_CountTrailingZerosV: 1908 case Op_CountLeadingZerosV: 1909 if (UseAVX < 2) { 1910 return false; 1911 } 1912 break; 1913 } 1914 return true; // Per default match rules are supported. 1915 } 1916 1917 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 1918 // ADLC based match_rule_supported routine checks for the existence of pattern based 1919 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 1920 // of their non-masked counterpart with mask edge being the differentiator. 1921 // This routine does a strict check on the existence of masked operation patterns 1922 // by returning a default false value for all the other opcodes apart from the 1923 // ones whose masked instruction patterns are defined in this file. 1924 if (!match_rule_supported_vector(opcode, vlen, bt)) { 1925 return false; 1926 } 1927 1928 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1929 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 1930 return false; 1931 } 1932 switch(opcode) { 1933 // Unary masked operations 1934 case Op_AbsVB: 1935 case Op_AbsVS: 1936 if(!VM_Version::supports_avx512bw()) { 1937 return false; // Implementation limitation 1938 } 1939 case Op_AbsVI: 1940 case Op_AbsVL: 1941 return true; 1942 1943 // Ternary masked operations 1944 case Op_FmaVF: 1945 case Op_FmaVD: 1946 return true; 1947 1948 case Op_MacroLogicV: 1949 if(bt != T_INT && bt != T_LONG) { 1950 return false; 1951 } 1952 return true; 1953 1954 // Binary masked operations 1955 case Op_AddVB: 1956 case Op_AddVS: 1957 case Op_SubVB: 1958 case Op_SubVS: 1959 case Op_MulVS: 1960 case Op_LShiftVS: 1961 case Op_RShiftVS: 1962 case Op_URShiftVS: 1963 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1964 if (!VM_Version::supports_avx512bw()) { 1965 return false; // Implementation limitation 1966 } 1967 return true; 1968 1969 case Op_MulVL: 1970 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1971 if (!VM_Version::supports_avx512dq()) { 1972 return false; // Implementation limitation 1973 } 1974 return true; 1975 1976 case Op_AndV: 1977 case Op_OrV: 1978 case Op_XorV: 1979 case Op_RotateRightV: 1980 case Op_RotateLeftV: 1981 if (bt != T_INT && bt != T_LONG) { 1982 return false; // Implementation limitation 1983 } 1984 return true; 1985 1986 case Op_VectorLoadMask: 1987 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1988 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 1989 return false; 1990 } 1991 return true; 1992 1993 case Op_AddVI: 1994 case Op_AddVL: 1995 case Op_AddVF: 1996 case Op_AddVD: 1997 case Op_SubVI: 1998 case Op_SubVL: 1999 case Op_SubVF: 2000 case Op_SubVD: 2001 case Op_MulVI: 2002 case Op_MulVF: 2003 case Op_MulVD: 2004 case Op_DivVF: 2005 case Op_DivVD: 2006 case Op_SqrtVF: 2007 case Op_SqrtVD: 2008 case Op_LShiftVI: 2009 case Op_LShiftVL: 2010 case Op_RShiftVI: 2011 case Op_RShiftVL: 2012 case Op_URShiftVI: 2013 case Op_URShiftVL: 2014 case Op_LoadVectorMasked: 2015 case Op_StoreVectorMasked: 2016 case Op_LoadVectorGatherMasked: 2017 case Op_StoreVectorScatterMasked: 2018 return true; 2019 2020 case Op_UMinV: 2021 case Op_UMaxV: 2022 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 2023 return false; 2024 } // fallthrough 2025 case Op_MaxV: 2026 
case Op_MinV: 2027 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2028 return false; // Implementation limitation 2029 } 2030 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) { 2031 return false; // Implementation limitation 2032 } 2033 return true; 2034 case Op_SaturatingAddV: 2035 case Op_SaturatingSubV: 2036 if (!is_subword_type(bt)) { 2037 return false; 2038 } 2039 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) { 2040 return false; // Implementation limitation 2041 } 2042 return true; 2043 2044 case Op_VectorMaskCmp: 2045 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2046 return false; // Implementation limitation 2047 } 2048 return true; 2049 2050 case Op_VectorRearrange: 2051 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2052 return false; // Implementation limitation 2053 } 2054 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2055 return false; // Implementation limitation 2056 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2057 return false; // Implementation limitation 2058 } 2059 return true; 2060 2061 // Binary Logical operations 2062 case Op_AndVMask: 2063 case Op_OrVMask: 2064 case Op_XorVMask: 2065 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2066 return false; // Implementation limitation 2067 } 2068 return true; 2069 2070 case Op_PopCountVI: 2071 case Op_PopCountVL: 2072 if (!is_pop_count_instr_target(bt)) { 2073 return false; 2074 } 2075 return true; 2076 2077 case Op_MaskAll: 2078 return true; 2079 2080 case Op_CountLeadingZerosV: 2081 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2082 return true; 2083 } 2084 default: 2085 return false; 2086 } 2087 } 2088 2089 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2090 return false; 2091 } 2092 2093 // Return true if Vector::rearrange needs preparation of the shuffle argument 2094 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) { 2095 switch (elem_bt) { 2096 case T_BYTE: return false; 2097 case T_SHORT: return !VM_Version::supports_avx512bw(); 2098 case T_INT: return !VM_Version::supports_avx(); 2099 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl(); 2100 default: 2101 ShouldNotReachHere(); 2102 return false; 2103 } 2104 } 2105 2106 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2107 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2108 bool legacy = (generic_opnd->opcode() == LEGVEC); 2109 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2110 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2111 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 
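    // (Rationale, roughly: xmm16-31 are reachable only through EVEX encodings,
    //  and without AVX512VL/BW/DQ many 128/256-bit forms that a TEMP may be
    //  used with exist only as legacy VEX encodings, so temps are kept in the
    //  xmm0-15 range that both encodings can address.)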
2112 return new legVecZOper(); 2113 } 2114 if (legacy) { 2115 switch (ideal_reg) { 2116 case Op_VecS: return new legVecSOper(); 2117 case Op_VecD: return new legVecDOper(); 2118 case Op_VecX: return new legVecXOper(); 2119 case Op_VecY: return new legVecYOper(); 2120 case Op_VecZ: return new legVecZOper(); 2121 } 2122 } else { 2123 switch (ideal_reg) { 2124 case Op_VecS: return new vecSOper(); 2125 case Op_VecD: return new vecDOper(); 2126 case Op_VecX: return new vecXOper(); 2127 case Op_VecY: return new vecYOper(); 2128 case Op_VecZ: return new vecZOper(); 2129 } 2130 } 2131 ShouldNotReachHere(); 2132 return nullptr; 2133 } 2134 2135 bool Matcher::is_reg2reg_move(MachNode* m) { 2136 switch (m->rule()) { 2137 case MoveVec2Leg_rule: 2138 case MoveLeg2Vec_rule: 2139 case MoveF2VL_rule: 2140 case MoveF2LEG_rule: 2141 case MoveVL2F_rule: 2142 case MoveLEG2F_rule: 2143 case MoveD2VL_rule: 2144 case MoveD2LEG_rule: 2145 case MoveVL2D_rule: 2146 case MoveLEG2D_rule: 2147 return true; 2148 default: 2149 return false; 2150 } 2151 } 2152 2153 bool Matcher::is_generic_vector(MachOper* opnd) { 2154 switch (opnd->opcode()) { 2155 case VEC: 2156 case LEGVEC: 2157 return true; 2158 default: 2159 return false; 2160 } 2161 } 2162 2163 //------------------------------------------------------------------------ 2164 2165 const RegMask* Matcher::predicate_reg_mask(void) { 2166 return &_VECTMASK_REG_mask; 2167 } 2168 2169 // Max vector size in bytes. 0 if not supported. 2170 int Matcher::vector_width_in_bytes(BasicType bt) { 2171 assert(is_java_primitive(bt), "only primitive type vectors"); 2172 // SSE2 supports 128bit vectors for all types. 2173 // AVX2 supports 256bit vectors for all types. 2174 // AVX2/EVEX supports 512bit vectors for all types. 2175 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2176 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2177 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2178 size = (UseAVX > 2) ? 64 : 32; 2179 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2180 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2181 // Use flag to limit vector size. 2182 size = MIN2(size,(int)MaxVectorSize); 2183 // Minimum 2 values in vector (or 4 for bytes). 2184 switch (bt) { 2185 case T_DOUBLE: 2186 case T_LONG: 2187 if (size < 16) return 0; 2188 break; 2189 case T_FLOAT: 2190 case T_INT: 2191 if (size < 8) return 0; 2192 break; 2193 case T_BOOLEAN: 2194 if (size < 4) return 0; 2195 break; 2196 case T_CHAR: 2197 if (size < 4) return 0; 2198 break; 2199 case T_BYTE: 2200 if (size < 4) return 0; 2201 break; 2202 case T_SHORT: 2203 if (size < 4) return 0; 2204 break; 2205 default: 2206 ShouldNotReachHere(); 2207 } 2208 return size; 2209 } 2210 2211 // Limits on vector size (number of elements) loaded into vector. 2212 int Matcher::max_vector_size(const BasicType bt) { 2213 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2214 } 2215 int Matcher::min_vector_size(const BasicType bt) { 2216 int max_size = max_vector_size(bt); 2217 // Min size which can be loaded into vector is 4 bytes. 2218 int size = (type2aelembytes(bt) == 1) ? 
4 : 2; 2219 // Support for calling svml double64 vectors 2220 if (bt == T_DOUBLE) { 2221 size = 1; 2222 } 2223 return MIN2(size,max_size); 2224 } 2225 2226 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) { 2227 // Limit the max vector size for auto vectorization to 256 bits (32 bytes) 2228 // by default on Cascade Lake 2229 if (VM_Version::is_default_intel_cascade_lake()) { 2230 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt)); 2231 } 2232 return Matcher::max_vector_size(bt); 2233 } 2234 2235 int Matcher::scalable_vector_reg_size(const BasicType bt) { 2236 return -1; 2237 } 2238 2239 // Vector ideal reg corresponding to specified size in bytes 2240 uint Matcher::vector_ideal_reg(int size) { 2241 assert(MaxVectorSize >= size, ""); 2242 switch(size) { 2243 case 4: return Op_VecS; 2244 case 8: return Op_VecD; 2245 case 16: return Op_VecX; 2246 case 32: return Op_VecY; 2247 case 64: return Op_VecZ; 2248 } 2249 ShouldNotReachHere(); 2250 return 0; 2251 } 2252 2253 // Check for shift by small constant as well 2254 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2255 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2256 shift->in(2)->get_int() <= 3 && 2257 // Are there other uses besides address expressions? 2258 !matcher->is_visited(shift)) { 2259 address_visited.set(shift->_idx); // Flag as address_visited 2260 mstack.push(shift->in(2), Matcher::Visit); 2261 Node *conv = shift->in(1); 2262 // Allow Matcher to match the rule which bypass 2263 // ConvI2L operation for an array index on LP64 2264 // if the index value is positive. 2265 if (conv->Opcode() == Op_ConvI2L && 2266 conv->as_Type()->type()->is_long()->_lo >= 0 && 2267 // Are there other uses besides address expressions? 2268 !matcher->is_visited(conv)) { 2269 address_visited.set(conv->_idx); // Flag as address_visited 2270 mstack.push(conv->in(1), Matcher::Pre_Visit); 2271 } else { 2272 mstack.push(conv, Matcher::Pre_Visit); 2273 } 2274 return true; 2275 } 2276 return false; 2277 } 2278 2279 // This function identifies sub-graphs in which a 'load' node is 2280 // input to two different nodes, and such that it can be matched 2281 // with BMI instructions like blsi, blsr, etc. 2282 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2283 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2284 // refers to the same node. 2285 // 2286 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2287 // This is a temporary solution until we make DAGs expressible in ADL. 
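// Illustrative walk-through (restating the blsi example above): for
//   int b = (-a[i]) & a[i];
// the ideal graph is (AndI (SubI 0 LoadI) LoadI), with both LoadI inputs being
// the same node. is_bmi_pattern() below then asks
//   FusedPatternMatcher<TypeInt>(and_node, load_node, Op_ConI)
//       .match(Op_AndI, -1, Op_SubI, 1, 0)
// i.e. "an AndI whose other input is a SubI of the constant 0 and that same
// load" -- exactly the shape blsi r32, m32 can consume with a memory operand.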
2288 template<typename ConType> 2289 class FusedPatternMatcher { 2290 Node* _op1_node; 2291 Node* _mop_node; 2292 int _con_op; 2293 2294 static int match_next(Node* n, int next_op, int next_op_idx) { 2295 if (n->in(1) == nullptr || n->in(2) == nullptr) { 2296 return -1; 2297 } 2298 2299 if (next_op_idx == -1) { // n is commutative, try rotations 2300 if (n->in(1)->Opcode() == next_op) { 2301 return 1; 2302 } else if (n->in(2)->Opcode() == next_op) { 2303 return 2; 2304 } 2305 } else { 2306 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2307 if (n->in(next_op_idx)->Opcode() == next_op) { 2308 return next_op_idx; 2309 } 2310 } 2311 return -1; 2312 } 2313 2314 public: 2315 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2316 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2317 2318 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2319 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2320 typename ConType::NativeType con_value) { 2321 if (_op1_node->Opcode() != op1) { 2322 return false; 2323 } 2324 if (_mop_node->outcnt() > 2) { 2325 return false; 2326 } 2327 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2328 if (op1_op2_idx == -1) { 2329 return false; 2330 } 2331 // Memory operation must be the other edge 2332 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2333 2334 // Check that the mop node is really what we want 2335 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2336 Node* op2_node = _op1_node->in(op1_op2_idx); 2337 if (op2_node->outcnt() > 1) { 2338 return false; 2339 } 2340 assert(op2_node->Opcode() == op2, "Should be"); 2341 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2342 if (op2_con_idx == -1) { 2343 return false; 2344 } 2345 // Memory operation must be the other edge 2346 int op2_mop_idx = (op2_con_idx & 1) + 1; 2347 // Check that the memory operation is the same node 2348 if (op2_node->in(op2_mop_idx) == _mop_node) { 2349 // Now check the constant 2350 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2351 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2352 return true; 2353 } 2354 } 2355 } 2356 return false; 2357 } 2358 }; 2359 2360 static bool is_bmi_pattern(Node* n, Node* m) { 2361 assert(UseBMI1Instructions, "sanity"); 2362 if (n != nullptr && m != nullptr) { 2363 if (m->Opcode() == Op_LoadI) { 2364 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2365 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2366 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2367 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2368 } else if (m->Opcode() == Op_LoadL) { 2369 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2370 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2371 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2372 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2373 } 2374 } 2375 return false; 2376 } 2377 2378 // Should the matcher clone input 'm' of node 'n'? 2379 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2380 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
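  // (Note, roughly: answering 'yes' here makes the matcher clone the shared
  //  input 'm' into each use, so the whole fused shape can be matched as one
  //  instruction with a memory operand instead of forcing the load into a
  //  register first. The same idea applies to the shift-count and
  //  encode-and-store patterns handled further down.)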
2381 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2382 mstack.push(m, Visit); 2383 return true; 2384 } 2385 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2386 mstack.push(m, Visit); // m = ShiftCntV 2387 return true; 2388 } 2389 if (is_encode_and_store_pattern(n, m)) { 2390 mstack.push(m, Visit); 2391 return true; 2392 } 2393 return false; 2394 } 2395 2396 // Should the Matcher clone shifts on addressing modes, expecting them 2397 // to be subsumed into complex addressing expressions or compute them 2398 // into registers? 2399 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2400 Node *off = m->in(AddPNode::Offset); 2401 if (off->is_Con()) { 2402 address_visited.test_set(m->_idx); // Flag as address_visited 2403 Node *adr = m->in(AddPNode::Address); 2404 2405 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset. 2406 // AtomicAdd is not an addressing expression. 2407 // Cheap to find it by looking for screwy base. 2408 if (adr->is_AddP() && 2409 !adr->in(AddPNode::Base)->is_top() && 2410 !adr->in(AddPNode::Offset)->is_Con() && 2411 off->get_long() == (int) (off->get_long()) && // immL32 2412 // Are there other uses besides address expressions? 2413 !is_visited(adr)) { 2414 address_visited.set(adr->_idx); // Flag as address_visited 2415 Node *shift = adr->in(AddPNode::Offset); 2416 if (!clone_shift(shift, this, mstack, address_visited)) { 2417 mstack.push(shift, Pre_Visit); 2418 } 2419 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2420 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2421 } else { 2422 mstack.push(adr, Pre_Visit); 2423 } 2424 2425 // Clone X+offset as it also folds into most addressing expressions 2426 mstack.push(off, Visit); 2427 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2428 return true; 2429 } else if (clone_shift(off, this, mstack, address_visited)) { 2430 address_visited.test_set(m->_idx); // Flag as address_visited 2431 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2432 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2433 return true; 2434 } 2435 return false; 2436 } 2437 2438 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2439 switch (bt) { 2440 case BoolTest::eq: 2441 return Assembler::eq; 2442 case BoolTest::ne: 2443 return Assembler::neq; 2444 case BoolTest::le: 2445 case BoolTest::ule: 2446 return Assembler::le; 2447 case BoolTest::ge: 2448 case BoolTest::uge: 2449 return Assembler::nlt; 2450 case BoolTest::lt: 2451 case BoolTest::ult: 2452 return Assembler::lt; 2453 case BoolTest::gt: 2454 case BoolTest::ugt: 2455 return Assembler::nle; 2456 default : ShouldNotReachHere(); return Assembler::_false; 2457 } 2458 } 2459 2460 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2461 switch (bt) { 2462 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2463 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
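    // (Illustration: with a NaN operand the unordered relation holds, so NEQ_UQ
    //  makes Java's != come out true, while the ordered *_OQ predicates used for
    //  ==, <, <=, > and >= come out false, matching Java comparison semantics.)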
2464 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2465 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2466 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2467 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2468 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2469 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2470 } 2471 } 2472 2473 // Helper methods for MachSpillCopyNode::implementation(). 2474 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 2475 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2476 assert(ireg == Op_VecS || // 32bit vector 2477 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2478 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi), 2479 "no non-adjacent vector moves" ); 2480 if (masm) { 2481 switch (ireg) { 2482 case Op_VecS: // copy whole register 2483 case Op_VecD: 2484 case Op_VecX: 2485 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2486 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2487 } else { 2488 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2489 } 2490 break; 2491 case Op_VecY: 2492 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2493 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2494 } else { 2495 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2496 } 2497 break; 2498 case Op_VecZ: 2499 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2500 break; 2501 default: 2502 ShouldNotReachHere(); 2503 } 2504 #ifndef PRODUCT 2505 } else { 2506 switch (ireg) { 2507 case Op_VecS: 2508 case Op_VecD: 2509 case Op_VecX: 2510 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2511 break; 2512 case Op_VecY: 2513 case Op_VecZ: 2514 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2515 break; 2516 default: 2517 ShouldNotReachHere(); 2518 } 2519 #endif 2520 } 2521 } 2522 2523 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 2524 int stack_offset, int reg, uint ireg, outputStream* st) { 2525 if (masm) { 2526 if (is_load) { 2527 switch (ireg) { 2528 case Op_VecS: 2529 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2530 break; 2531 case Op_VecD: 2532 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2533 break; 2534 case Op_VecX: 2535 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2536 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2537 } else { 2538 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2539 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2540 } 2541 break; 2542 case Op_VecY: 2543 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2544 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2545 } else { 2546 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2547 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), 
as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2548 } 2549 break; 2550 case Op_VecZ: 2551 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2552 break; 2553 default: 2554 ShouldNotReachHere(); 2555 } 2556 } else { // store 2557 switch (ireg) { 2558 case Op_VecS: 2559 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2560 break; 2561 case Op_VecD: 2562 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2563 break; 2564 case Op_VecX: 2565 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2566 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2567 } 2568 else { 2569 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2570 } 2571 break; 2572 case Op_VecY: 2573 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2574 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2575 } 2576 else { 2577 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2578 } 2579 break; 2580 case Op_VecZ: 2581 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2582 break; 2583 default: 2584 ShouldNotReachHere(); 2585 } 2586 } 2587 #ifndef PRODUCT 2588 } else { 2589 if (is_load) { 2590 switch (ireg) { 2591 case Op_VecS: 2592 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2593 break; 2594 case Op_VecD: 2595 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2596 break; 2597 case Op_VecX: 2598 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2599 break; 2600 case Op_VecY: 2601 case Op_VecZ: 2602 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2603 break; 2604 default: 2605 ShouldNotReachHere(); 2606 } 2607 } else { // store 2608 switch (ireg) { 2609 case Op_VecS: 2610 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2611 break; 2612 case Op_VecD: 2613 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2614 break; 2615 case Op_VecX: 2616 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2617 break; 2618 case Op_VecY: 2619 case Op_VecZ: 2620 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2621 break; 2622 default: 2623 ShouldNotReachHere(); 2624 } 2625 } 2626 #endif 2627 } 2628 } 2629 2630 template <class T> 2631 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) { 2632 int size = type2aelembytes(bt) * len; 2633 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0); 2634 for (int i = 0; i < len; i++) { 2635 int offset = i * type2aelembytes(bt); 2636 switch (bt) { 2637 case T_BYTE: val->at(i) = con; break; 2638 case T_SHORT: { 2639 jshort c = con; 2640 memcpy(val->adr_at(offset), &c, sizeof(jshort)); 2641 break; 2642 } 2643 case T_INT: { 2644 jint c = con; 2645 memcpy(val->adr_at(offset), &c, sizeof(jint)); 2646 break; 2647 } 2648 case T_LONG: { 2649 jlong c = con; 2650 memcpy(val->adr_at(offset), &c, sizeof(jlong)); 2651 break; 2652 } 2653 case T_FLOAT: { 2654 jfloat c = con; 2655 memcpy(val->adr_at(offset), &c, sizeof(jfloat)); 2656 break; 2657 } 2658 case T_DOUBLE: { 2659 jdouble c = con; 2660 memcpy(val->adr_at(offset), &c, sizeof(jdouble)); 2661 break; 2662 } 2663 default: assert(false, "%s", type2name(bt)); 2664 } 2665 } 2666 
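  // For example (illustrative): vreplicate_imm(T_INT, 0x01020304, 4) returns a
  // 16-byte array holding the 4-byte pattern 0x01020304 four times in native
  // byte order, presumably so a replicated immediate can be emitted from the
  // constant table as a vector constant.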
return val; 2667 } 2668 2669 static inline jlong high_bit_set(BasicType bt) { 2670 switch (bt) { 2671 case T_BYTE: return 0x8080808080808080; 2672 case T_SHORT: return 0x8000800080008000; 2673 case T_INT: return 0x8000000080000000; 2674 case T_LONG: return 0x8000000000000000; 2675 default: 2676 ShouldNotReachHere(); 2677 return 0; 2678 } 2679 } 2680 2681 #ifndef PRODUCT 2682 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2683 st->print("nop \t# %d bytes pad for loops and calls", _count); 2684 } 2685 #endif 2686 2687 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const { 2688 __ nop(_count); 2689 } 2690 2691 uint MachNopNode::size(PhaseRegAlloc*) const { 2692 return _count; 2693 } 2694 2695 #ifndef PRODUCT 2696 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2697 st->print("# breakpoint"); 2698 } 2699 #endif 2700 2701 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const { 2702 __ int3(); 2703 } 2704 2705 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2706 return MachNode::size(ra_); 2707 } 2708 2709 %} 2710 2711 encode %{ 2712 2713 enc_class call_epilog %{ 2714 if (VerifyStackAtCalls) { 2715 // Check that stack depth is unchanged: find majik cookie on stack 2716 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2717 Label L; 2718 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2719 __ jccb(Assembler::equal, L); 2720 // Die if stack mismatch 2721 __ int3(); 2722 __ bind(L); 2723 } 2724 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) { 2725 // The last return value is not set by the callee but used to pass the null marker to compiled code. 2726 // Search for the corresponding projection, get the register and emit code that initialized it. 2727 uint con = (tf()->range_cc()->cnt() - 1); 2728 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { 2729 ProjNode* proj = fast_out(i)->as_Proj(); 2730 if (proj->_con == con) { 2731 // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized) 2732 OptoReg::Name optoReg = ra_->get_reg_first(proj); 2733 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP)); 2734 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1; 2735 __ testq(rax, rax); 2736 __ setb(Assembler::notZero, toReg); 2737 __ movzbl(toReg, toReg); 2738 if (reg->is_stack()) { 2739 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size; 2740 __ movq(Address(rsp, st_off), toReg); 2741 } 2742 break; 2743 } 2744 } 2745 if (return_value_is_used()) { 2746 // An inline type is returned as fields in multiple registers. 2747 // Rax either contains an oop if the inline type is buffered or a pointer 2748 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax 2749 // if the lowest bit is set to allow C2 to use the oop after null checking. 
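      // (Worked example of the trick below: if the low tag bit is set,
      //  (rax & 1) - 1 is 0 and the final and clears rax to null; if rax is an
      //  ordinary, aligned oop, (rax & 1) - 1 is all ones and rax is unchanged.)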
2750 // rax &= (rax & 1) - 1 2751 __ movptr(rscratch1, rax); 2752 __ andptr(rscratch1, 0x1); 2753 __ subptr(rscratch1, 0x1); 2754 __ andptr(rax, rscratch1); 2755 } 2756 } 2757 %} 2758 2759 %} 2760 2761 // Operands for bound floating pointer register arguments 2762 operand rxmm0() %{ 2763 constraint(ALLOC_IN_RC(xmm0_reg)); 2764 match(VecX); 2765 format%{%} 2766 interface(REG_INTER); 2767 %} 2768 2769 //----------OPERANDS----------------------------------------------------------- 2770 // Operand definitions must precede instruction definitions for correct parsing 2771 // in the ADLC because operands constitute user defined types which are used in 2772 // instruction definitions. 2773 2774 // Vectors 2775 2776 // Dummy generic vector class. Should be used for all vector operands. 2777 // Replaced with vec[SDXYZ] during post-selection pass. 2778 operand vec() %{ 2779 constraint(ALLOC_IN_RC(dynamic)); 2780 match(VecX); 2781 match(VecY); 2782 match(VecZ); 2783 match(VecS); 2784 match(VecD); 2785 2786 format %{ %} 2787 interface(REG_INTER); 2788 %} 2789 2790 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2791 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2792 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2793 // runtime code generation via reg_class_dynamic. 2794 operand legVec() %{ 2795 constraint(ALLOC_IN_RC(dynamic)); 2796 match(VecX); 2797 match(VecY); 2798 match(VecZ); 2799 match(VecS); 2800 match(VecD); 2801 2802 format %{ %} 2803 interface(REG_INTER); 2804 %} 2805 2806 // Replaces vec during post-selection cleanup. See above. 2807 operand vecS() %{ 2808 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2809 match(VecS); 2810 2811 format %{ %} 2812 interface(REG_INTER); 2813 %} 2814 2815 // Replaces legVec during post-selection cleanup. See above. 2816 operand legVecS() %{ 2817 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2818 match(VecS); 2819 2820 format %{ %} 2821 interface(REG_INTER); 2822 %} 2823 2824 // Replaces vec during post-selection cleanup. See above. 2825 operand vecD() %{ 2826 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2827 match(VecD); 2828 2829 format %{ %} 2830 interface(REG_INTER); 2831 %} 2832 2833 // Replaces legVec during post-selection cleanup. See above. 2834 operand legVecD() %{ 2835 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2836 match(VecD); 2837 2838 format %{ %} 2839 interface(REG_INTER); 2840 %} 2841 2842 // Replaces vec during post-selection cleanup. See above. 2843 operand vecX() %{ 2844 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2845 match(VecX); 2846 2847 format %{ %} 2848 interface(REG_INTER); 2849 %} 2850 2851 // Replaces legVec during post-selection cleanup. See above. 2852 operand legVecX() %{ 2853 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2854 match(VecX); 2855 2856 format %{ %} 2857 interface(REG_INTER); 2858 %} 2859 2860 // Replaces vec during post-selection cleanup. See above. 2861 operand vecY() %{ 2862 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2863 match(VecY); 2864 2865 format %{ %} 2866 interface(REG_INTER); 2867 %} 2868 2869 // Replaces legVec during post-selection cleanup. See above. 2870 operand legVecY() %{ 2871 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2872 match(VecY); 2873 2874 format %{ %} 2875 interface(REG_INTER); 2876 %} 2877 2878 // Replaces vec during post-selection cleanup. See above. 
2879 operand vecZ() %{ 2880 constraint(ALLOC_IN_RC(vectorz_reg)); 2881 match(VecZ); 2882 2883 format %{ %} 2884 interface(REG_INTER); 2885 %} 2886 2887 // Replaces legVec during post-selection cleanup. See above. 2888 operand legVecZ() %{ 2889 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2890 match(VecZ); 2891 2892 format %{ %} 2893 interface(REG_INTER); 2894 %} 2895 2896 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2897 2898 // ============================================================================ 2899 2900 instruct ShouldNotReachHere() %{ 2901 match(Halt); 2902 format %{ "stop\t# ShouldNotReachHere" %} 2903 ins_encode %{ 2904 if (is_reachable()) { 2905 const char* str = __ code_string(_halt_reason); 2906 __ stop(str); 2907 } 2908 %} 2909 ins_pipe(pipe_slow); 2910 %} 2911 2912 // ============================================================================ 2913 2914 instruct addF_reg(regF dst, regF src) %{ 2915 predicate(UseAVX == 0); 2916 match(Set dst (AddF dst src)); 2917 2918 format %{ "addss $dst, $src" %} 2919 ins_cost(150); 2920 ins_encode %{ 2921 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2922 %} 2923 ins_pipe(pipe_slow); 2924 %} 2925 2926 instruct addF_mem(regF dst, memory src) %{ 2927 predicate(UseAVX == 0); 2928 match(Set dst (AddF dst (LoadF src))); 2929 2930 format %{ "addss $dst, $src" %} 2931 ins_cost(150); 2932 ins_encode %{ 2933 __ addss($dst$$XMMRegister, $src$$Address); 2934 %} 2935 ins_pipe(pipe_slow); 2936 %} 2937 2938 instruct addF_imm(regF dst, immF con) %{ 2939 predicate(UseAVX == 0); 2940 match(Set dst (AddF dst con)); 2941 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2942 ins_cost(150); 2943 ins_encode %{ 2944 __ addss($dst$$XMMRegister, $constantaddress($con)); 2945 %} 2946 ins_pipe(pipe_slow); 2947 %} 2948 2949 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2950 predicate(UseAVX > 0); 2951 match(Set dst (AddF src1 src2)); 2952 2953 format %{ "vaddss $dst, $src1, $src2" %} 2954 ins_cost(150); 2955 ins_encode %{ 2956 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2957 %} 2958 ins_pipe(pipe_slow); 2959 %} 2960 2961 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2962 predicate(UseAVX > 0); 2963 match(Set dst (AddF src1 (LoadF src2))); 2964 2965 format %{ "vaddss $dst, $src1, $src2" %} 2966 ins_cost(150); 2967 ins_encode %{ 2968 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2969 %} 2970 ins_pipe(pipe_slow); 2971 %} 2972 2973 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2974 predicate(UseAVX > 0); 2975 match(Set dst (AddF src con)); 2976 2977 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2978 ins_cost(150); 2979 ins_encode %{ 2980 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2981 %} 2982 ins_pipe(pipe_slow); 2983 %} 2984 2985 instruct addD_reg(regD dst, regD src) %{ 2986 predicate(UseAVX == 0); 2987 match(Set dst (AddD dst src)); 2988 2989 format %{ "addsd $dst, $src" %} 2990 ins_cost(150); 2991 ins_encode %{ 2992 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2993 %} 2994 ins_pipe(pipe_slow); 2995 %} 2996 2997 instruct addD_mem(regD dst, memory src) %{ 2998 predicate(UseAVX == 0); 2999 match(Set dst (AddD dst (LoadD src))); 3000 3001 format %{ "addsd $dst, $src" %} 3002 ins_cost(150); 3003 ins_encode %{ 3004 __ addsd($dst$$XMMRegister, $src$$Address); 3005 %} 3006 ins_pipe(pipe_slow); 3007 %} 3008 3009 instruct addD_imm(regD dst, immD 
con) %{ 3010 predicate(UseAVX == 0); 3011 match(Set dst (AddD dst con)); 3012 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3013 ins_cost(150); 3014 ins_encode %{ 3015 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3016 %} 3017 ins_pipe(pipe_slow); 3018 %} 3019 3020 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3021 predicate(UseAVX > 0); 3022 match(Set dst (AddD src1 src2)); 3023 3024 format %{ "vaddsd $dst, $src1, $src2" %} 3025 ins_cost(150); 3026 ins_encode %{ 3027 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3028 %} 3029 ins_pipe(pipe_slow); 3030 %} 3031 3032 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3033 predicate(UseAVX > 0); 3034 match(Set dst (AddD src1 (LoadD src2))); 3035 3036 format %{ "vaddsd $dst, $src1, $src2" %} 3037 ins_cost(150); 3038 ins_encode %{ 3039 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3040 %} 3041 ins_pipe(pipe_slow); 3042 %} 3043 3044 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3045 predicate(UseAVX > 0); 3046 match(Set dst (AddD src con)); 3047 3048 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3049 ins_cost(150); 3050 ins_encode %{ 3051 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3052 %} 3053 ins_pipe(pipe_slow); 3054 %} 3055 3056 instruct subF_reg(regF dst, regF src) %{ 3057 predicate(UseAVX == 0); 3058 match(Set dst (SubF dst src)); 3059 3060 format %{ "subss $dst, $src" %} 3061 ins_cost(150); 3062 ins_encode %{ 3063 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3064 %} 3065 ins_pipe(pipe_slow); 3066 %} 3067 3068 instruct subF_mem(regF dst, memory src) %{ 3069 predicate(UseAVX == 0); 3070 match(Set dst (SubF dst (LoadF src))); 3071 3072 format %{ "subss $dst, $src" %} 3073 ins_cost(150); 3074 ins_encode %{ 3075 __ subss($dst$$XMMRegister, $src$$Address); 3076 %} 3077 ins_pipe(pipe_slow); 3078 %} 3079 3080 instruct subF_imm(regF dst, immF con) %{ 3081 predicate(UseAVX == 0); 3082 match(Set dst (SubF dst con)); 3083 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3084 ins_cost(150); 3085 ins_encode %{ 3086 __ subss($dst$$XMMRegister, $constantaddress($con)); 3087 %} 3088 ins_pipe(pipe_slow); 3089 %} 3090 3091 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3092 predicate(UseAVX > 0); 3093 match(Set dst (SubF src1 src2)); 3094 3095 format %{ "vsubss $dst, $src1, $src2" %} 3096 ins_cost(150); 3097 ins_encode %{ 3098 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3099 %} 3100 ins_pipe(pipe_slow); 3101 %} 3102 3103 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3104 predicate(UseAVX > 0); 3105 match(Set dst (SubF src1 (LoadF src2))); 3106 3107 format %{ "vsubss $dst, $src1, $src2" %} 3108 ins_cost(150); 3109 ins_encode %{ 3110 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3111 %} 3112 ins_pipe(pipe_slow); 3113 %} 3114 3115 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3116 predicate(UseAVX > 0); 3117 match(Set dst (SubF src con)); 3118 3119 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3120 ins_cost(150); 3121 ins_encode %{ 3122 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3123 %} 3124 ins_pipe(pipe_slow); 3125 %} 3126 3127 instruct subD_reg(regD dst, regD src) %{ 3128 predicate(UseAVX == 0); 3129 match(Set dst (SubD dst src)); 3130 3131 format %{ "subsd $dst, $src" %} 
3132 ins_cost(150); 3133 ins_encode %{ 3134 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3135 %} 3136 ins_pipe(pipe_slow); 3137 %} 3138 3139 instruct subD_mem(regD dst, memory src) %{ 3140 predicate(UseAVX == 0); 3141 match(Set dst (SubD dst (LoadD src))); 3142 3143 format %{ "subsd $dst, $src" %} 3144 ins_cost(150); 3145 ins_encode %{ 3146 __ subsd($dst$$XMMRegister, $src$$Address); 3147 %} 3148 ins_pipe(pipe_slow); 3149 %} 3150 3151 instruct subD_imm(regD dst, immD con) %{ 3152 predicate(UseAVX == 0); 3153 match(Set dst (SubD dst con)); 3154 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3155 ins_cost(150); 3156 ins_encode %{ 3157 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3158 %} 3159 ins_pipe(pipe_slow); 3160 %} 3161 3162 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3163 predicate(UseAVX > 0); 3164 match(Set dst (SubD src1 src2)); 3165 3166 format %{ "vsubsd $dst, $src1, $src2" %} 3167 ins_cost(150); 3168 ins_encode %{ 3169 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3170 %} 3171 ins_pipe(pipe_slow); 3172 %} 3173 3174 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3175 predicate(UseAVX > 0); 3176 match(Set dst (SubD src1 (LoadD src2))); 3177 3178 format %{ "vsubsd $dst, $src1, $src2" %} 3179 ins_cost(150); 3180 ins_encode %{ 3181 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3182 %} 3183 ins_pipe(pipe_slow); 3184 %} 3185 3186 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3187 predicate(UseAVX > 0); 3188 match(Set dst (SubD src con)); 3189 3190 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3191 ins_cost(150); 3192 ins_encode %{ 3193 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3194 %} 3195 ins_pipe(pipe_slow); 3196 %} 3197 3198 instruct mulF_reg(regF dst, regF src) %{ 3199 predicate(UseAVX == 0); 3200 match(Set dst (MulF dst src)); 3201 3202 format %{ "mulss $dst, $src" %} 3203 ins_cost(150); 3204 ins_encode %{ 3205 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3206 %} 3207 ins_pipe(pipe_slow); 3208 %} 3209 3210 instruct mulF_mem(regF dst, memory src) %{ 3211 predicate(UseAVX == 0); 3212 match(Set dst (MulF dst (LoadF src))); 3213 3214 format %{ "mulss $dst, $src" %} 3215 ins_cost(150); 3216 ins_encode %{ 3217 __ mulss($dst$$XMMRegister, $src$$Address); 3218 %} 3219 ins_pipe(pipe_slow); 3220 %} 3221 3222 instruct mulF_imm(regF dst, immF con) %{ 3223 predicate(UseAVX == 0); 3224 match(Set dst (MulF dst con)); 3225 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3226 ins_cost(150); 3227 ins_encode %{ 3228 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3229 %} 3230 ins_pipe(pipe_slow); 3231 %} 3232 3233 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3234 predicate(UseAVX > 0); 3235 match(Set dst (MulF src1 src2)); 3236 3237 format %{ "vmulss $dst, $src1, $src2" %} 3238 ins_cost(150); 3239 ins_encode %{ 3240 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3241 %} 3242 ins_pipe(pipe_slow); 3243 %} 3244 3245 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3246 predicate(UseAVX > 0); 3247 match(Set dst (MulF src1 (LoadF src2))); 3248 3249 format %{ "vmulss $dst, $src1, $src2" %} 3250 ins_cost(150); 3251 ins_encode %{ 3252 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3253 %} 3254 ins_pipe(pipe_slow); 3255 %} 3256 3257 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3258 
predicate(UseAVX > 0); 3259 match(Set dst (MulF src con)); 3260 3261 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3262 ins_cost(150); 3263 ins_encode %{ 3264 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3265 %} 3266 ins_pipe(pipe_slow); 3267 %} 3268 3269 instruct mulD_reg(regD dst, regD src) %{ 3270 predicate(UseAVX == 0); 3271 match(Set dst (MulD dst src)); 3272 3273 format %{ "mulsd $dst, $src" %} 3274 ins_cost(150); 3275 ins_encode %{ 3276 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3277 %} 3278 ins_pipe(pipe_slow); 3279 %} 3280 3281 instruct mulD_mem(regD dst, memory src) %{ 3282 predicate(UseAVX == 0); 3283 match(Set dst (MulD dst (LoadD src))); 3284 3285 format %{ "mulsd $dst, $src" %} 3286 ins_cost(150); 3287 ins_encode %{ 3288 __ mulsd($dst$$XMMRegister, $src$$Address); 3289 %} 3290 ins_pipe(pipe_slow); 3291 %} 3292 3293 instruct mulD_imm(regD dst, immD con) %{ 3294 predicate(UseAVX == 0); 3295 match(Set dst (MulD dst con)); 3296 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3297 ins_cost(150); 3298 ins_encode %{ 3299 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3300 %} 3301 ins_pipe(pipe_slow); 3302 %} 3303 3304 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3305 predicate(UseAVX > 0); 3306 match(Set dst (MulD src1 src2)); 3307 3308 format %{ "vmulsd $dst, $src1, $src2" %} 3309 ins_cost(150); 3310 ins_encode %{ 3311 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3312 %} 3313 ins_pipe(pipe_slow); 3314 %} 3315 3316 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3317 predicate(UseAVX > 0); 3318 match(Set dst (MulD src1 (LoadD src2))); 3319 3320 format %{ "vmulsd $dst, $src1, $src2" %} 3321 ins_cost(150); 3322 ins_encode %{ 3323 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3324 %} 3325 ins_pipe(pipe_slow); 3326 %} 3327 3328 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3329 predicate(UseAVX > 0); 3330 match(Set dst (MulD src con)); 3331 3332 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3333 ins_cost(150); 3334 ins_encode %{ 3335 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3336 %} 3337 ins_pipe(pipe_slow); 3338 %} 3339 3340 instruct divF_reg(regF dst, regF src) %{ 3341 predicate(UseAVX == 0); 3342 match(Set dst (DivF dst src)); 3343 3344 format %{ "divss $dst, $src" %} 3345 ins_cost(150); 3346 ins_encode %{ 3347 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3348 %} 3349 ins_pipe(pipe_slow); 3350 %} 3351 3352 instruct divF_mem(regF dst, memory src) %{ 3353 predicate(UseAVX == 0); 3354 match(Set dst (DivF dst (LoadF src))); 3355 3356 format %{ "divss $dst, $src" %} 3357 ins_cost(150); 3358 ins_encode %{ 3359 __ divss($dst$$XMMRegister, $src$$Address); 3360 %} 3361 ins_pipe(pipe_slow); 3362 %} 3363 3364 instruct divF_imm(regF dst, immF con) %{ 3365 predicate(UseAVX == 0); 3366 match(Set dst (DivF dst con)); 3367 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3368 ins_cost(150); 3369 ins_encode %{ 3370 __ divss($dst$$XMMRegister, $constantaddress($con)); 3371 %} 3372 ins_pipe(pipe_slow); 3373 %} 3374 3375 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3376 predicate(UseAVX > 0); 3377 match(Set dst (DivF src1 src2)); 3378 3379 format %{ "vdivss $dst, $src1, $src2" %} 3380 ins_cost(150); 3381 ins_encode %{ 3382 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, 
$src2$$XMMRegister); 3383 %} 3384 ins_pipe(pipe_slow); 3385 %} 3386 3387 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3388 predicate(UseAVX > 0); 3389 match(Set dst (DivF src1 (LoadF src2))); 3390 3391 format %{ "vdivss $dst, $src1, $src2" %} 3392 ins_cost(150); 3393 ins_encode %{ 3394 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3395 %} 3396 ins_pipe(pipe_slow); 3397 %} 3398 3399 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3400 predicate(UseAVX > 0); 3401 match(Set dst (DivF src con)); 3402 3403 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3404 ins_cost(150); 3405 ins_encode %{ 3406 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3407 %} 3408 ins_pipe(pipe_slow); 3409 %} 3410 3411 instruct divD_reg(regD dst, regD src) %{ 3412 predicate(UseAVX == 0); 3413 match(Set dst (DivD dst src)); 3414 3415 format %{ "divsd $dst, $src" %} 3416 ins_cost(150); 3417 ins_encode %{ 3418 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3419 %} 3420 ins_pipe(pipe_slow); 3421 %} 3422 3423 instruct divD_mem(regD dst, memory src) %{ 3424 predicate(UseAVX == 0); 3425 match(Set dst (DivD dst (LoadD src))); 3426 3427 format %{ "divsd $dst, $src" %} 3428 ins_cost(150); 3429 ins_encode %{ 3430 __ divsd($dst$$XMMRegister, $src$$Address); 3431 %} 3432 ins_pipe(pipe_slow); 3433 %} 3434 3435 instruct divD_imm(regD dst, immD con) %{ 3436 predicate(UseAVX == 0); 3437 match(Set dst (DivD dst con)); 3438 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3439 ins_cost(150); 3440 ins_encode %{ 3441 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3442 %} 3443 ins_pipe(pipe_slow); 3444 %} 3445 3446 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3447 predicate(UseAVX > 0); 3448 match(Set dst (DivD src1 src2)); 3449 3450 format %{ "vdivsd $dst, $src1, $src2" %} 3451 ins_cost(150); 3452 ins_encode %{ 3453 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3454 %} 3455 ins_pipe(pipe_slow); 3456 %} 3457 3458 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3459 predicate(UseAVX > 0); 3460 match(Set dst (DivD src1 (LoadD src2))); 3461 3462 format %{ "vdivsd $dst, $src1, $src2" %} 3463 ins_cost(150); 3464 ins_encode %{ 3465 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3466 %} 3467 ins_pipe(pipe_slow); 3468 %} 3469 3470 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3471 predicate(UseAVX > 0); 3472 match(Set dst (DivD src con)); 3473 3474 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3475 ins_cost(150); 3476 ins_encode %{ 3477 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3478 %} 3479 ins_pipe(pipe_slow); 3480 %} 3481 3482 instruct absF_reg(regF dst) %{ 3483 predicate(UseAVX == 0); 3484 match(Set dst (AbsF dst)); 3485 ins_cost(150); 3486 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3487 ins_encode %{ 3488 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3489 %} 3490 ins_pipe(pipe_slow); 3491 %} 3492 3493 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3494 predicate(UseAVX > 0); 3495 match(Set dst (AbsF src)); 3496 ins_cost(150); 3497 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3498 ins_encode %{ 3499 int vlen_enc = Assembler::AVX_128bit; 3500 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3501 ExternalAddress(float_signmask()), vlen_enc); 3502 %} 3503 
ins_pipe(pipe_slow); 3504 %} 3505 3506 instruct absD_reg(regD dst) %{ 3507 predicate(UseAVX == 0); 3508 match(Set dst (AbsD dst)); 3509 ins_cost(150); 3510 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3511 "# abs double by sign masking" %} 3512 ins_encode %{ 3513 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3514 %} 3515 ins_pipe(pipe_slow); 3516 %} 3517 3518 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3519 predicate(UseAVX > 0); 3520 match(Set dst (AbsD src)); 3521 ins_cost(150); 3522 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3523 "# abs double by sign masking" %} 3524 ins_encode %{ 3525 int vlen_enc = Assembler::AVX_128bit; 3526 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3527 ExternalAddress(double_signmask()), vlen_enc); 3528 %} 3529 ins_pipe(pipe_slow); 3530 %} 3531 3532 instruct negF_reg(regF dst) %{ 3533 predicate(UseAVX == 0); 3534 match(Set dst (NegF dst)); 3535 ins_cost(150); 3536 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3537 ins_encode %{ 3538 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3539 %} 3540 ins_pipe(pipe_slow); 3541 %} 3542 3543 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3544 predicate(UseAVX > 0); 3545 match(Set dst (NegF src)); 3546 ins_cost(150); 3547 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3548 ins_encode %{ 3549 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3550 ExternalAddress(float_signflip())); 3551 %} 3552 ins_pipe(pipe_slow); 3553 %} 3554 3555 instruct negD_reg(regD dst) %{ 3556 predicate(UseAVX == 0); 3557 match(Set dst (NegD dst)); 3558 ins_cost(150); 3559 format %{ "xorpd $dst, [0x8000000000000000]\t" 3560 "# neg double by sign flipping" %} 3561 ins_encode %{ 3562 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3563 %} 3564 ins_pipe(pipe_slow); 3565 %} 3566 3567 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3568 predicate(UseAVX > 0); 3569 match(Set dst (NegD src)); 3570 ins_cost(150); 3571 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3572 "# neg double by sign flipping" %} 3573 ins_encode %{ 3574 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3575 ExternalAddress(double_signflip())); 3576 %} 3577 ins_pipe(pipe_slow); 3578 %} 3579 3580 // sqrtss instruction needs destination register to be pre initialized for best performance 3581 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3582 instruct sqrtF_reg(regF dst) %{ 3583 match(Set dst (SqrtF dst)); 3584 format %{ "sqrtss $dst, $dst" %} 3585 ins_encode %{ 3586 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3587 %} 3588 ins_pipe(pipe_slow); 3589 %} 3590 3591 // sqrtsd instruction needs destination register to be pre initialized for best performance 3592 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3593 instruct sqrtD_reg(regD dst) %{ 3594 match(Set dst (SqrtD dst)); 3595 format %{ "sqrtsd $dst, $dst" %} 3596 ins_encode %{ 3597 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3598 %} 3599 ins_pipe(pipe_slow); 3600 %} 3601 3602 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{ 3603 effect(TEMP tmp); 3604 match(Set dst (ConvF2HF src)); 3605 ins_cost(125); 3606 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%} 3607 ins_encode %{ 3608 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister); 3609 %} 3610 ins_pipe( pipe_slow ); 3611 %} 3612 3613 instruct convF2HF_mem_reg(memory mem, regF src, kReg 
ktmp, rRegI rtmp) %{ 3614 predicate((UseAVX > 2) && VM_Version::supports_avx512vl()); 3615 effect(TEMP ktmp, TEMP rtmp); 3616 match(Set mem (StoreC mem (ConvF2HF src))); 3617 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %} 3618 ins_encode %{ 3619 __ movl($rtmp$$Register, 0x1); 3620 __ kmovwl($ktmp$$KRegister, $rtmp$$Register); 3621 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 3622 %} 3623 ins_pipe( pipe_slow ); 3624 %} 3625 3626 instruct vconvF2HF(vec dst, vec src) %{ 3627 match(Set dst (VectorCastF2HF src)); 3628 format %{ "vector_conv_F2HF $dst $src" %} 3629 ins_encode %{ 3630 int vlen_enc = vector_length_encoding(this, $src); 3631 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc); 3632 %} 3633 ins_pipe( pipe_slow ); 3634 %} 3635 3636 instruct vconvF2HF_mem_reg(memory mem, vec src) %{ 3637 predicate(n->as_StoreVector()->memory_size() >= 16); 3638 match(Set mem (StoreVector mem (VectorCastF2HF src))); 3639 format %{ "vcvtps2ph $mem,$src" %} 3640 ins_encode %{ 3641 int vlen_enc = vector_length_encoding(this, $src); 3642 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc); 3643 %} 3644 ins_pipe( pipe_slow ); 3645 %} 3646 3647 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{ 3648 match(Set dst (ConvHF2F src)); 3649 format %{ "vcvtph2ps $dst,$src" %} 3650 ins_encode %{ 3651 __ flt16_to_flt($dst$$XMMRegister, $src$$Register); 3652 %} 3653 ins_pipe( pipe_slow ); 3654 %} 3655 3656 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{ 3657 match(Set dst (VectorCastHF2F (LoadVector mem))); 3658 format %{ "vcvtph2ps $dst,$mem" %} 3659 ins_encode %{ 3660 int vlen_enc = vector_length_encoding(this); 3661 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc); 3662 %} 3663 ins_pipe( pipe_slow ); 3664 %} 3665 3666 instruct vconvHF2F(vec dst, vec src) %{ 3667 match(Set dst (VectorCastHF2F src)); 3668 ins_cost(125); 3669 format %{ "vector_conv_HF2F $dst,$src" %} 3670 ins_encode %{ 3671 int vlen_enc = vector_length_encoding(this); 3672 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 3673 %} 3674 ins_pipe( pipe_slow ); 3675 %} 3676 3677 // ---------------------------------------- VectorReinterpret ------------------------------------ 3678 instruct reinterpret_mask(kReg dst) %{ 3679 predicate(n->bottom_type()->isa_vectmask() && 3680 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3681 match(Set dst (VectorReinterpret dst)); 3682 ins_cost(125); 3683 format %{ "vector_reinterpret $dst\t!" %} 3684 ins_encode %{ 3685 // empty 3686 %} 3687 ins_pipe( pipe_slow ); 3688 %} 3689 3690 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3691 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3692 n->bottom_type()->isa_vectmask() && 3693 n->in(1)->bottom_type()->isa_vectmask() && 3694 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3695 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3696 match(Set dst (VectorReinterpret src)); 3697 effect(TEMP xtmp); 3698 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" 
%} 3699 ins_encode %{ 3700 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3701 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3702 assert(src_sz == dst_sz , "src and dst size mismatch"); 3703 int vlen_enc = vector_length_encoding(src_sz); 3704 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3705 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3706 %} 3707 ins_pipe( pipe_slow ); 3708 %} 3709 3710 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3711 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3712 n->bottom_type()->isa_vectmask() && 3713 n->in(1)->bottom_type()->isa_vectmask() && 3714 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3715 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3716 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3717 match(Set dst (VectorReinterpret src)); 3718 effect(TEMP xtmp); 3719 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3720 ins_encode %{ 3721 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3722 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3723 assert(src_sz == dst_sz , "src and dst size mismatch"); 3724 int vlen_enc = vector_length_encoding(src_sz); 3725 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3726 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3727 %} 3728 ins_pipe( pipe_slow ); 3729 %} 3730 3731 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3732 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3733 n->bottom_type()->isa_vectmask() && 3734 n->in(1)->bottom_type()->isa_vectmask() && 3735 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3736 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3737 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3738 match(Set dst (VectorReinterpret src)); 3739 effect(TEMP xtmp); 3740 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %} 3741 ins_encode %{ 3742 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3743 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3744 assert(src_sz == dst_sz , "src and dst size mismatch"); 3745 int vlen_enc = vector_length_encoding(src_sz); 3746 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3747 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3748 %} 3749 ins_pipe( pipe_slow ); 3750 %} 3751 3752 instruct reinterpret(vec dst) %{ 3753 predicate(!n->bottom_type()->isa_vectmask() && 3754 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3755 match(Set dst (VectorReinterpret dst)); 3756 ins_cost(125); 3757 format %{ "vector_reinterpret $dst\t!" 
%} 3758 ins_encode %{ 3759 // empty 3760 %} 3761 ins_pipe( pipe_slow ); 3762 %} 3763 3764 instruct reinterpret_expand(vec dst, vec src) %{ 3765 predicate(UseAVX == 0 && 3766 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3767 match(Set dst (VectorReinterpret src)); 3768 ins_cost(125); 3769 effect(TEMP dst); 3770 format %{ "vector_reinterpret_expand $dst,$src" %} 3771 ins_encode %{ 3772 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3773 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3774 3775 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3776 if (src_vlen_in_bytes == 4) { 3777 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3778 } else { 3779 assert(src_vlen_in_bytes == 8, ""); 3780 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3781 } 3782 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3783 %} 3784 ins_pipe( pipe_slow ); 3785 %} 3786 3787 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3788 predicate(UseAVX > 0 && 3789 !n->bottom_type()->isa_vectmask() && 3790 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3791 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3792 match(Set dst (VectorReinterpret src)); 3793 ins_cost(125); 3794 format %{ "vector_reinterpret_expand $dst,$src" %} 3795 ins_encode %{ 3796 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3797 %} 3798 ins_pipe( pipe_slow ); 3799 %} 3800 3801 3802 instruct vreinterpret_expand(legVec dst, vec src) %{ 3803 predicate(UseAVX > 0 && 3804 !n->bottom_type()->isa_vectmask() && 3805 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3806 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3807 match(Set dst (VectorReinterpret src)); 3808 ins_cost(125); 3809 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3810 ins_encode %{ 3811 switch (Matcher::vector_length_in_bytes(this, $src)) { 3812 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3813 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3814 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3815 default: ShouldNotReachHere(); 3816 } 3817 %} 3818 ins_pipe( pipe_slow ); 3819 %} 3820 3821 instruct reinterpret_shrink(vec dst, legVec src) %{ 3822 predicate(!n->bottom_type()->isa_vectmask() && 3823 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3824 match(Set dst (VectorReinterpret src)); 3825 ins_cost(125); 3826 format %{ "vector_reinterpret_shrink $dst,$src\t!" 
%} 3827 ins_encode %{ 3828 switch (Matcher::vector_length_in_bytes(this)) { 3829 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3830 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3831 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3832 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3833 default: ShouldNotReachHere(); 3834 } 3835 %} 3836 ins_pipe( pipe_slow ); 3837 %} 3838 3839 // ---------------------------------------------------------------------------------------------------- 3840 3841 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3842 match(Set dst (RoundDoubleMode src rmode)); 3843 format %{ "roundsd $dst,$src" %} 3844 ins_cost(150); 3845 ins_encode %{ 3846 assert(UseSSE >= 4, "required"); 3847 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) { 3848 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3849 } 3850 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3851 %} 3852 ins_pipe(pipe_slow); 3853 %} 3854 3855 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3856 match(Set dst (RoundDoubleMode con rmode)); 3857 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3858 ins_cost(150); 3859 ins_encode %{ 3860 assert(UseSSE >= 4, "required"); 3861 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3862 %} 3863 ins_pipe(pipe_slow); 3864 %} 3865 3866 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3867 predicate(Matcher::vector_length(n) < 8); 3868 match(Set dst (RoundDoubleModeV src rmode)); 3869 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3870 ins_encode %{ 3871 assert(UseAVX > 0, "required"); 3872 int vlen_enc = vector_length_encoding(this); 3873 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3874 %} 3875 ins_pipe( pipe_slow ); 3876 %} 3877 3878 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3879 predicate(Matcher::vector_length(n) == 8); 3880 match(Set dst (RoundDoubleModeV src rmode)); 3881 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3882 ins_encode %{ 3883 assert(UseAVX > 2, "required"); 3884 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3885 %} 3886 ins_pipe( pipe_slow ); 3887 %} 3888 3889 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3890 predicate(Matcher::vector_length(n) < 8); 3891 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3892 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3893 ins_encode %{ 3894 assert(UseAVX > 0, "required"); 3895 int vlen_enc = vector_length_encoding(this); 3896 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3897 %} 3898 ins_pipe( pipe_slow ); 3899 %} 3900 3901 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3902 predicate(Matcher::vector_length(n) == 8); 3903 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3904 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3905 ins_encode %{ 3906 assert(UseAVX > 2, "required"); 3907 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3908 %} 3909 ins_pipe( pipe_slow ); 3910 %} 3911 3912 instruct onspinwait() %{ 3913 match(OnSpinWait); 3914 ins_cost(200); 3915 3916 format %{ 3917 $$template 3918 $$emit$$"pause\t! 
membar_onspinwait" 3919 %} 3920 ins_encode %{ 3921 __ pause(); 3922 %} 3923 ins_pipe(pipe_slow); 3924 %} 3925 3926 // a * b + c 3927 instruct fmaD_reg(regD a, regD b, regD c) %{ 3928 match(Set c (FmaD c (Binary a b))); 3929 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3930 ins_cost(150); 3931 ins_encode %{ 3932 assert(UseFMA, "Needs FMA instructions support."); 3933 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3934 %} 3935 ins_pipe( pipe_slow ); 3936 %} 3937 3938 // a * b + c 3939 instruct fmaF_reg(regF a, regF b, regF c) %{ 3940 match(Set c (FmaF c (Binary a b))); 3941 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3942 ins_cost(150); 3943 ins_encode %{ 3944 assert(UseFMA, "Needs FMA instructions support."); 3945 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3946 %} 3947 ins_pipe( pipe_slow ); 3948 %} 3949 3950 // ====================VECTOR INSTRUCTIONS===================================== 3951 3952 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 3953 instruct MoveVec2Leg(legVec dst, vec src) %{ 3954 match(Set dst src); 3955 format %{ "" %} 3956 ins_encode %{ 3957 ShouldNotReachHere(); 3958 %} 3959 ins_pipe( fpu_reg_reg ); 3960 %} 3961 3962 instruct MoveLeg2Vec(vec dst, legVec src) %{ 3963 match(Set dst src); 3964 format %{ "" %} 3965 ins_encode %{ 3966 ShouldNotReachHere(); 3967 %} 3968 ins_pipe( fpu_reg_reg ); 3969 %} 3970 3971 // ============================================================================ 3972 3973 // Load vectors generic operand pattern 3974 instruct loadV(vec dst, memory mem) %{ 3975 match(Set dst (LoadVector mem)); 3976 ins_cost(125); 3977 format %{ "load_vector $dst,$mem" %} 3978 ins_encode %{ 3979 BasicType bt = Matcher::vector_element_basic_type(this); 3980 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 3981 %} 3982 ins_pipe( pipe_slow ); 3983 %} 3984 3985 // Store vectors generic operand pattern. 3986 instruct storeV(memory mem, vec src) %{ 3987 match(Set mem (StoreVector mem src)); 3988 ins_cost(145); 3989 format %{ "store_vector $mem,$src\n\t" %} 3990 ins_encode %{ 3991 switch (Matcher::vector_length_in_bytes(this, $src)) { 3992 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 3993 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 3994 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 3995 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 3996 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 3997 default: ShouldNotReachHere(); 3998 } 3999 %} 4000 ins_pipe( pipe_slow ); 4001 %} 4002 4003 // ---------------------------------------- Gather ------------------------------------ 4004 4005 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE 4006 4007 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 4008 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) && 4009 Matcher::vector_length_in_bytes(n) <= 32); 4010 match(Set dst (LoadVectorGather mem idx)); 4011 effect(TEMP dst, TEMP tmp, TEMP mask); 4012 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %} 4013 ins_encode %{ 4014 int vlen_enc = vector_length_encoding(this); 4015 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4016 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4017 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4018 __ lea($tmp$$Register, $mem$$Address); 4019 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4020 %} 4021 ins_pipe( pipe_slow ); 4022 %} 4023 4024 4025 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ 4026 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4027 !is_subword_type(Matcher::vector_element_basic_type(n))); 4028 match(Set dst (LoadVectorGather mem idx)); 4029 effect(TEMP dst, TEMP tmp, TEMP ktmp); 4030 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %} 4031 ins_encode %{ 4032 int vlen_enc = vector_length_encoding(this); 4033 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4034 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister); 4035 __ lea($tmp$$Register, $mem$$Address); 4036 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4037 %} 4038 ins_pipe( pipe_slow ); 4039 %} 4040 4041 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4042 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4043 !is_subword_type(Matcher::vector_element_basic_type(n))); 4044 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask))); 4045 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp); 4046 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %} 4047 ins_encode %{ 4048 assert(UseAVX > 2, "sanity"); 4049 int vlen_enc = vector_length_encoding(this); 4050 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4051 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4052 // Note: Since the gather instruction partially updates the opmask register used 4053 // for predication, the mask operand is moved to a temporary. 4054 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4055 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4056 __ lea($tmp$$Register, $mem$$Address); 4057 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4058 %} 4059 ins_pipe( pipe_slow ); 4060 %} 4061 4062 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{ 4063 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4064 match(Set dst (LoadVectorGather mem idx_base)); 4065 effect(TEMP tmp, TEMP rtmp); 4066 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t!
using $tmp and $rtmp as TEMP" %} 4067 ins_encode %{ 4068 int vlen_enc = vector_length_encoding(this); 4069 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4070 __ lea($tmp$$Register, $mem$$Address); 4071 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc); 4072 %} 4073 ins_pipe( pipe_slow ); 4074 %} 4075 4076 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp, 4077 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4078 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4079 match(Set dst (LoadVectorGather mem idx_base)); 4080 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4081 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4082 ins_encode %{ 4083 int vlen_enc = vector_length_encoding(this); 4084 int vector_len = Matcher::vector_length(this); 4085 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4086 __ lea($tmp$$Register, $mem$$Address); 4087 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4088 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister, 4089 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4090 %} 4091 ins_pipe( pipe_slow ); 4092 %} 4093 4094 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4095 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4096 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask))); 4097 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4098 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4099 ins_encode %{ 4100 int vlen_enc = vector_length_encoding(this); 4101 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4102 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4103 __ lea($tmp$$Register, $mem$$Address); 4104 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4105 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4106 %} 4107 ins_pipe( pipe_slow ); 4108 %} 4109 4110 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp, 4111 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4112 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4113 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask))); 4114 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4115 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4116 ins_encode %{ 4117 int vlen_enc = vector_length_encoding(this); 4118 int vector_len = Matcher::vector_length(this); 4119 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4120 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4121 __ lea($tmp$$Register, $mem$$Address); 4122 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4123 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4124 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4125 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4126 %} 4127 ins_pipe( pipe_slow ); 4128 %} 4129 4130 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4131 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4132 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask))); 4133 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4134 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4135 ins_encode %{ 4136 int vlen_enc = vector_length_encoding(this); 4137 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4138 __ lea($tmp$$Register, $mem$$Address); 4139 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4140 if (elem_bt == T_SHORT) { 4141 __ movl($mask_idx$$Register, 0x55555555); 4142 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4143 } 4144 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4145 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4146 %} 4147 ins_pipe( pipe_slow ); 4148 %} 4149 4150 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp, 4151 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4152 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4153 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask))); 4154 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4155 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4156 ins_encode %{ 4157 int vlen_enc = vector_length_encoding(this); 4158 int vector_len = Matcher::vector_length(this); 4159 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4160 __ lea($tmp$$Register, $mem$$Address); 4161 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4162 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4163 if (elem_bt == T_SHORT) { 4164 __ movl($mask_idx$$Register, 0x55555555); 4165 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4166 } 4167 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4168 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4169 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4170 %} 4171 ins_pipe( pipe_slow ); 4172 %} 4173 4174 // ====================Scatter======================================= 4175 4176 // Scatter INT, LONG, FLOAT, DOUBLE 4177 4178 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{ 4179 predicate(UseAVX > 2); 4180 match(Set mem (StoreVectorScatter mem (Binary src idx))); 4181 effect(TEMP tmp, TEMP ktmp); 4182 format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %} 4183 ins_encode %{ 4184 int vlen_enc = vector_length_encoding(this, $src); 4185 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4186 4187 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4188 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4189 4190 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg); 4191 __ lea($tmp$$Register, $mem$$Address); 4192 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4193 %} 4194 ins_pipe( pipe_slow ); 4195 %} 4196 4197 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4198 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask)))); 4199 effect(TEMP tmp, TEMP ktmp); 4200 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %} 4201 ins_encode %{ 4202 int vlen_enc = vector_length_encoding(this, $src); 4203 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4204 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4205 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4206 // Note: Since the scatter instruction partially updates the opmask register used 4207 // for predication, the mask operand is moved to a temporary.
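// Rough sketch of the operation, for reference: for each lane i whose opmask bit is set,
// src[i] is stored to mem[idx[i]]; the hardware clears each opmask bit as the corresponding
// store completes, which is why a scratch opmask ($ktmp) is used rather than $mask itself.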
4208 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4209 __ lea($tmp$$Register, $mem$$Address); 4210 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4211 %} 4212 ins_pipe( pipe_slow ); 4213 %} 4214 4215 // ====================REPLICATE======================================= 4216 4217 // Replicate byte scalar to be vector 4218 instruct vReplB_reg(vec dst, rRegI src) %{ 4219 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 4220 match(Set dst (Replicate src)); 4221 format %{ "replicateB $dst,$src" %} 4222 ins_encode %{ 4223 uint vlen = Matcher::vector_length(this); 4224 if (UseAVX >= 2) { 4225 int vlen_enc = vector_length_encoding(this); 4226 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4227 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4228 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4229 } else { 4230 __ movdl($dst$$XMMRegister, $src$$Register); 4231 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4232 } 4233 } else { 4234 assert(UseAVX < 2, ""); 4235 __ movdl($dst$$XMMRegister, $src$$Register); 4236 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4237 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4238 if (vlen >= 16) { 4239 assert(vlen == 16, ""); 4240 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4241 } 4242 } 4243 %} 4244 ins_pipe( pipe_slow ); 4245 %} 4246 4247 instruct ReplB_mem(vec dst, memory mem) %{ 4248 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE); 4249 match(Set dst (Replicate (LoadB mem))); 4250 format %{ "replicateB $dst,$mem" %} 4251 ins_encode %{ 4252 int vlen_enc = vector_length_encoding(this); 4253 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4254 %} 4255 ins_pipe( pipe_slow ); 4256 %} 4257 4258 // ====================ReplicateS======================================= 4259 4260 instruct vReplS_reg(vec dst, rRegI src) %{ 4261 predicate(Matcher::vector_element_basic_type(n) == T_SHORT); 4262 match(Set dst (Replicate src)); 4263 format %{ "replicateS $dst,$src" %} 4264 ins_encode %{ 4265 uint vlen = Matcher::vector_length(this); 4266 int vlen_enc = vector_length_encoding(this); 4267 if (UseAVX >= 2) { 4268 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4269 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4270 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4271 } else { 4272 __ movdl($dst$$XMMRegister, $src$$Register); 4273 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4274 } 4275 } else { 4276 assert(UseAVX < 2, ""); 4277 __ movdl($dst$$XMMRegister, $src$$Register); 4278 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4279 if (vlen >= 8) { 4280 assert(vlen == 8, ""); 4281 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4282 } 4283 } 4284 %} 4285 ins_pipe( pipe_slow ); 4286 %} 4287 4288 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{ 4289 match(Set dst (Replicate con)); 4290 effect(TEMP rtmp); 4291 format %{ "replicateHF $dst, $con \t! 
using $rtmp as TEMP" %} 4292 ins_encode %{ 4293 int vlen_enc = vector_length_encoding(this); 4294 BasicType bt = Matcher::vector_element_basic_type(this); 4295 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, ""); 4296 __ movl($rtmp$$Register, $con$$constant); 4297 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4298 %} 4299 ins_pipe( pipe_slow ); 4300 %} 4301 4302 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{ 4303 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT); 4304 match(Set dst (Replicate src)); 4305 effect(TEMP rtmp); 4306 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %} 4307 ins_encode %{ 4308 int vlen_enc = vector_length_encoding(this); 4309 __ vmovw($rtmp$$Register, $src$$XMMRegister); 4310 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4311 %} 4312 ins_pipe( pipe_slow ); 4313 %} 4314 4315 instruct ReplS_mem(vec dst, memory mem) %{ 4316 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4317 match(Set dst (Replicate (LoadS mem))); 4318 format %{ "replicateS $dst,$mem" %} 4319 ins_encode %{ 4320 int vlen_enc = vector_length_encoding(this); 4321 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4322 %} 4323 ins_pipe( pipe_slow ); 4324 %} 4325 4326 // ====================ReplicateI======================================= 4327 4328 instruct ReplI_reg(vec dst, rRegI src) %{ 4329 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4330 match(Set dst (Replicate src)); 4331 format %{ "replicateI $dst,$src" %} 4332 ins_encode %{ 4333 uint vlen = Matcher::vector_length(this); 4334 int vlen_enc = vector_length_encoding(this); 4335 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4336 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4337 } else if (VM_Version::supports_avx2()) { 4338 __ movdl($dst$$XMMRegister, $src$$Register); 4339 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4340 } else { 4341 __ movdl($dst$$XMMRegister, $src$$Register); 4342 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4343 } 4344 %} 4345 ins_pipe( pipe_slow ); 4346 %} 4347 4348 instruct ReplI_mem(vec dst, memory mem) %{ 4349 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4350 match(Set dst (Replicate (LoadI mem))); 4351 format %{ "replicateI $dst,$mem" %} 4352 ins_encode %{ 4353 int vlen_enc = vector_length_encoding(this); 4354 if (VM_Version::supports_avx2()) { 4355 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4356 } else if (VM_Version::supports_avx()) { 4357 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4358 } else { 4359 __ movdl($dst$$XMMRegister, $mem$$Address); 4360 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4361 } 4362 %} 4363 ins_pipe( pipe_slow ); 4364 %} 4365 4366 instruct ReplI_imm(vec dst, immI con) %{ 4367 predicate(Matcher::is_non_long_integral_vector(n)); 4368 match(Set dst (Replicate con)); 4369 format %{ "replicateI $dst,$con" %} 4370 ins_encode %{ 4371 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4372 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 16) / 4373 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4374 BasicType bt = Matcher::vector_element_basic_type(this); 4375 int vlen = Matcher::vector_length_in_bytes(this); 4376 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4377 %} 4378 ins_pipe( pipe_slow ); 4379 %} 4380 4381 // Replicate scalar zero to be vector 4382 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4383 predicate(Matcher::is_non_long_integral_vector(n)); 4384 match(Set dst (Replicate zero)); 4385 format %{ "replicateI $dst,$zero" %} 4386 ins_encode %{ 4387 int vlen_enc = vector_length_encoding(this); 4388 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4389 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4390 } else { 4391 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4392 } 4393 %} 4394 ins_pipe( fpu_reg_reg ); 4395 %} 4396 4397 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4398 predicate(Matcher::is_non_long_integral_vector(n)); 4399 match(Set dst (Replicate con)); 4400 format %{ "vallones $dst" %} 4401 ins_encode %{ 4402 int vector_len = vector_length_encoding(this); 4403 __ vallones($dst$$XMMRegister, vector_len); 4404 %} 4405 ins_pipe( pipe_slow ); 4406 %} 4407 4408 // ====================ReplicateL======================================= 4409 4410 // Replicate long (8 byte) scalar to be vector 4411 instruct ReplL_reg(vec dst, rRegL src) %{ 4412 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4413 match(Set dst (Replicate src)); 4414 format %{ "replicateL $dst,$src" %} 4415 ins_encode %{ 4416 int vlen = Matcher::vector_length(this); 4417 int vlen_enc = vector_length_encoding(this); 4418 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4419 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4420 } else if (VM_Version::supports_avx2()) { 4421 __ movdq($dst$$XMMRegister, $src$$Register); 4422 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4423 } else { 4424 __ movdq($dst$$XMMRegister, $src$$Register); 4425 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4426 } 4427 %} 4428 ins_pipe( pipe_slow ); 4429 %} 4430 4431 instruct ReplL_mem(vec dst, memory mem) %{ 4432 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4433 match(Set dst (Replicate (LoadL mem))); 4434 format %{ "replicateL $dst,$mem" %} 4435 ins_encode %{ 4436 int vlen_enc = vector_length_encoding(this); 4437 if (VM_Version::supports_avx2()) { 4438 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4439 } else if (VM_Version::supports_sse3()) { 4440 __ movddup($dst$$XMMRegister, $mem$$Address); 4441 } else { 4442 __ movq($dst$$XMMRegister, $mem$$Address); 4443 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4444 } 4445 %} 4446 ins_pipe( pipe_slow ); 4447 %} 4448 4449 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4450 instruct ReplL_imm(vec dst, immL con) %{ 4451 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4452 match(Set dst (Replicate con)); 4453 format %{ "replicateL $dst,$con" %} 4454 ins_encode %{ 4455 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4456 int vlen = Matcher::vector_length_in_bytes(this); 4457 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4458 %} 4459 ins_pipe( pipe_slow ); 4460 %} 4461 4462 instruct ReplL_zero(vec dst, immL0 zero) %{ 4463 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4464 match(Set dst (Replicate zero)); 4465 format %{ "replicateL $dst,$zero" %} 4466 ins_encode %{ 4467 int vlen_enc = vector_length_encoding(this); 4468 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4469 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4470 } else { 4471 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4472 } 4473 %} 4474 ins_pipe( fpu_reg_reg ); 4475 %} 4476 4477 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4478 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4479 match(Set dst (Replicate con)); 4480 format %{ "vallones $dst" %} 4481 ins_encode %{ 4482 int vector_len = vector_length_encoding(this); 4483 __ vallones($dst$$XMMRegister, vector_len); 4484 %} 4485 ins_pipe( pipe_slow ); 4486 %} 4487 4488 // ====================ReplicateF======================================= 4489 4490 instruct vReplF_reg(vec dst, vlRegF src) %{ 4491 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4492 match(Set dst (Replicate src)); 4493 format %{ "replicateF $dst,$src" %} 4494 ins_encode %{ 4495 uint vlen = Matcher::vector_length(this); 4496 int vlen_enc = vector_length_encoding(this); 4497 if (vlen <= 4) { 4498 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4499 } else if (VM_Version::supports_avx2()) { 4500 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4501 } else { 4502 assert(vlen == 8, "sanity"); 4503 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4504 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4505 } 4506 %} 4507 ins_pipe( pipe_slow ); 4508 %} 4509 4510 instruct ReplF_reg(vec dst, vlRegF src) %{ 4511 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4512 match(Set dst (Replicate src)); 4513 format %{ "replicateF $dst,$src" %} 4514 ins_encode %{ 4515 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4516 %} 4517 ins_pipe( pipe_slow ); 4518 %} 4519 4520 instruct ReplF_mem(vec dst, memory mem) %{ 4521 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4522 match(Set dst (Replicate (LoadF mem))); 4523 format %{ "replicateF $dst,$mem" %} 4524 ins_encode %{ 4525 int vlen_enc = vector_length_encoding(this); 4526 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4527 %} 4528 ins_pipe( pipe_slow ); 4529 %} 4530 4531 // Replicate float scalar immediate to be vector by loading from const table. 4532 instruct ReplF_imm(vec dst, immF con) %{ 4533 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4534 match(Set dst (Replicate con)); 4535 format %{ "replicateF $dst,$con" %} 4536 ins_encode %{ 4537 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant, 4538 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
1 : 2) : 4)); 4539 int vlen = Matcher::vector_length_in_bytes(this); 4540 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4541 %} 4542 ins_pipe( pipe_slow ); 4543 %} 4544 4545 instruct ReplF_zero(vec dst, immF0 zero) %{ 4546 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4547 match(Set dst (Replicate zero)); 4548 format %{ "replicateF $dst,$zero" %} 4549 ins_encode %{ 4550 int vlen_enc = vector_length_encoding(this); 4551 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4552 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4553 } else { 4554 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4555 } 4556 %} 4557 ins_pipe( fpu_reg_reg ); 4558 %} 4559 4560 // ====================ReplicateD======================================= 4561 4562 // Replicate double (8 bytes) scalar to be vector 4563 instruct vReplD_reg(vec dst, vlRegD src) %{ 4564 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4565 match(Set dst (Replicate src)); 4566 format %{ "replicateD $dst,$src" %} 4567 ins_encode %{ 4568 uint vlen = Matcher::vector_length(this); 4569 int vlen_enc = vector_length_encoding(this); 4570 if (vlen <= 2) { 4571 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4572 } else if (VM_Version::supports_avx2()) { 4573 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4574 } else { 4575 assert(vlen == 4, "sanity"); 4576 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4577 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4578 } 4579 %} 4580 ins_pipe( pipe_slow ); 4581 %} 4582 4583 instruct ReplD_reg(vec dst, vlRegD src) %{ 4584 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4585 match(Set dst (Replicate src)); 4586 format %{ "replicateD $dst,$src" %} 4587 ins_encode %{ 4588 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4589 %} 4590 ins_pipe( pipe_slow ); 4591 %} 4592 4593 instruct ReplD_mem(vec dst, memory mem) %{ 4594 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4595 match(Set dst (Replicate (LoadD mem))); 4596 format %{ "replicateD $dst,$mem" %} 4597 ins_encode %{ 4598 if (Matcher::vector_length(this) >= 4) { 4599 int vlen_enc = vector_length_encoding(this); 4600 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4601 } else { 4602 __ movddup($dst$$XMMRegister, $mem$$Address); 4603 } 4604 %} 4605 ins_pipe( pipe_slow ); 4606 %} 4607 4608 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 4609 instruct ReplD_imm(vec dst, immD con) %{ 4610 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4611 match(Set dst (Replicate con)); 4612 format %{ "replicateD $dst,$con" %} 4613 ins_encode %{ 4614 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4615 int vlen = Matcher::vector_length_in_bytes(this); 4616 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen); 4617 %} 4618 ins_pipe( pipe_slow ); 4619 %} 4620 4621 instruct ReplD_zero(vec dst, immD0 zero) %{ 4622 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4623 match(Set dst (Replicate zero)); 4624 format %{ "replicateD $dst,$zero" %} 4625 ins_encode %{ 4626 int vlen_enc = vector_length_encoding(this); 4627 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4628 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4629 } else { 4630 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4631 } 4632 %} 4633 ins_pipe( fpu_reg_reg ); 4634 %} 4635 4636 // ====================VECTOR INSERT======================================= 4637 4638 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4639 predicate(Matcher::vector_length_in_bytes(n) < 32); 4640 match(Set dst (VectorInsert (Binary dst val) idx)); 4641 format %{ "vector_insert $dst,$val,$idx" %} 4642 ins_encode %{ 4643 assert(UseSSE >= 4, "required"); 4644 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4645 4646 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4647 4648 assert(is_integral_type(elem_bt), ""); 4649 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4650 4651 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4652 %} 4653 ins_pipe( pipe_slow ); 4654 %} 4655 4656 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4657 predicate(Matcher::vector_length_in_bytes(n) == 32); 4658 match(Set dst (VectorInsert (Binary src val) idx)); 4659 effect(TEMP vtmp); 4660 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4661 ins_encode %{ 4662 int vlen_enc = Assembler::AVX_256bit; 4663 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4664 int elem_per_lane = 16/type2aelembytes(elem_bt); 4665 int log2epr = log2(elem_per_lane); 4666 4667 assert(is_integral_type(elem_bt), "sanity"); 4668 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4669 4670 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4671 uint y_idx = ($idx$$constant >> log2epr) & 1; 4672 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4673 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4674 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4675 %} 4676 ins_pipe( pipe_slow ); 4677 %} 4678 4679 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4680 predicate(Matcher::vector_length_in_bytes(n) == 64); 4681 match(Set dst (VectorInsert (Binary src val) idx)); 4682 effect(TEMP vtmp); 4683 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4684 ins_encode %{ 4685 assert(UseAVX > 2, "sanity"); 4686 4687 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4688 int elem_per_lane = 16/type2aelembytes(elem_bt); 4689 int log2epr = log2(elem_per_lane); 4690 4691 assert(is_integral_type(elem_bt), ""); 4692 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4693 4694 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4695 uint y_idx = ($idx$$constant >> log2epr) & 3; 4696 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4697 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4698 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, 
y_idx); 4699 %} 4700 ins_pipe( pipe_slow ); 4701 %} 4702 4703 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4704 predicate(Matcher::vector_length(n) == 2); 4705 match(Set dst (VectorInsert (Binary dst val) idx)); 4706 format %{ "vector_insert $dst,$val,$idx" %} 4707 ins_encode %{ 4708 assert(UseSSE >= 4, "required"); 4709 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4710 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4711 4712 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4713 %} 4714 ins_pipe( pipe_slow ); 4715 %} 4716 4717 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4718 predicate(Matcher::vector_length(n) == 4); 4719 match(Set dst (VectorInsert (Binary src val) idx)); 4720 effect(TEMP vtmp); 4721 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4722 ins_encode %{ 4723 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4724 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4725 4726 uint x_idx = $idx$$constant & right_n_bits(1); 4727 uint y_idx = ($idx$$constant >> 1) & 1; 4728 int vlen_enc = Assembler::AVX_256bit; 4729 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4730 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4731 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4732 %} 4733 ins_pipe( pipe_slow ); 4734 %} 4735 4736 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 4737 predicate(Matcher::vector_length(n) == 8); 4738 match(Set dst (VectorInsert (Binary src val) idx)); 4739 effect(TEMP vtmp); 4740 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4741 ins_encode %{ 4742 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 4743 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4744 4745 uint x_idx = $idx$$constant & right_n_bits(1); 4746 uint y_idx = ($idx$$constant >> 1) & 3; 4747 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4748 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4749 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4750 %} 4751 ins_pipe( pipe_slow ); 4752 %} 4753 4754 instruct insertF(vec dst, regF val, immU8 idx) %{ 4755 predicate(Matcher::vector_length(n) < 8); 4756 match(Set dst (VectorInsert (Binary dst val) idx)); 4757 format %{ "vector_insert $dst,$val,$idx" %} 4758 ins_encode %{ 4759 assert(UseSSE >= 4, "sanity"); 4760 4761 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4762 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4763 4764 uint x_idx = $idx$$constant & right_n_bits(2); 4765 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4766 %} 4767 ins_pipe( pipe_slow ); 4768 %} 4769 4770 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 4771 predicate(Matcher::vector_length(n) >= 8); 4772 match(Set dst (VectorInsert (Binary src val) idx)); 4773 effect(TEMP vtmp); 4774 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4775 ins_encode %{ 4776 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4777 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4778 4779 int vlen = Matcher::vector_length(this); 4780 uint x_idx = $idx$$constant & right_n_bits(2); 4781 if (vlen == 8) { 4782 uint y_idx = ($idx$$constant >> 2) & 1; 4783 
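      // There is no direct cross-lane scalar insert, so the constant index is split into a position
      // within a 128-bit lane (x_idx) and a lane number (y_idx); the lane is extracted, the scalar is
      // inserted into it (x_idx << 4 places x_idx in the destination-element field of the insertps
      // immediate), and the updated lane is written back into $dst. For example, idx = 6 gives
      // x_idx = 6 & 3 = 2 and y_idx = (6 >> 2) & 1 = 1, i.e. element 2 of the upper 128-bit lane.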
      int vlen_enc = Assembler::AVX_256bit;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

// ====================REDUCTION ARITHMETIC=======================================

// =======================Int Reduction==========================================

instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT);
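  // One rule covers the add, mul, and, or, xor, min and max reductions over int vectors; the node's
  // ideal opcode is passed down to the reduceI() macro-assembler routine, which selects the actual
  // instruction sequence. The element-type check above applies to the vector input of the reduction: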
// src2 4859 match(Set dst (AddReductionVI src1 src2)); 4860 match(Set dst (MulReductionVI src1 src2)); 4861 match(Set dst (AndReductionV src1 src2)); 4862 match(Set dst ( OrReductionV src1 src2)); 4863 match(Set dst (XorReductionV src1 src2)); 4864 match(Set dst (MinReductionV src1 src2)); 4865 match(Set dst (MaxReductionV src1 src2)); 4866 effect(TEMP vtmp1, TEMP vtmp2); 4867 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4868 ins_encode %{ 4869 int opcode = this->ideal_Opcode(); 4870 int vlen = Matcher::vector_length(this, $src2); 4871 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4872 %} 4873 ins_pipe( pipe_slow ); 4874 %} 4875 4876 // =======================Long Reduction========================================== 4877 4878 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4879 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); 4880 match(Set dst (AddReductionVL src1 src2)); 4881 match(Set dst (MulReductionVL src1 src2)); 4882 match(Set dst (AndReductionV src1 src2)); 4883 match(Set dst ( OrReductionV src1 src2)); 4884 match(Set dst (XorReductionV src1 src2)); 4885 match(Set dst (MinReductionV src1 src2)); 4886 match(Set dst (MaxReductionV src1 src2)); 4887 effect(TEMP vtmp1, TEMP vtmp2); 4888 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4889 ins_encode %{ 4890 int opcode = this->ideal_Opcode(); 4891 int vlen = Matcher::vector_length(this, $src2); 4892 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4893 %} 4894 ins_pipe( pipe_slow ); 4895 %} 4896 4897 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 4898 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); 4899 match(Set dst (AddReductionVL src1 src2)); 4900 match(Set dst (MulReductionVL src1 src2)); 4901 match(Set dst (AndReductionV src1 src2)); 4902 match(Set dst ( OrReductionV src1 src2)); 4903 match(Set dst (XorReductionV src1 src2)); 4904 match(Set dst (MinReductionV src1 src2)); 4905 match(Set dst (MaxReductionV src1 src2)); 4906 effect(TEMP vtmp1, TEMP vtmp2); 4907 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4908 ins_encode %{ 4909 int opcode = this->ideal_Opcode(); 4910 int vlen = Matcher::vector_length(this, $src2); 4911 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4912 %} 4913 ins_pipe( pipe_slow ); 4914 %} 4915 4916 // =======================Float Reduction========================================== 4917 4918 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 4919 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src 4920 match(Set dst (AddReductionVF dst src)); 4921 match(Set dst (MulReductionVF dst src)); 4922 effect(TEMP dst, TEMP vtmp); 4923 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 4924 ins_encode %{ 4925 int opcode = this->ideal_Opcode(); 4926 int vlen = Matcher::vector_length(this, $src); 4927 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4928 %} 4929 ins_pipe( pipe_slow ); 4930 %} 4931 4932 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 4933 
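  // Strictly ordered float reduction: requires_strict_order() is true (e.g. for reductions created
  // by auto-vectorization), so the lanes are accumulated sequentially into $dst, which serves as both
  // the incoming accumulator and the result. The unordered_reduction* rules below cover the relaxed
  // Vector API case, where src1 only carries the reduction identity.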
predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 4934 match(Set dst (AddReductionVF dst src)); 4935 match(Set dst (MulReductionVF dst src)); 4936 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4937 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4938 ins_encode %{ 4939 int opcode = this->ideal_Opcode(); 4940 int vlen = Matcher::vector_length(this, $src); 4941 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4942 %} 4943 ins_pipe( pipe_slow ); 4944 %} 4945 4946 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 4947 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src 4948 match(Set dst (AddReductionVF dst src)); 4949 match(Set dst (MulReductionVF dst src)); 4950 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4951 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4952 ins_encode %{ 4953 int opcode = this->ideal_Opcode(); 4954 int vlen = Matcher::vector_length(this, $src); 4955 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4956 %} 4957 ins_pipe( pipe_slow ); 4958 %} 4959 4960 4961 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{ 4962 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 4963 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 4964 // src1 contains reduction identity 4965 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 4966 match(Set dst (AddReductionVF src1 src2)); 4967 match(Set dst (MulReductionVF src1 src2)); 4968 effect(TEMP dst); 4969 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %} 4970 ins_encode %{ 4971 int opcode = this->ideal_Opcode(); 4972 int vlen = Matcher::vector_length(this, $src2); 4973 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 4974 %} 4975 ins_pipe( pipe_slow ); 4976 %} 4977 4978 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{ 4979 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 4980 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 4981 // src1 contains reduction identity 4982 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 4983 match(Set dst (AddReductionVF src1 src2)); 4984 match(Set dst (MulReductionVF src1 src2)); 4985 effect(TEMP dst, TEMP vtmp); 4986 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %} 4987 ins_encode %{ 4988 int opcode = this->ideal_Opcode(); 4989 int vlen = Matcher::vector_length(this, $src2); 4990 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 4991 %} 4992 ins_pipe( pipe_slow ); 4993 %} 4994 4995 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{ 4996 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 4997 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
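  // Lanes may be combined in any order (e.g. by pairwise folding), so the rounded result can differ
  // in the last bit from the strictly ordered rules above.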
4998 // src1 contains reduction identity 4999 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5000 match(Set dst (AddReductionVF src1 src2)); 5001 match(Set dst (MulReductionVF src1 src2)); 5002 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5003 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5004 ins_encode %{ 5005 int opcode = this->ideal_Opcode(); 5006 int vlen = Matcher::vector_length(this, $src2); 5007 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5008 %} 5009 ins_pipe( pipe_slow ); 5010 %} 5011 5012 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5013 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5014 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5015 // src1 contains reduction identity 5016 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2 5017 match(Set dst (AddReductionVF src1 src2)); 5018 match(Set dst (MulReductionVF src1 src2)); 5019 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5020 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5021 ins_encode %{ 5022 int opcode = this->ideal_Opcode(); 5023 int vlen = Matcher::vector_length(this, $src2); 5024 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5025 %} 5026 ins_pipe( pipe_slow ); 5027 %} 5028 5029 // =======================Double Reduction========================================== 5030 5031 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 5032 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src 5033 match(Set dst (AddReductionVD dst src)); 5034 match(Set dst (MulReductionVD dst src)); 5035 effect(TEMP dst, TEMP vtmp); 5036 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 5037 ins_encode %{ 5038 int opcode = this->ideal_Opcode(); 5039 int vlen = Matcher::vector_length(this, $src); 5040 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5041 %} 5042 ins_pipe( pipe_slow ); 5043 %} 5044 5045 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 5046 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src 5047 match(Set dst (AddReductionVD dst src)); 5048 match(Set dst (MulReductionVD dst src)); 5049 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5050 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5051 ins_encode %{ 5052 int opcode = this->ideal_Opcode(); 5053 int vlen = Matcher::vector_length(this, $src); 5054 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5055 %} 5056 ins_pipe( pipe_slow ); 5057 %} 5058 5059 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5060 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5061 match(Set dst (AddReductionVD dst src)); 5062 match(Set dst (MulReductionVD dst src)); 5063 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5064 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5065 ins_encode %{ 5066 int opcode = this->ideal_Opcode(); 5067 int vlen = Matcher::vector_length(this, $src); 5068 __ 
reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5069 %} 5070 ins_pipe( pipe_slow ); 5071 %} 5072 5073 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{ 5074 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5075 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5076 // src1 contains reduction identity 5077 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5078 match(Set dst (AddReductionVD src1 src2)); 5079 match(Set dst (MulReductionVD src1 src2)); 5080 effect(TEMP dst); 5081 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %} 5082 ins_encode %{ 5083 int opcode = this->ideal_Opcode(); 5084 int vlen = Matcher::vector_length(this, $src2); 5085 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5086 %} 5087 ins_pipe( pipe_slow ); 5088 %} 5089 5090 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{ 5091 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5092 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5093 // src1 contains reduction identity 5094 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5095 match(Set dst (AddReductionVD src1 src2)); 5096 match(Set dst (MulReductionVD src1 src2)); 5097 effect(TEMP dst, TEMP vtmp); 5098 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5099 ins_encode %{ 5100 int opcode = this->ideal_Opcode(); 5101 int vlen = Matcher::vector_length(this, $src2); 5102 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5103 %} 5104 ins_pipe( pipe_slow ); 5105 %} 5106 5107 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5108 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5109 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
5110 // src1 contains reduction identity 5111 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5112 match(Set dst (AddReductionVD src1 src2)); 5113 match(Set dst (MulReductionVD src1 src2)); 5114 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5115 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5116 ins_encode %{ 5117 int opcode = this->ideal_Opcode(); 5118 int vlen = Matcher::vector_length(this, $src2); 5119 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5120 %} 5121 ins_pipe( pipe_slow ); 5122 %} 5123 5124 // =======================Byte Reduction========================================== 5125 5126 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5127 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 5128 match(Set dst (AddReductionVI src1 src2)); 5129 match(Set dst (AndReductionV src1 src2)); 5130 match(Set dst ( OrReductionV src1 src2)); 5131 match(Set dst (XorReductionV src1 src2)); 5132 match(Set dst (MinReductionV src1 src2)); 5133 match(Set dst (MaxReductionV src1 src2)); 5134 effect(TEMP vtmp1, TEMP vtmp2); 5135 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5136 ins_encode %{ 5137 int opcode = this->ideal_Opcode(); 5138 int vlen = Matcher::vector_length(this, $src2); 5139 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5140 %} 5141 ins_pipe( pipe_slow ); 5142 %} 5143 5144 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5145 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 5146 match(Set dst (AddReductionVI src1 src2)); 5147 match(Set dst (AndReductionV src1 src2)); 5148 match(Set dst ( OrReductionV src1 src2)); 5149 match(Set dst (XorReductionV src1 src2)); 5150 match(Set dst (MinReductionV src1 src2)); 5151 match(Set dst (MaxReductionV src1 src2)); 5152 effect(TEMP vtmp1, TEMP vtmp2); 5153 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5154 ins_encode %{ 5155 int opcode = this->ideal_Opcode(); 5156 int vlen = Matcher::vector_length(this, $src2); 5157 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5158 %} 5159 ins_pipe( pipe_slow ); 5160 %} 5161 5162 // =======================Short Reduction========================================== 5163 5164 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5165 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 5166 match(Set dst (AddReductionVI src1 src2)); 5167 match(Set dst (MulReductionVI src1 src2)); 5168 match(Set dst (AndReductionV src1 src2)); 5169 match(Set dst ( OrReductionV src1 src2)); 5170 match(Set dst (XorReductionV src1 src2)); 5171 match(Set dst (MinReductionV src1 src2)); 5172 match(Set dst (MaxReductionV src1 src2)); 5173 effect(TEMP vtmp1, TEMP vtmp2); 5174 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5175 ins_encode %{ 5176 int opcode = this->ideal_Opcode(); 5177 int vlen = Matcher::vector_length(this, $src2); 5178 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5179 %} 5180 ins_pipe( pipe_slow 
); 5181 %} 5182 5183 // =======================Mul Reduction========================================== 5184 5185 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5186 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5187 Matcher::vector_length(n->in(2)) <= 32); // src2 5188 match(Set dst (MulReductionVI src1 src2)); 5189 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5190 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5191 ins_encode %{ 5192 int opcode = this->ideal_Opcode(); 5193 int vlen = Matcher::vector_length(this, $src2); 5194 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5195 %} 5196 ins_pipe( pipe_slow ); 5197 %} 5198 5199 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5200 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5201 Matcher::vector_length(n->in(2)) == 64); // src2 5202 match(Set dst (MulReductionVI src1 src2)); 5203 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5204 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5205 ins_encode %{ 5206 int opcode = this->ideal_Opcode(); 5207 int vlen = Matcher::vector_length(this, $src2); 5208 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5209 %} 5210 ins_pipe( pipe_slow ); 5211 %} 5212 5213 //--------------------Min/Max Float Reduction -------------------- 5214 // Float Min Reduction 5215 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5216 legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5217 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5218 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5219 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5220 Matcher::vector_length(n->in(2)) == 2); 5221 match(Set dst (MinReductionV src1 src2)); 5222 match(Set dst (MaxReductionV src1 src2)); 5223 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5224 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5225 ins_encode %{ 5226 assert(UseAVX > 0, "sanity"); 5227 5228 int opcode = this->ideal_Opcode(); 5229 int vlen = Matcher::vector_length(this, $src2); 5230 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5231 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5232 %} 5233 ins_pipe( pipe_slow ); 5234 %} 5235 5236 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5237 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5238 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5239 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5240 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5241 Matcher::vector_length(n->in(2)) >= 4); 5242 match(Set dst (MinReductionV src1 src2)); 5243 match(Set dst (MaxReductionV src1 src2)); 5244 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5245 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5246 ins_encode %{ 5247 assert(UseAVX > 0, 
"sanity"); 5248 5249 int opcode = this->ideal_Opcode(); 5250 int vlen = Matcher::vector_length(this, $src2); 5251 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5252 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5253 %} 5254 ins_pipe( pipe_slow ); 5255 %} 5256 5257 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp, 5258 legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5259 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5260 Matcher::vector_length(n->in(2)) == 2); 5261 match(Set dst (MinReductionV dst src)); 5262 match(Set dst (MaxReductionV dst src)); 5263 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5264 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5265 ins_encode %{ 5266 assert(UseAVX > 0, "sanity"); 5267 5268 int opcode = this->ideal_Opcode(); 5269 int vlen = Matcher::vector_length(this, $src); 5270 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5271 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5272 %} 5273 ins_pipe( pipe_slow ); 5274 %} 5275 5276 5277 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp, 5278 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5279 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5280 Matcher::vector_length(n->in(2)) >= 4); 5281 match(Set dst (MinReductionV dst src)); 5282 match(Set dst (MaxReductionV dst src)); 5283 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5284 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5285 ins_encode %{ 5286 assert(UseAVX > 0, "sanity"); 5287 5288 int opcode = this->ideal_Opcode(); 5289 int vlen = Matcher::vector_length(this, $src); 5290 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5291 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5292 %} 5293 ins_pipe( pipe_slow ); 5294 %} 5295 5296 instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{ 5297 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5298 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5299 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5300 Matcher::vector_length(n->in(2)) == 2); 5301 match(Set dst (MinReductionV src1 src2)); 5302 match(Set dst (MaxReductionV src1 src2)); 5303 effect(TEMP dst, TEMP xtmp1); 5304 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %} 5305 ins_encode %{ 5306 int opcode = this->ideal_Opcode(); 5307 int vlen = Matcher::vector_length(this, $src2); 5308 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5309 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister); 5310 %} 5311 ins_pipe( pipe_slow ); 5312 %} 5313 5314 instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{ 5315 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5316 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5317 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == 
TypeF::NEG_INF)) && 5318 Matcher::vector_length(n->in(2)) >= 4); 5319 match(Set dst (MinReductionV src1 src2)); 5320 match(Set dst (MaxReductionV src1 src2)); 5321 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 5322 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %} 5323 ins_encode %{ 5324 int opcode = this->ideal_Opcode(); 5325 int vlen = Matcher::vector_length(this, $src2); 5326 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, 5327 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 5328 %} 5329 ins_pipe( pipe_slow ); 5330 %} 5331 5332 instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{ 5333 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5334 Matcher::vector_length(n->in(2)) == 2); 5335 match(Set dst (MinReductionV dst src)); 5336 match(Set dst (MaxReductionV dst src)); 5337 effect(TEMP dst, TEMP xtmp1); 5338 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %} 5339 ins_encode %{ 5340 int opcode = this->ideal_Opcode(); 5341 int vlen = Matcher::vector_length(this, $src); 5342 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, 5343 $xtmp1$$XMMRegister); 5344 %} 5345 ins_pipe( pipe_slow ); 5346 %} 5347 5348 instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{ 5349 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5350 Matcher::vector_length(n->in(2)) >= 4); 5351 match(Set dst (MinReductionV dst src)); 5352 match(Set dst (MaxReductionV dst src)); 5353 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 5354 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %} 5355 ins_encode %{ 5356 int opcode = this->ideal_Opcode(); 5357 int vlen = Matcher::vector_length(this, $src); 5358 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, 5359 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 5360 %} 5361 ins_pipe( pipe_slow ); 5362 %} 5363 5364 //--------------------Min Double Reduction -------------------- 5365 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2, 5366 legVec tmp3, legVec tmp4, rFlagsReg cr) %{ 5367 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5368 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5369 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5370 Matcher::vector_length(n->in(2)) == 2); 5371 match(Set dst (MinReductionV src1 src2)); 5372 match(Set dst (MaxReductionV src1 src2)); 5373 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5374 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5375 ins_encode %{ 5376 assert(UseAVX > 0, "sanity"); 5377 5378 int opcode = this->ideal_Opcode(); 5379 int vlen = Matcher::vector_length(this, $src2); 5380 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5381 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5382 %} 5383 ins_pipe( pipe_slow ); 5384 %} 5385 5386 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2, 5387 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{ 5388 predicate(!VM_Version::supports_avx10_2() && 
Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5389 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5390 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5391 Matcher::vector_length(n->in(2)) >= 4); 5392 match(Set dst (MinReductionV src1 src2)); 5393 match(Set dst (MaxReductionV src1 src2)); 5394 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5395 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5396 ins_encode %{ 5397 assert(UseAVX > 0, "sanity"); 5398 5399 int opcode = this->ideal_Opcode(); 5400 int vlen = Matcher::vector_length(this, $src2); 5401 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5402 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5403 %} 5404 ins_pipe( pipe_slow ); 5405 %} 5406 5407 5408 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, 5409 legVec tmp3, legVec tmp4, rFlagsReg cr) %{ 5410 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5411 Matcher::vector_length(n->in(2)) == 2); 5412 match(Set dst (MinReductionV dst src)); 5413 match(Set dst (MaxReductionV dst src)); 5414 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5415 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5416 ins_encode %{ 5417 assert(UseAVX > 0, "sanity"); 5418 5419 int opcode = this->ideal_Opcode(); 5420 int vlen = Matcher::vector_length(this, $src); 5421 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5422 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5423 %} 5424 ins_pipe( pipe_slow ); 5425 %} 5426 5427 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3, 5428 legVec tmp4, legVec tmp5, rFlagsReg cr) %{ 5429 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5430 Matcher::vector_length(n->in(2)) >= 4); 5431 match(Set dst (MinReductionV dst src)); 5432 match(Set dst (MaxReductionV dst src)); 5433 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5434 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5435 ins_encode %{ 5436 assert(UseAVX > 0, "sanity"); 5437 5438 int opcode = this->ideal_Opcode(); 5439 int vlen = Matcher::vector_length(this, $src); 5440 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5441 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5442 %} 5443 ins_pipe( pipe_slow ); 5444 %} 5445 5446 instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{ 5447 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5448 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5449 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5450 Matcher::vector_length(n->in(2)) == 2); 5451 match(Set dst (MinReductionV src1 src2)); 5452 match(Set dst (MaxReductionV src1 src2)); 5453 effect(TEMP dst, TEMP xtmp1); 5454 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %} 5455 ins_encode %{ 5456 int opcode = 
this->ideal_Opcode(); 5457 int vlen = Matcher::vector_length(this, $src2); 5458 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, 5459 xnoreg, xnoreg, $xtmp1$$XMMRegister); 5460 %} 5461 ins_pipe( pipe_slow ); 5462 %} 5463 5464 instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{ 5465 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5466 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5467 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5468 Matcher::vector_length(n->in(2)) >= 4); 5469 match(Set dst (MinReductionV src1 src2)); 5470 match(Set dst (MaxReductionV src1 src2)); 5471 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 5472 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %} 5473 ins_encode %{ 5474 int opcode = this->ideal_Opcode(); 5475 int vlen = Matcher::vector_length(this, $src2); 5476 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, 5477 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 5478 %} 5479 ins_pipe( pipe_slow ); 5480 %} 5481 5482 5483 instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{ 5484 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5485 Matcher::vector_length(n->in(2)) == 2); 5486 match(Set dst (MinReductionV dst src)); 5487 match(Set dst (MaxReductionV dst src)); 5488 effect(TEMP dst, TEMP xtmp1); 5489 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %} 5490 ins_encode %{ 5491 int opcode = this->ideal_Opcode(); 5492 int vlen = Matcher::vector_length(this, $src); 5493 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5494 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister); 5495 %} 5496 ins_pipe( pipe_slow ); 5497 %} 5498 5499 instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{ 5500 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5501 Matcher::vector_length(n->in(2)) >= 4); 5502 match(Set dst (MinReductionV dst src)); 5503 match(Set dst (MaxReductionV dst src)); 5504 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 5505 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %} 5506 ins_encode %{ 5507 int opcode = this->ideal_Opcode(); 5508 int vlen = Matcher::vector_length(this, $src); 5509 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5510 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 5511 %} 5512 ins_pipe( pipe_slow ); 5513 %} 5514 5515 // ====================VECTOR ARITHMETIC======================================= 5516 5517 // --------------------------------- ADD -------------------------------------- 5518 5519 // Bytes vector add 5520 instruct vaddB(vec dst, vec src) %{ 5521 predicate(UseAVX == 0); 5522 match(Set dst (AddVB dst src)); 5523 format %{ "paddb $dst,$src\t! add packedB" %} 5524 ins_encode %{ 5525 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5526 %} 5527 ins_pipe( pipe_slow ); 5528 %} 5529 5530 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5531 predicate(UseAVX > 0); 5532 match(Set dst (AddVB src1 src2)); 5533 format %{ "vpaddb $dst,$src1,$src2\t! 
add packedB" %} 5534 ins_encode %{ 5535 int vlen_enc = vector_length_encoding(this); 5536 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5537 %} 5538 ins_pipe( pipe_slow ); 5539 %} 5540 5541 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5542 predicate((UseAVX > 0) && 5543 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5544 match(Set dst (AddVB src (LoadVector mem))); 5545 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5546 ins_encode %{ 5547 int vlen_enc = vector_length_encoding(this); 5548 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5549 %} 5550 ins_pipe( pipe_slow ); 5551 %} 5552 5553 // Shorts/Chars vector add 5554 instruct vaddS(vec dst, vec src) %{ 5555 predicate(UseAVX == 0); 5556 match(Set dst (AddVS dst src)); 5557 format %{ "paddw $dst,$src\t! add packedS" %} 5558 ins_encode %{ 5559 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5560 %} 5561 ins_pipe( pipe_slow ); 5562 %} 5563 5564 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5565 predicate(UseAVX > 0); 5566 match(Set dst (AddVS src1 src2)); 5567 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5568 ins_encode %{ 5569 int vlen_enc = vector_length_encoding(this); 5570 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5571 %} 5572 ins_pipe( pipe_slow ); 5573 %} 5574 5575 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5576 predicate((UseAVX > 0) && 5577 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5578 match(Set dst (AddVS src (LoadVector mem))); 5579 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5580 ins_encode %{ 5581 int vlen_enc = vector_length_encoding(this); 5582 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5583 %} 5584 ins_pipe( pipe_slow ); 5585 %} 5586 5587 // Integers vector add 5588 instruct vaddI(vec dst, vec src) %{ 5589 predicate(UseAVX == 0); 5590 match(Set dst (AddVI dst src)); 5591 format %{ "paddd $dst,$src\t! add packedI" %} 5592 ins_encode %{ 5593 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5594 %} 5595 ins_pipe( pipe_slow ); 5596 %} 5597 5598 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5599 predicate(UseAVX > 0); 5600 match(Set dst (AddVI src1 src2)); 5601 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5602 ins_encode %{ 5603 int vlen_enc = vector_length_encoding(this); 5604 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5605 %} 5606 ins_pipe( pipe_slow ); 5607 %} 5608 5609 5610 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5611 predicate((UseAVX > 0) && 5612 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5613 match(Set dst (AddVI src (LoadVector mem))); 5614 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5615 ins_encode %{ 5616 int vlen_enc = vector_length_encoding(this); 5617 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5618 %} 5619 ins_pipe( pipe_slow ); 5620 %} 5621 5622 // Longs vector add 5623 instruct vaddL(vec dst, vec src) %{ 5624 predicate(UseAVX == 0); 5625 match(Set dst (AddVL dst src)); 5626 format %{ "paddq $dst,$src\t! add packedL" %} 5627 ins_encode %{ 5628 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5629 %} 5630 ins_pipe( pipe_slow ); 5631 %} 5632 5633 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5634 predicate(UseAVX > 0); 5635 match(Set dst (AddVL src1 src2)); 5636 format %{ "vpaddq $dst,$src1,$src2\t! 
add packedL" %} 5637 ins_encode %{ 5638 int vlen_enc = vector_length_encoding(this); 5639 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5640 %} 5641 ins_pipe( pipe_slow ); 5642 %} 5643 5644 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5645 predicate((UseAVX > 0) && 5646 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5647 match(Set dst (AddVL src (LoadVector mem))); 5648 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5649 ins_encode %{ 5650 int vlen_enc = vector_length_encoding(this); 5651 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5652 %} 5653 ins_pipe( pipe_slow ); 5654 %} 5655 5656 // Floats vector add 5657 instruct vaddF(vec dst, vec src) %{ 5658 predicate(UseAVX == 0); 5659 match(Set dst (AddVF dst src)); 5660 format %{ "addps $dst,$src\t! add packedF" %} 5661 ins_encode %{ 5662 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5663 %} 5664 ins_pipe( pipe_slow ); 5665 %} 5666 5667 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5668 predicate(UseAVX > 0); 5669 match(Set dst (AddVF src1 src2)); 5670 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5671 ins_encode %{ 5672 int vlen_enc = vector_length_encoding(this); 5673 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5674 %} 5675 ins_pipe( pipe_slow ); 5676 %} 5677 5678 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5679 predicate((UseAVX > 0) && 5680 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5681 match(Set dst (AddVF src (LoadVector mem))); 5682 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5683 ins_encode %{ 5684 int vlen_enc = vector_length_encoding(this); 5685 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5686 %} 5687 ins_pipe( pipe_slow ); 5688 %} 5689 5690 // Doubles vector add 5691 instruct vaddD(vec dst, vec src) %{ 5692 predicate(UseAVX == 0); 5693 match(Set dst (AddVD dst src)); 5694 format %{ "addpd $dst,$src\t! add packedD" %} 5695 ins_encode %{ 5696 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5697 %} 5698 ins_pipe( pipe_slow ); 5699 %} 5700 5701 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5702 predicate(UseAVX > 0); 5703 match(Set dst (AddVD src1 src2)); 5704 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5705 ins_encode %{ 5706 int vlen_enc = vector_length_encoding(this); 5707 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5708 %} 5709 ins_pipe( pipe_slow ); 5710 %} 5711 5712 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5713 predicate((UseAVX > 0) && 5714 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5715 match(Set dst (AddVD src (LoadVector mem))); 5716 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5717 ins_encode %{ 5718 int vlen_enc = vector_length_encoding(this); 5719 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5720 %} 5721 ins_pipe( pipe_slow ); 5722 %} 5723 5724 // --------------------------------- SUB -------------------------------------- 5725 5726 // Bytes vector sub 5727 instruct vsubB(vec dst, vec src) %{ 5728 predicate(UseAVX == 0); 5729 match(Set dst (SubVB dst src)); 5730 format %{ "psubb $dst,$src\t! sub packedB" %} 5731 ins_encode %{ 5732 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5733 %} 5734 ins_pipe( pipe_slow ); 5735 %} 5736 5737 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5738 predicate(UseAVX > 0); 5739 match(Set dst (SubVB src1 src2)); 5740 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packedB" %} 5741 ins_encode %{ 5742 int vlen_enc = vector_length_encoding(this); 5743 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5744 %} 5745 ins_pipe( pipe_slow ); 5746 %} 5747 5748 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5749 predicate((UseAVX > 0) && 5750 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5751 match(Set dst (SubVB src (LoadVector mem))); 5752 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5753 ins_encode %{ 5754 int vlen_enc = vector_length_encoding(this); 5755 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5756 %} 5757 ins_pipe( pipe_slow ); 5758 %} 5759 5760 // Shorts/Chars vector sub 5761 instruct vsubS(vec dst, vec src) %{ 5762 predicate(UseAVX == 0); 5763 match(Set dst (SubVS dst src)); 5764 format %{ "psubw $dst,$src\t! sub packedS" %} 5765 ins_encode %{ 5766 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5767 %} 5768 ins_pipe( pipe_slow ); 5769 %} 5770 5771 5772 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5773 predicate(UseAVX > 0); 5774 match(Set dst (SubVS src1 src2)); 5775 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5776 ins_encode %{ 5777 int vlen_enc = vector_length_encoding(this); 5778 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5779 %} 5780 ins_pipe( pipe_slow ); 5781 %} 5782 5783 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5784 predicate((UseAVX > 0) && 5785 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5786 match(Set dst (SubVS src (LoadVector mem))); 5787 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5788 ins_encode %{ 5789 int vlen_enc = vector_length_encoding(this); 5790 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5791 %} 5792 ins_pipe( pipe_slow ); 5793 %} 5794 5795 // Integers vector sub 5796 instruct vsubI(vec dst, vec src) %{ 5797 predicate(UseAVX == 0); 5798 match(Set dst (SubVI dst src)); 5799 format %{ "psubd $dst,$src\t! sub packedI" %} 5800 ins_encode %{ 5801 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5802 %} 5803 ins_pipe( pipe_slow ); 5804 %} 5805 5806 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5807 predicate(UseAVX > 0); 5808 match(Set dst (SubVI src1 src2)); 5809 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5810 ins_encode %{ 5811 int vlen_enc = vector_length_encoding(this); 5812 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5813 %} 5814 ins_pipe( pipe_slow ); 5815 %} 5816 5817 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5818 predicate((UseAVX > 0) && 5819 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5820 match(Set dst (SubVI src (LoadVector mem))); 5821 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5822 ins_encode %{ 5823 int vlen_enc = vector_length_encoding(this); 5824 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5825 %} 5826 ins_pipe( pipe_slow ); 5827 %} 5828 5829 // Longs vector sub 5830 instruct vsubL(vec dst, vec src) %{ 5831 predicate(UseAVX == 0); 5832 match(Set dst (SubVL dst src)); 5833 format %{ "psubq $dst,$src\t! sub packedL" %} 5834 ins_encode %{ 5835 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5836 %} 5837 ins_pipe( pipe_slow ); 5838 %} 5839 5840 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5841 predicate(UseAVX > 0); 5842 match(Set dst (SubVL src1 src2)); 5843 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packedL" %} 5844 ins_encode %{ 5845 int vlen_enc = vector_length_encoding(this); 5846 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5847 %} 5848 ins_pipe( pipe_slow ); 5849 %} 5850 5851 5852 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5853 predicate((UseAVX > 0) && 5854 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5855 match(Set dst (SubVL src (LoadVector mem))); 5856 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5857 ins_encode %{ 5858 int vlen_enc = vector_length_encoding(this); 5859 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5860 %} 5861 ins_pipe( pipe_slow ); 5862 %} 5863 5864 // Floats vector sub 5865 instruct vsubF(vec dst, vec src) %{ 5866 predicate(UseAVX == 0); 5867 match(Set dst (SubVF dst src)); 5868 format %{ "subps $dst,$src\t! sub packedF" %} 5869 ins_encode %{ 5870 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5871 %} 5872 ins_pipe( pipe_slow ); 5873 %} 5874 5875 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5876 predicate(UseAVX > 0); 5877 match(Set dst (SubVF src1 src2)); 5878 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5879 ins_encode %{ 5880 int vlen_enc = vector_length_encoding(this); 5881 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5882 %} 5883 ins_pipe( pipe_slow ); 5884 %} 5885 5886 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5887 predicate((UseAVX > 0) && 5888 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5889 match(Set dst (SubVF src (LoadVector mem))); 5890 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5891 ins_encode %{ 5892 int vlen_enc = vector_length_encoding(this); 5893 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5894 %} 5895 ins_pipe( pipe_slow ); 5896 %} 5897 5898 // Doubles vector sub 5899 instruct vsubD(vec dst, vec src) %{ 5900 predicate(UseAVX == 0); 5901 match(Set dst (SubVD dst src)); 5902 format %{ "subpd $dst,$src\t! sub packedD" %} 5903 ins_encode %{ 5904 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 5905 %} 5906 ins_pipe( pipe_slow ); 5907 %} 5908 5909 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 5910 predicate(UseAVX > 0); 5911 match(Set dst (SubVD src1 src2)); 5912 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 5913 ins_encode %{ 5914 int vlen_enc = vector_length_encoding(this); 5915 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5916 %} 5917 ins_pipe( pipe_slow ); 5918 %} 5919 5920 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 5921 predicate((UseAVX > 0) && 5922 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5923 match(Set dst (SubVD src (LoadVector mem))); 5924 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 5925 ins_encode %{ 5926 int vlen_enc = vector_length_encoding(this); 5927 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5928 %} 5929 ins_pipe( pipe_slow ); 5930 %} 5931 5932 // --------------------------------- MUL -------------------------------------- 5933 5934 // Byte vector mul 5935 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 5936 predicate(Matcher::vector_length_in_bytes(n) <= 8); 5937 match(Set dst (MulVB src1 src2)); 5938 effect(TEMP dst, TEMP xtmp); 5939 format %{ "mulVB $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 5940 ins_encode %{ 5941 assert(UseSSE > 3, "required"); 5942 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 5943 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 5944 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 5945 __ psllw($dst$$XMMRegister, 8); 5946 __ psrlw($dst$$XMMRegister, 8); 5947 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 5948 %} 5949 ins_pipe( pipe_slow ); 5950 %} 5951 5952 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 5953 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 5954 match(Set dst (MulVB src1 src2)); 5955 effect(TEMP dst, TEMP xtmp); 5956 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 5957 ins_encode %{ 5958 assert(UseSSE > 3, "required"); 5959 // Odd-index elements 5960 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 5961 __ psrlw($dst$$XMMRegister, 8); 5962 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 5963 __ psrlw($xtmp$$XMMRegister, 8); 5964 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 5965 __ psllw($dst$$XMMRegister, 8); 5966 // Even-index elements 5967 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 5968 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 5969 __ psllw($xtmp$$XMMRegister, 8); 5970 __ psrlw($xtmp$$XMMRegister, 8); 5971 // Combine 5972 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 5973 %} 5974 ins_pipe( pipe_slow ); 5975 %} 5976 5977 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 5978 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 5979 match(Set dst (MulVB src1 src2)); 5980 effect(TEMP xtmp1, TEMP xtmp2); 5981 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 5982 ins_encode %{ 5983 int vlen_enc = vector_length_encoding(this); 5984 // Odd-index elements 5985 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 5986 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 5987 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5988 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 5989 // Even-index elements 5990 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5991 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 5992 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 5993 // Combine 5994 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5995 %} 5996 ins_pipe( pipe_slow ); 5997 %} 5998 5999 // Shorts/Chars vector mul 6000 instruct vmulS(vec dst, vec src) %{ 6001 predicate(UseAVX == 0); 6002 match(Set dst (MulVS dst src)); 6003 format %{ "pmullw $dst,$src\t! mul packedS" %} 6004 ins_encode %{ 6005 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6006 %} 6007 ins_pipe( pipe_slow ); 6008 %} 6009 6010 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 6011 predicate(UseAVX > 0); 6012 match(Set dst (MulVS src1 src2)); 6013 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 6014 ins_encode %{ 6015 int vlen_enc = vector_length_encoding(this); 6016 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6017 %} 6018 ins_pipe( pipe_slow ); 6019 %} 6020 6021 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 6022 predicate((UseAVX > 0) && 6023 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6024 match(Set dst (MulVS src (LoadVector mem))); 6025 format %{ "vpmullw $dst,$src,$mem\t! 
mul packedS" %} 6026 ins_encode %{ 6027 int vlen_enc = vector_length_encoding(this); 6028 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6029 %} 6030 ins_pipe( pipe_slow ); 6031 %} 6032 6033 // Integers vector mul 6034 instruct vmulI(vec dst, vec src) %{ 6035 predicate(UseAVX == 0); 6036 match(Set dst (MulVI dst src)); 6037 format %{ "pmulld $dst,$src\t! mul packedI" %} 6038 ins_encode %{ 6039 assert(UseSSE > 3, "required"); 6040 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6041 %} 6042 ins_pipe( pipe_slow ); 6043 %} 6044 6045 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 6046 predicate(UseAVX > 0); 6047 match(Set dst (MulVI src1 src2)); 6048 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 6049 ins_encode %{ 6050 int vlen_enc = vector_length_encoding(this); 6051 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6052 %} 6053 ins_pipe( pipe_slow ); 6054 %} 6055 6056 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 6057 predicate((UseAVX > 0) && 6058 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6059 match(Set dst (MulVI src (LoadVector mem))); 6060 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 6061 ins_encode %{ 6062 int vlen_enc = vector_length_encoding(this); 6063 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6064 %} 6065 ins_pipe( pipe_slow ); 6066 %} 6067 6068 // Longs vector mul 6069 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 6070 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6071 VM_Version::supports_avx512dq()) || 6072 VM_Version::supports_avx512vldq()); 6073 match(Set dst (MulVL src1 src2)); 6074 ins_cost(500); 6075 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %} 6076 ins_encode %{ 6077 assert(UseAVX > 2, "required"); 6078 int vlen_enc = vector_length_encoding(this); 6079 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6080 %} 6081 ins_pipe( pipe_slow ); 6082 %} 6083 6084 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 6085 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6086 VM_Version::supports_avx512dq()) || 6087 (Matcher::vector_length_in_bytes(n) > 8 && 6088 VM_Version::supports_avx512vldq())); 6089 match(Set dst (MulVL src (LoadVector mem))); 6090 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 6091 ins_cost(500); 6092 ins_encode %{ 6093 assert(UseAVX > 2, "required"); 6094 int vlen_enc = vector_length_encoding(this); 6095 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6096 %} 6097 ins_pipe( pipe_slow ); 6098 %} 6099 6100 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 6101 predicate(UseAVX == 0); 6102 match(Set dst (MulVL src1 src2)); 6103 ins_cost(500); 6104 effect(TEMP dst, TEMP xtmp); 6105 format %{ "mulVL $dst, $src1, $src2\t! 
using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi products; only the lower 32 bits of each are needed
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllq($dst$$XMMRegister, 32);
    // Get the lo-lo products
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  ins_cost(500);
  format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi products; only the lower 32 bits of each are needed
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
    // Get the lo-lo products
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
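
// The MulVL rules above build the low 64 bits of each product from 32-bit halves:
// the sum of the two cross products contributes only its low 32 bits, shifted up
// by 32, and the lo-lo term is a full 32x32->64 multiply (pmuludq/vpmuludq).
// A scalar sketch of the same decomposition, in illustrative C only (the helper
// name is made up and is not part of this file's generated code):
//
//   static inline uint64_t mul64_from_halves(uint64_t a, uint64_t b) {
//     uint32_t a_lo = (uint32_t)a, a_hi = (uint32_t)(a >> 32);
//     uint32_t b_lo = (uint32_t)b, b_hi = (uint32_t)(b >> 32);
//     uint64_t cross = (uint64_t)(a_lo * b_hi + a_hi * b_lo) << 32; // low 32 bits of cross terms
//     uint64_t lo_lo = (uint64_t)a_lo * b_lo;                       // full 32x32->64 product
//     return lo_lo + cross;                                         // == (a * b) mod 2^64
//   }
//
// When both inputs are known to fit in 32 bits (has_uint_inputs()/has_int_inputs()),
// the cross terms vanish and a single vpmuludq/vpmuldq suffices, hence the cheaper
// vmuludq_reg/vmuldq_reg rules above.

// Floats vector mul
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t!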
mul packedF" %} 6186 ins_encode %{ 6187 int vlen_enc = vector_length_encoding(this); 6188 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6189 %} 6190 ins_pipe( pipe_slow ); 6191 %} 6192 6193 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 6194 predicate((UseAVX > 0) && 6195 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6196 match(Set dst (MulVF src (LoadVector mem))); 6197 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 6198 ins_encode %{ 6199 int vlen_enc = vector_length_encoding(this); 6200 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6201 %} 6202 ins_pipe( pipe_slow ); 6203 %} 6204 6205 // Doubles vector mul 6206 instruct vmulD(vec dst, vec src) %{ 6207 predicate(UseAVX == 0); 6208 match(Set dst (MulVD dst src)); 6209 format %{ "mulpd $dst,$src\t! mul packedD" %} 6210 ins_encode %{ 6211 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 6212 %} 6213 ins_pipe( pipe_slow ); 6214 %} 6215 6216 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 6217 predicate(UseAVX > 0); 6218 match(Set dst (MulVD src1 src2)); 6219 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} 6220 ins_encode %{ 6221 int vlen_enc = vector_length_encoding(this); 6222 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6223 %} 6224 ins_pipe( pipe_slow ); 6225 %} 6226 6227 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 6228 predicate((UseAVX > 0) && 6229 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6230 match(Set dst (MulVD src (LoadVector mem))); 6231 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 6232 ins_encode %{ 6233 int vlen_enc = vector_length_encoding(this); 6234 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6235 %} 6236 ins_pipe( pipe_slow ); 6237 %} 6238 6239 // --------------------------------- DIV -------------------------------------- 6240 6241 // Floats vector div 6242 instruct vdivF(vec dst, vec src) %{ 6243 predicate(UseAVX == 0); 6244 match(Set dst (DivVF dst src)); 6245 format %{ "divps $dst,$src\t! div packedF" %} 6246 ins_encode %{ 6247 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6248 %} 6249 ins_pipe( pipe_slow ); 6250 %} 6251 6252 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 6253 predicate(UseAVX > 0); 6254 match(Set dst (DivVF src1 src2)); 6255 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 6256 ins_encode %{ 6257 int vlen_enc = vector_length_encoding(this); 6258 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6259 %} 6260 ins_pipe( pipe_slow ); 6261 %} 6262 6263 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6264 predicate((UseAVX > 0) && 6265 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6266 match(Set dst (DivVF src (LoadVector mem))); 6267 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6268 ins_encode %{ 6269 int vlen_enc = vector_length_encoding(this); 6270 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6271 %} 6272 ins_pipe( pipe_slow ); 6273 %} 6274 6275 // Doubles vector div 6276 instruct vdivD(vec dst, vec src) %{ 6277 predicate(UseAVX == 0); 6278 match(Set dst (DivVD dst src)); 6279 format %{ "divpd $dst,$src\t! div packedD" %} 6280 ins_encode %{ 6281 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6282 %} 6283 ins_pipe( pipe_slow ); 6284 %} 6285 6286 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6287 predicate(UseAVX > 0); 6288 match(Set dst (DivVD src1 src2)); 6289 format %{ "vdivpd $dst,$src1,$src2\t! 
div packedD" %} 6290 ins_encode %{ 6291 int vlen_enc = vector_length_encoding(this); 6292 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6293 %} 6294 ins_pipe( pipe_slow ); 6295 %} 6296 6297 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6298 predicate((UseAVX > 0) && 6299 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6300 match(Set dst (DivVD src (LoadVector mem))); 6301 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 6302 ins_encode %{ 6303 int vlen_enc = vector_length_encoding(this); 6304 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6305 %} 6306 ins_pipe( pipe_slow ); 6307 %} 6308 6309 // ------------------------------ MinMax --------------------------------------- 6310 6311 // Byte, Short, Int vector Min/Max 6312 instruct minmax_reg_sse(vec dst, vec src) %{ 6313 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6314 UseAVX == 0); 6315 match(Set dst (MinV dst src)); 6316 match(Set dst (MaxV dst src)); 6317 format %{ "vector_minmax $dst,$src\t! " %} 6318 ins_encode %{ 6319 assert(UseSSE >= 4, "required"); 6320 6321 int opcode = this->ideal_Opcode(); 6322 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6323 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6324 %} 6325 ins_pipe( pipe_slow ); 6326 %} 6327 6328 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6329 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6330 UseAVX > 0); 6331 match(Set dst (MinV src1 src2)); 6332 match(Set dst (MaxV src1 src2)); 6333 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 6334 ins_encode %{ 6335 int opcode = this->ideal_Opcode(); 6336 int vlen_enc = vector_length_encoding(this); 6337 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6338 6339 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6340 %} 6341 ins_pipe( pipe_slow ); 6342 %} 6343 6344 // Long vector Min/Max 6345 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6346 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6347 UseAVX == 0); 6348 match(Set dst (MinV dst src)); 6349 match(Set dst (MaxV src dst)); 6350 effect(TEMP dst, TEMP tmp); 6351 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6352 ins_encode %{ 6353 assert(UseSSE >= 4, "required"); 6354 6355 int opcode = this->ideal_Opcode(); 6356 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6357 assert(elem_bt == T_LONG, "sanity"); 6358 6359 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6360 %} 6361 ins_pipe( pipe_slow ); 6362 %} 6363 6364 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6365 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6366 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6367 match(Set dst (MinV src1 src2)); 6368 match(Set dst (MaxV src1 src2)); 6369 effect(TEMP dst); 6370 format %{ "vector_minmaxL $dst,$src1,$src2\t! 
" %} 6371 ins_encode %{ 6372 int vlen_enc = vector_length_encoding(this); 6373 int opcode = this->ideal_Opcode(); 6374 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6375 assert(elem_bt == T_LONG, "sanity"); 6376 6377 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6378 %} 6379 ins_pipe( pipe_slow ); 6380 %} 6381 6382 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6383 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6384 Matcher::vector_element_basic_type(n) == T_LONG); 6385 match(Set dst (MinV src1 src2)); 6386 match(Set dst (MaxV src1 src2)); 6387 format %{ "vector_minmaxL $dst,$src1,src2\t! " %} 6388 ins_encode %{ 6389 assert(UseAVX > 2, "required"); 6390 6391 int vlen_enc = vector_length_encoding(this); 6392 int opcode = this->ideal_Opcode(); 6393 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6394 assert(elem_bt == T_LONG, "sanity"); 6395 6396 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6397 %} 6398 ins_pipe( pipe_slow ); 6399 %} 6400 6401 // Float/Double vector Min/Max 6402 instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{ 6403 predicate(VM_Version::supports_avx10_2() && 6404 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6405 match(Set dst (MinV a b)); 6406 match(Set dst (MaxV a b)); 6407 format %{ "vector_minmaxFP $dst, $a, $b" %} 6408 ins_encode %{ 6409 int vlen_enc = vector_length_encoding(this); 6410 int opcode = this->ideal_Opcode(); 6411 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6412 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); 6413 %} 6414 ins_pipe( pipe_slow ); 6415 %} 6416 6417 // Float/Double vector Min/Max 6418 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6419 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 && 6420 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6421 UseAVX > 0); 6422 match(Set dst (MinV a b)); 6423 match(Set dst (MaxV a b)); 6424 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6425 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6426 ins_encode %{ 6427 assert(UseAVX > 0, "required"); 6428 6429 int opcode = this->ideal_Opcode(); 6430 int vlen_enc = vector_length_encoding(this); 6431 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6432 6433 __ vminmax_fp(opcode, elem_bt, 6434 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6435 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6436 %} 6437 ins_pipe( pipe_slow ); 6438 %} 6439 6440 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6441 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 && 6442 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6443 match(Set dst (MinV a b)); 6444 match(Set dst (MaxV a b)); 6445 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6446 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6447 ins_encode %{ 6448 assert(UseAVX > 2, "required"); 6449 6450 int opcode = this->ideal_Opcode(); 6451 int vlen_enc = vector_length_encoding(this); 6452 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6453 6454 __ evminmax_fp(opcode, elem_bt, 
6455 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6456 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6457 %} 6458 ins_pipe( pipe_slow ); 6459 %} 6460 6461 // ------------------------------ Unsigned vector Min/Max ---------------------- 6462 6463 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{ 6464 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6465 match(Set dst (UMinV a b)); 6466 match(Set dst (UMaxV a b)); 6467 format %{ "vector_uminmax $dst,$a,$b\t!" %} 6468 ins_encode %{ 6469 int opcode = this->ideal_Opcode(); 6470 int vlen_enc = vector_length_encoding(this); 6471 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6472 assert(is_integral_type(elem_bt), ""); 6473 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); 6474 %} 6475 ins_pipe( pipe_slow ); 6476 %} 6477 6478 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{ 6479 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6480 match(Set dst (UMinV a (LoadVector b))); 6481 match(Set dst (UMaxV a (LoadVector b))); 6482 format %{ "vector_uminmax $dst,$a,$b\t!" %} 6483 ins_encode %{ 6484 int opcode = this->ideal_Opcode(); 6485 int vlen_enc = vector_length_encoding(this); 6486 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6487 assert(is_integral_type(elem_bt), ""); 6488 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc); 6489 %} 6490 ins_pipe( pipe_slow ); 6491 %} 6492 6493 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{ 6494 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG); 6495 match(Set dst (UMinV a b)); 6496 match(Set dst (UMaxV a b)); 6497 effect(TEMP xtmp1, TEMP xtmp2); 6498 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %} 6499 ins_encode %{ 6500 int opcode = this->ideal_Opcode(); 6501 int vlen_enc = vector_length_encoding(this); 6502 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6503 %} 6504 ins_pipe( pipe_slow ); 6505 %} 6506 6507 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{ 6508 match(Set dst (UMinV (Binary dst src2) mask)); 6509 match(Set dst (UMaxV (Binary dst src2) mask)); 6510 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %} 6511 ins_encode %{ 6512 int vlen_enc = vector_length_encoding(this); 6513 BasicType bt = Matcher::vector_element_basic_type(this); 6514 int opc = this->ideal_Opcode(); 6515 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 6516 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 6517 %} 6518 ins_pipe( pipe_slow ); 6519 %} 6520 6521 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{ 6522 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask)); 6523 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask)); 6524 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! 
umin/max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Signum/CopySign ---------------------------

instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
  match(Set dst (SignumF dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumF $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
  match(Set dst (SignumD dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumD $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------
// For copySign, use 0xE4 as the ternary-logic (truth table) immediate for vpternlog
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
// C (xmm2) is set to 0x7FFFFFFF
// Wherever xmm2 is 0, we want to pick from B (sign)
// Wherever xmm2 is 1, we want to pick from A (src)
//
// A B C Result
// 0 0 0 0
// 0 0 1 0
// 0 1 0 1
// 0 1 1 0
// 1 0 0 0
// 1 0 1 1
// 1 1 0 1
// 1 1 1 1
//
// Result going from high bit to low bit is binary 11100100 = 0xE4
// ---------------------------------------
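
// The table above encodes the bit-select "C ? A : B": bit (A<<2 | B<<1 | C) of the
// vpternlog immediate holds the desired result for that input combination, and
// setting bits 2, 5, 6 and 7 gives 0xE4. A scalar sketch of the same selection on a
// float's bit pattern, in illustrative C only (the helper name is made up):
//
//   static inline uint32_t copy_sign_float_bits(uint32_t magnitude, uint32_t sign) {
//     const uint32_t c = 0x7FFFFFFF;        // the constant loaded into the temp register
//     return (magnitude & c) | (sign & ~c); // magnitude bits from A, sign bit from B
//   }
//
// which matches what vpternlogd with immediate 0xE4 computes in the rules below.

instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t!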
using $tmp1 and $tmp2 as TEMP" %} 6613 ins_encode %{ 6614 __ movl($tmp2$$Register, 0x7FFFFFFF); 6615 __ movdl($tmp1$$XMMRegister, $tmp2$$Register); 6616 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6617 %} 6618 ins_pipe( pipe_slow ); 6619 %} 6620 6621 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ 6622 match(Set dst (CopySignD dst (Binary src zero))); 6623 ins_cost(100); 6624 effect(TEMP tmp1, TEMP tmp2); 6625 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6626 ins_encode %{ 6627 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); 6628 __ movq($tmp1$$XMMRegister, $tmp2$$Register); 6629 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6630 %} 6631 ins_pipe( pipe_slow ); 6632 %} 6633 6634 //----------------------------- CompressBits/ExpandBits ------------------------ 6635 6636 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6637 predicate(n->bottom_type()->isa_int()); 6638 match(Set dst (CompressBits src mask)); 6639 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6640 ins_encode %{ 6641 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 6642 %} 6643 ins_pipe( pipe_slow ); 6644 %} 6645 6646 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6647 predicate(n->bottom_type()->isa_int()); 6648 match(Set dst (ExpandBits src mask)); 6649 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6650 ins_encode %{ 6651 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 6652 %} 6653 ins_pipe( pipe_slow ); 6654 %} 6655 6656 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6657 predicate(n->bottom_type()->isa_int()); 6658 match(Set dst (CompressBits src (LoadI mask))); 6659 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6660 ins_encode %{ 6661 __ pextl($dst$$Register, $src$$Register, $mask$$Address); 6662 %} 6663 ins_pipe( pipe_slow ); 6664 %} 6665 6666 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6667 predicate(n->bottom_type()->isa_int()); 6668 match(Set dst (ExpandBits src (LoadI mask))); 6669 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6670 ins_encode %{ 6671 __ pdepl($dst$$Register, $src$$Register, $mask$$Address); 6672 %} 6673 ins_pipe( pipe_slow ); 6674 %} 6675 6676 // --------------------------------- Sqrt -------------------------------------- 6677 6678 instruct vsqrtF_reg(vec dst, vec src) %{ 6679 match(Set dst (SqrtVF src)); 6680 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6681 ins_encode %{ 6682 assert(UseAVX > 0, "required"); 6683 int vlen_enc = vector_length_encoding(this); 6684 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6685 %} 6686 ins_pipe( pipe_slow ); 6687 %} 6688 6689 instruct vsqrtF_mem(vec dst, memory mem) %{ 6690 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6691 match(Set dst (SqrtVF (LoadVector mem))); 6692 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6693 ins_encode %{ 6694 assert(UseAVX > 0, "required"); 6695 int vlen_enc = vector_length_encoding(this); 6696 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6697 %} 6698 ins_pipe( pipe_slow ); 6699 %} 6700 6701 // Floating point vector sqrt 6702 instruct vsqrtD_reg(vec dst, vec src) %{ 6703 match(Set dst (SqrtVD src)); 6704 format %{ "vsqrtpd $dst,$src\t! 
sqrt packedD" %} 6705 ins_encode %{ 6706 assert(UseAVX > 0, "required"); 6707 int vlen_enc = vector_length_encoding(this); 6708 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6709 %} 6710 ins_pipe( pipe_slow ); 6711 %} 6712 6713 instruct vsqrtD_mem(vec dst, memory mem) %{ 6714 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6715 match(Set dst (SqrtVD (LoadVector mem))); 6716 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6717 ins_encode %{ 6718 assert(UseAVX > 0, "required"); 6719 int vlen_enc = vector_length_encoding(this); 6720 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6721 %} 6722 ins_pipe( pipe_slow ); 6723 %} 6724 6725 // ------------------------------ Shift --------------------------------------- 6726 6727 // Left and right shift count vectors are the same on x86 6728 // (only lowest bits of xmm reg are used for count). 6729 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6730 match(Set dst (LShiftCntV cnt)); 6731 match(Set dst (RShiftCntV cnt)); 6732 format %{ "movdl $dst,$cnt\t! load shift count" %} 6733 ins_encode %{ 6734 __ movdl($dst$$XMMRegister, $cnt$$Register); 6735 %} 6736 ins_pipe( pipe_slow ); 6737 %} 6738 6739 // Byte vector shift 6740 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6741 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6742 match(Set dst ( LShiftVB src shift)); 6743 match(Set dst ( RShiftVB src shift)); 6744 match(Set dst (URShiftVB src shift)); 6745 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6746 format %{"vector_byte_shift $dst,$src,$shift" %} 6747 ins_encode %{ 6748 assert(UseSSE > 3, "required"); 6749 int opcode = this->ideal_Opcode(); 6750 bool sign = (opcode != Op_URShiftVB); 6751 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6752 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6753 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6754 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6755 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6756 %} 6757 ins_pipe( pipe_slow ); 6758 %} 6759 6760 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6761 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6762 UseAVX <= 1); 6763 match(Set dst ( LShiftVB src shift)); 6764 match(Set dst ( RShiftVB src shift)); 6765 match(Set dst (URShiftVB src shift)); 6766 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6767 format %{"vector_byte_shift $dst,$src,$shift" %} 6768 ins_encode %{ 6769 assert(UseSSE > 3, "required"); 6770 int opcode = this->ideal_Opcode(); 6771 bool sign = (opcode != Op_URShiftVB); 6772 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6773 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6774 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6775 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6776 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6777 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6778 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6779 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6780 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6781 %} 6782 ins_pipe( pipe_slow ); 6783 %} 6784 6785 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6786 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6787 UseAVX > 1); 6788 match(Set dst ( LShiftVB src shift)); 6789 match(Set dst ( RShiftVB src shift)); 6790 match(Set 
dst (URShiftVB src shift)); 6791 effect(TEMP dst, TEMP tmp); 6792 format %{"vector_byte_shift $dst,$src,$shift" %} 6793 ins_encode %{ 6794 int opcode = this->ideal_Opcode(); 6795 bool sign = (opcode != Op_URShiftVB); 6796 int vlen_enc = Assembler::AVX_256bit; 6797 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6798 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6799 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6800 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6801 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6802 %} 6803 ins_pipe( pipe_slow ); 6804 %} 6805 6806 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6807 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6808 match(Set dst ( LShiftVB src shift)); 6809 match(Set dst ( RShiftVB src shift)); 6810 match(Set dst (URShiftVB src shift)); 6811 effect(TEMP dst, TEMP tmp); 6812 format %{"vector_byte_shift $dst,$src,$shift" %} 6813 ins_encode %{ 6814 assert(UseAVX > 1, "required"); 6815 int opcode = this->ideal_Opcode(); 6816 bool sign = (opcode != Op_URShiftVB); 6817 int vlen_enc = Assembler::AVX_256bit; 6818 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 6819 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6820 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6821 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6822 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6823 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6824 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6825 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6826 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6827 %} 6828 ins_pipe( pipe_slow ); 6829 %} 6830 6831 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6832 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift()); 6833 match(Set dst ( LShiftVB src shift)); 6834 match(Set dst (RShiftVB src shift)); 6835 match(Set dst (URShiftVB src shift)); 6836 effect(TEMP dst, TEMP tmp1, TEMP tmp2); 6837 format %{"vector_byte_shift $dst,$src,$shift" %} 6838 ins_encode %{ 6839 assert(UseAVX > 2, "required"); 6840 int opcode = this->ideal_Opcode(); 6841 bool sign = (opcode != Op_URShiftVB); 6842 int vlen_enc = Assembler::AVX_512bit; 6843 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 6844 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 6845 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6846 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6847 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6848 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6849 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6850 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6851 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6852 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 6853 __ evmovdquq($tmp2$$XMMRegister, 
ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts the short value to an int with
// sign extension before the shift. For example, for a short holding -1 (0xFFFF),
// Java computes 0xFFFFFFFF >>> 2 = 0x3FFFFFFF, whose low 16 bits are 0xFFFF,
// whereas a 16-bit logical shift of 0xFFFF by 2 would give 0x3FFF.
// But char vectors are fine since chars are unsigned values.
// Shorts/Chars vector shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else if (vlen == 4) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 8, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector shift
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector constant shift
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t!
shift packedI" %} 6928 ins_encode %{ 6929 int opcode = this->ideal_Opcode(); 6930 if (UseAVX > 0) { 6931 int vector_len = vector_length_encoding(this); 6932 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6933 } else { 6934 int vlen = Matcher::vector_length(this); 6935 if (vlen == 2) { 6936 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6937 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6938 } else { 6939 assert(vlen == 4, "sanity"); 6940 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6941 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6942 } 6943 } 6944 %} 6945 ins_pipe( pipe_slow ); 6946 %} 6947 6948 // Longs vector shift 6949 instruct vshiftL(vec dst, vec src, vec shift) %{ 6950 predicate(!n->as_ShiftV()->is_var_shift()); 6951 match(Set dst ( LShiftVL src shift)); 6952 match(Set dst (URShiftVL src shift)); 6953 effect(TEMP dst, USE src, USE shift); 6954 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %} 6955 ins_encode %{ 6956 int opcode = this->ideal_Opcode(); 6957 if (UseAVX > 0) { 6958 int vlen_enc = vector_length_encoding(this); 6959 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6960 } else { 6961 assert(Matcher::vector_length(this) == 2, ""); 6962 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6963 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6964 } 6965 %} 6966 ins_pipe( pipe_slow ); 6967 %} 6968 6969 // Longs vector constant shift 6970 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 6971 match(Set dst (LShiftVL src (LShiftCntV shift))); 6972 match(Set dst (URShiftVL src (RShiftCntV shift))); 6973 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 6974 ins_encode %{ 6975 int opcode = this->ideal_Opcode(); 6976 if (UseAVX > 0) { 6977 int vector_len = vector_length_encoding(this); 6978 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6979 } else { 6980 assert(Matcher::vector_length(this) == 2, ""); 6981 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6982 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6983 } 6984 %} 6985 ins_pipe( pipe_slow ); 6986 %} 6987 6988 // -------------------ArithmeticRightShift ----------------------------------- 6989 // Long vector arithmetic right shift 6990 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 6991 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 6992 match(Set dst (RShiftVL src shift)); 6993 effect(TEMP dst, TEMP tmp); 6994 format %{ "vshiftq $dst,$src,$shift" %} 6995 ins_encode %{ 6996 uint vlen = Matcher::vector_length(this); 6997 if (vlen == 2) { 6998 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6999 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 7000 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 7001 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 7002 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 7003 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 7004 } else { 7005 assert(vlen == 4, "sanity"); 7006 assert(UseAVX > 1, "required"); 7007 int vlen_enc = Assembler::AVX_256bit; 7008 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7009 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 7010 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7011 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 7012 __ vpsubq($dst$$XMMRegister, 
$dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 7013 } 7014 %} 7015 ins_pipe( pipe_slow ); 7016 %} 7017 7018 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 7019 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 7020 match(Set dst (RShiftVL src shift)); 7021 format %{ "vshiftq $dst,$src,$shift" %} 7022 ins_encode %{ 7023 int vlen_enc = vector_length_encoding(this); 7024 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7025 %} 7026 ins_pipe( pipe_slow ); 7027 %} 7028 7029 // ------------------- Variable Shift ----------------------------- 7030 // Byte variable shift 7031 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7032 predicate(Matcher::vector_length(n) <= 8 && 7033 n->as_ShiftV()->is_var_shift() && 7034 !VM_Version::supports_avx512bw()); 7035 match(Set dst ( LShiftVB src shift)); 7036 match(Set dst ( RShiftVB src shift)); 7037 match(Set dst (URShiftVB src shift)); 7038 effect(TEMP dst, TEMP vtmp); 7039 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7040 ins_encode %{ 7041 assert(UseAVX >= 2, "required"); 7042 7043 int opcode = this->ideal_Opcode(); 7044 int vlen_enc = Assembler::AVX_128bit; 7045 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7046 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7047 %} 7048 ins_pipe( pipe_slow ); 7049 %} 7050 7051 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7052 predicate(Matcher::vector_length(n) == 16 && 7053 n->as_ShiftV()->is_var_shift() && 7054 !VM_Version::supports_avx512bw()); 7055 match(Set dst ( LShiftVB src shift)); 7056 match(Set dst ( RShiftVB src shift)); 7057 match(Set dst (URShiftVB src shift)); 7058 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7059 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp1, $vtmp2 as TEMP" %} 7060 ins_encode %{ 7061 assert(UseAVX >= 2, "required"); 7062 7063 int opcode = this->ideal_Opcode(); 7064 int vlen_enc = Assembler::AVX_128bit; 7065 // Shift lower half and get word result in dst 7066 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7067 7068 // Shift upper half and get word result in vtmp1 7069 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7070 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7071 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7072 7073 // Merge and down convert the two word results to byte in dst 7074 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7075 %} 7076 ins_pipe( pipe_slow ); 7077 %} 7078 7079 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 7080 predicate(Matcher::vector_length(n) == 32 && 7081 n->as_ShiftV()->is_var_shift() && 7082 !VM_Version::supports_avx512bw()); 7083 match(Set dst ( LShiftVB src shift)); 7084 match(Set dst ( RShiftVB src shift)); 7085 match(Set dst (URShiftVB src shift)); 7086 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 7087 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 7088 ins_encode %{ 7089 assert(UseAVX >= 2, "required"); 7090 7091 int opcode = this->ideal_Opcode(); 7092 int vlen_enc = Assembler::AVX_128bit; 7093 // Process lower 128 bits and get result in dst 7094 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7095 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7096 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7097 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7098 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7099 7100 // Process higher 128 bits and get result in vtmp3 7101 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7102 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7103 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 7104 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 7105 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 7106 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7107 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 7108 7109 // Merge the two results in dst 7110 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7111 %} 7112 ins_pipe( pipe_slow ); 7113 %} 7114 7115 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ 7116 predicate(Matcher::vector_length(n) <= 32 && 7117 n->as_ShiftV()->is_var_shift() && 7118 VM_Version::supports_avx512bw()); 7119 match(Set dst ( LShiftVB src shift)); 7120 match(Set dst ( RShiftVB src shift)); 7121 match(Set dst (URShiftVB src shift)); 7122 effect(TEMP dst, TEMP vtmp); 7123 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp as TEMP" %} 7124 ins_encode %{ 7125 assert(UseAVX > 2, "required"); 7126 7127 int opcode = this->ideal_Opcode(); 7128 int vlen_enc = vector_length_encoding(this); 7129 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7130 %} 7131 ins_pipe( pipe_slow ); 7132 %} 7133 7134 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7135 predicate(Matcher::vector_length(n) == 64 && 7136 n->as_ShiftV()->is_var_shift() && 7137 VM_Version::supports_avx512bw()); 7138 match(Set dst ( LShiftVB src shift)); 7139 match(Set dst ( RShiftVB src shift)); 7140 match(Set dst (URShiftVB src shift)); 7141 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7142 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7143 ins_encode %{ 7144 assert(UseAVX > 2, "required"); 7145 7146 int opcode = this->ideal_Opcode(); 7147 int vlen_enc = Assembler::AVX_256bit; 7148 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7149 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7150 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7151 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7152 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7153 %} 7154 ins_pipe( pipe_slow ); 7155 %} 7156 7157 // Short variable shift 7158 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7159 predicate(Matcher::vector_length(n) <= 8 && 7160 n->as_ShiftV()->is_var_shift() && 7161 !VM_Version::supports_avx512bw()); 7162 match(Set dst ( LShiftVS src shift)); 7163 match(Set dst ( RShiftVS src shift)); 7164 match(Set dst (URShiftVS src shift)); 7165 effect(TEMP dst, TEMP vtmp); 7166 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7167 ins_encode %{ 7168 assert(UseAVX >= 2, "required"); 7169 7170 int opcode = this->ideal_Opcode(); 7171 bool sign = (opcode != Op_URShiftVS); 7172 int vlen_enc = Assembler::AVX_256bit; 7173 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 7174 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 7175 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 7176 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7177 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 7178 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7179 %} 7180 ins_pipe( pipe_slow ); 7181 %} 7182 7183 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7184 predicate(Matcher::vector_length(n) == 16 && 7185 n->as_ShiftV()->is_var_shift() && 7186 !VM_Version::supports_avx512bw()); 7187 match(Set dst ( LShiftVS src shift)); 7188 match(Set dst ( RShiftVS src shift)); 7189 match(Set dst (URShiftVS src shift)); 7190 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7191 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7192 ins_encode %{ 7193 assert(UseAVX >= 2, "required"); 7194 7195 int opcode = this->ideal_Opcode(); 7196 bool sign = (opcode != Op_URShiftVS); 7197 int vlen_enc = Assembler::AVX_256bit; 7198 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP 7199 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 7200 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7201 __ 
varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Shift upper half, with result in dst using vtmp1 as TEMP
    __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
    __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Merge lower and upper half result into dst
    __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integer variable shift
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable shift
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

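// Note on the arithmetic case below: AVX2 provides variable-count logical shifts
// for 64-bit lanes (vpsllvq/vpsrlvq) but no variable-count arithmetic right shift;
// vpsravq only arrives with AVX-512. The AVX2 rule therefore expands RShiftVL through
// the macro assembler with an extra temporary vector. One classic way of building an
// arithmetic shift from a logical one, shown as a scalar sketch for illustration only
// (not necessarily the exact sequence emitted by varshiftq):
//
//   static inline int64_t sra64(uint64_t x, unsigned s) {     // 0 <= s <= 63
//     uint64_t t = 0x8000000000000000ULL >> s;  // spot where the sign bit lands after srl
//     return (int64_t)(((x >> s) ^ t) - t);     // flip it and subtract to sign-extend
//   }
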
// Long variable right shift arithmetic
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
                 $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- AND --------------------------------------

instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t!
xor vectors" %} 7378 ins_encode %{ 7379 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7380 %} 7381 ins_pipe( pipe_slow ); 7382 %} 7383 7384 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7385 predicate(UseAVX > 0); 7386 match(Set dst (XorV src1 src2)); 7387 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7388 ins_encode %{ 7389 int vlen_enc = vector_length_encoding(this); 7390 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7391 %} 7392 ins_pipe( pipe_slow ); 7393 %} 7394 7395 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7396 predicate((UseAVX > 0) && 7397 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7398 match(Set dst (XorV src (LoadVector mem))); 7399 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7400 ins_encode %{ 7401 int vlen_enc = vector_length_encoding(this); 7402 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7403 %} 7404 ins_pipe( pipe_slow ); 7405 %} 7406 7407 // --------------------------------- VectorCast -------------------------------------- 7408 7409 instruct vcastBtoX(vec dst, vec src) %{ 7410 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE); 7411 match(Set dst (VectorCastB2X src)); 7412 format %{ "vector_cast_b2x $dst,$src\t!" %} 7413 ins_encode %{ 7414 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7415 int vlen_enc = vector_length_encoding(this); 7416 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7417 %} 7418 ins_pipe( pipe_slow ); 7419 %} 7420 7421 instruct vcastBtoD(legVec dst, legVec src) %{ 7422 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE); 7423 match(Set dst (VectorCastB2X src)); 7424 format %{ "vector_cast_b2x $dst,$src\t!" %} 7425 ins_encode %{ 7426 int vlen_enc = vector_length_encoding(this); 7427 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7428 %} 7429 ins_pipe( pipe_slow ); 7430 %} 7431 7432 instruct castStoX(vec dst, vec src) %{ 7433 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7434 Matcher::vector_length(n->in(1)) <= 8 && // src 7435 Matcher::vector_element_basic_type(n) == T_BYTE); 7436 match(Set dst (VectorCastS2X src)); 7437 format %{ "vector_cast_s2x $dst,$src" %} 7438 ins_encode %{ 7439 assert(UseAVX > 0, "required"); 7440 7441 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7442 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7443 %} 7444 ins_pipe( pipe_slow ); 7445 %} 7446 7447 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7448 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7449 Matcher::vector_length(n->in(1)) == 16 && // src 7450 Matcher::vector_element_basic_type(n) == T_BYTE); 7451 effect(TEMP dst, TEMP vtmp); 7452 match(Set dst (VectorCastS2X src)); 7453 format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp as TEMP" %} 7454 ins_encode %{ 7455 assert(UseAVX > 0, "required"); 7456 7457 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7458 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7459 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7460 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7461 %} 7462 ins_pipe( pipe_slow ); 7463 %} 7464 7465 instruct vcastStoX_evex(vec dst, vec src) %{ 7466 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7467 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7468 match(Set dst (VectorCastS2X src)); 7469 format %{ "vector_cast_s2x $dst,$src\t!" %} 7470 ins_encode %{ 7471 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7472 int src_vlen_enc = vector_length_encoding(this, $src); 7473 int vlen_enc = vector_length_encoding(this); 7474 switch (to_elem_bt) { 7475 case T_BYTE: 7476 if (!VM_Version::supports_avx512vl()) { 7477 vlen_enc = Assembler::AVX_512bit; 7478 } 7479 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7480 break; 7481 case T_INT: 7482 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7483 break; 7484 case T_FLOAT: 7485 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7486 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7487 break; 7488 case T_LONG: 7489 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7490 break; 7491 case T_DOUBLE: { 7492 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7493 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7494 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7495 break; 7496 } 7497 default: 7498 ShouldNotReachHere(); 7499 } 7500 %} 7501 ins_pipe( pipe_slow ); 7502 %} 7503 7504 instruct castItoX(vec dst, vec src) %{ 7505 predicate(UseAVX <= 2 && 7506 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7507 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7508 match(Set dst (VectorCastI2X src)); 7509 format %{ "vector_cast_i2x $dst,$src" %} 7510 ins_encode %{ 7511 assert(UseAVX > 0, "required"); 7512 7513 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7514 int vlen_enc = vector_length_encoding(this, $src); 7515 7516 if (to_elem_bt == T_BYTE) { 7517 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7518 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7519 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7520 } else { 7521 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7522 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7523 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7524 } 7525 %} 7526 ins_pipe( pipe_slow ); 7527 %} 7528 7529 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7530 predicate(UseAVX <= 2 && 7531 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7532 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7533 match(Set dst (VectorCastI2X src)); 7534 format %{ "vector_cast_i2x $dst,$src\t! 
using $vtmp as TEMP" %} 7535 effect(TEMP dst, TEMP vtmp); 7536 ins_encode %{ 7537 assert(UseAVX > 0, "required"); 7538 7539 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7540 int vlen_enc = vector_length_encoding(this, $src); 7541 7542 if (to_elem_bt == T_BYTE) { 7543 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7544 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7545 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7546 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7547 } else { 7548 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7549 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7550 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7551 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7552 } 7553 %} 7554 ins_pipe( pipe_slow ); 7555 %} 7556 7557 instruct vcastItoX_evex(vec dst, vec src) %{ 7558 predicate(UseAVX > 2 || 7559 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7560 match(Set dst (VectorCastI2X src)); 7561 format %{ "vector_cast_i2x $dst,$src\t!" %} 7562 ins_encode %{ 7563 assert(UseAVX > 0, "required"); 7564 7565 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7566 int src_vlen_enc = vector_length_encoding(this, $src); 7567 int dst_vlen_enc = vector_length_encoding(this); 7568 switch (dst_elem_bt) { 7569 case T_BYTE: 7570 if (!VM_Version::supports_avx512vl()) { 7571 src_vlen_enc = Assembler::AVX_512bit; 7572 } 7573 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7574 break; 7575 case T_SHORT: 7576 if (!VM_Version::supports_avx512vl()) { 7577 src_vlen_enc = Assembler::AVX_512bit; 7578 } 7579 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7580 break; 7581 case T_FLOAT: 7582 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7583 break; 7584 case T_LONG: 7585 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7586 break; 7587 case T_DOUBLE: 7588 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7589 break; 7590 default: 7591 ShouldNotReachHere(); 7592 } 7593 %} 7594 ins_pipe( pipe_slow ); 7595 %} 7596 7597 instruct vcastLtoBS(vec dst, vec src) %{ 7598 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7599 UseAVX <= 2); 7600 match(Set dst (VectorCastL2X src)); 7601 format %{ "vector_cast_l2x $dst,$src" %} 7602 ins_encode %{ 7603 assert(UseAVX > 0, "required"); 7604 7605 int vlen = Matcher::vector_length_in_bytes(this, $src); 7606 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7607 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? 
ExternalAddress(vector_int_to_byte_mask()) 7608 : ExternalAddress(vector_int_to_short_mask()); 7609 if (vlen <= 16) { 7610 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7611 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7612 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7613 } else { 7614 assert(vlen <= 32, "required"); 7615 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7616 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7617 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7618 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7619 } 7620 if (to_elem_bt == T_BYTE) { 7621 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7622 } 7623 %} 7624 ins_pipe( pipe_slow ); 7625 %} 7626 7627 instruct vcastLtoX_evex(vec dst, vec src) %{ 7628 predicate(UseAVX > 2 || 7629 (Matcher::vector_element_basic_type(n) == T_INT || 7630 Matcher::vector_element_basic_type(n) == T_FLOAT || 7631 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7632 match(Set dst (VectorCastL2X src)); 7633 format %{ "vector_cast_l2x $dst,$src\t!" %} 7634 ins_encode %{ 7635 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7636 int vlen = Matcher::vector_length_in_bytes(this, $src); 7637 int vlen_enc = vector_length_encoding(this, $src); 7638 switch (to_elem_bt) { 7639 case T_BYTE: 7640 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7641 vlen_enc = Assembler::AVX_512bit; 7642 } 7643 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7644 break; 7645 case T_SHORT: 7646 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7647 vlen_enc = Assembler::AVX_512bit; 7648 } 7649 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7650 break; 7651 case T_INT: 7652 if (vlen == 8) { 7653 if ($dst$$XMMRegister != $src$$XMMRegister) { 7654 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7655 } 7656 } else if (vlen == 16) { 7657 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7658 } else if (vlen == 32) { 7659 if (UseAVX > 2) { 7660 if (!VM_Version::supports_avx512vl()) { 7661 vlen_enc = Assembler::AVX_512bit; 7662 } 7663 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7664 } else { 7665 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7666 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7667 } 7668 } else { // vlen == 64 7669 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7670 } 7671 break; 7672 case T_FLOAT: 7673 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7674 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7675 break; 7676 case T_DOUBLE: 7677 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7678 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7679 break; 7680 7681 default: assert(false, "%s", type2name(to_elem_bt)); 7682 } 7683 %} 7684 ins_pipe( pipe_slow ); 7685 %} 7686 7687 instruct vcastFtoD_reg(vec dst, vec src) %{ 7688 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7689 match(Set dst (VectorCastF2X src)); 7690 format %{ "vector_cast_f2d $dst,$src\t!" 
%}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when loading addresses
    // wider than 32 bits for register-indirect addressing: stub constants live in the code
    // cache, and ReservedCodeCacheSize is currently capped at 2G. Targets are free to raise
    // that limit, but a code cache larger than 2G is unrealistic in practice, and staying
    // within the cap saves a temporary register allocation, which in the limiting case can
    // prevent spilling in blocks with high register pressure.
    __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    if (to_elem_bt == T_LONG) {
      int vlen_enc = vector_length_encoding(this);
      __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
    } else {
      int vlen_enc = vector_length_encoding(this, $src);
      __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

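// Vector float -> integral casts must implement Java's narrowing semantics:
// cvttps2dq and friends return the "integer indefinite" value (0x80000000 for
// 32-bit lanes) for NaN and for out-of-range inputs, whereas Java requires NaN
// to become 0 and out-of-range values to saturate at the type's MIN/MAX. The
// vector_castF2X helpers used above post-process the raw conversion with the
// signflip constant and extra compares. A scalar model of the required result,
// for illustration only (not the emitted instruction sequence):
//
//   static inline int32_t java_f2i(float f) {
//     if (f != f)               return 0;          // NaN -> 0
//     if (f >=  2147483648.0f)  return INT32_MAX;  // saturate on overflow
//     if (f <= -2147483648.0f)  return INT32_MIN;
//     return (int32_t)f;                           // in range: truncate toward zero
//   }
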
instruct vcastDtoF_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
                                                                ExternalAddress(vector_float_signflip());
    __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                           $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vucast(vec dst, vec src) %{
  match(Set dst (VectorUCastB2X src));
  match(Set dst (VectorUCastS2X src));
  match(Set dst (VectorUCastI2X src));
  format %{ "vector_ucast $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n) < 64 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ?
0x3FBF : 0x3F80)); 7815 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, 7816 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7817 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister); 7818 %} 7819 ins_pipe( pipe_slow ); 7820 %} 7821 7822 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7823 predicate((VM_Version::supports_avx512vl() || 7824 Matcher::vector_length_in_bytes(n) == 64) && 7825 Matcher::vector_element_basic_type(n) == T_INT); 7826 match(Set dst (RoundVF src)); 7827 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7828 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7829 ins_encode %{ 7830 int vlen_enc = vector_length_encoding(this); 7831 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7832 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, 7833 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7834 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7835 %} 7836 ins_pipe( pipe_slow ); 7837 %} 7838 7839 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7840 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7841 match(Set dst (RoundVD src)); 7842 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7843 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7844 ins_encode %{ 7845 int vlen_enc = vector_length_encoding(this); 7846 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7847 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, 7848 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc, 7849 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7850 %} 7851 ins_pipe( pipe_slow ); 7852 %} 7853 7854 // --------------------------------- VectorMaskCmp -------------------------------------- 7855 7856 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7857 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7858 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 7859 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7860 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7861 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7862 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 7863 ins_encode %{ 7864 int vlen_enc = vector_length_encoding(this, $src1); 7865 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7866 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7867 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7868 } else { 7869 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7870 } 7871 %} 7872 ins_pipe( pipe_slow ); 7873 %} 7874 7875 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7876 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 7877 n->bottom_type()->isa_vectmask() == nullptr && 7878 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7879 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7880 effect(TEMP ktmp); 7881 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7882 ins_encode %{ 7883 int vlen_enc = Assembler::AVX_512bit; 7884 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7885 KRegister mask = k0; // The comparison itself is not being masked. 7886 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7887 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7888 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7889 } else { 7890 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7891 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7892 } 7893 %} 7894 ins_pipe( pipe_slow ); 7895 %} 7896 7897 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 7898 predicate(n->bottom_type()->isa_vectmask() && 7899 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7900 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7901 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 7902 ins_encode %{ 7903 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 7904 int vlen_enc = vector_length_encoding(this, $src1); 7905 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7906 KRegister mask = k0; // The comparison itself is not being masked. 7907 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7908 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7909 } else { 7910 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7911 } 7912 %} 7913 ins_pipe( pipe_slow ); 7914 %} 7915 7916 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7917 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7918 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7919 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7920 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7921 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7922 (n->in(2)->get_int() == BoolTest::eq || 7923 n->in(2)->get_int() == BoolTest::lt || 7924 n->in(2)->get_int() == BoolTest::gt)); // cond 7925 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7926 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

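// Unsigned integer compares without AVX-512: SSE/AVX only provide signed packed
// compares (pcmpgt*), so the rule below first flips the sign bit of both operands
// and then performs a signed compare, which yields the unsigned ordering. A scalar
// sketch of the identity being used (illustrative only):
//
//   static inline bool unsigned_less(uint32_t a, uint32_t b) {
//     // a <u b  <==>  (a ^ 0x80000000) <s (b ^ 0x80000000)
//     return (int32_t)(a ^ 0x80000000u) < (int32_t)(b ^ 0x80000000u);
//   }
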
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));

    if (vlen_enc == Assembler::AVX_128bit) {
      __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    } else {
      __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    }
    __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate((n->bottom_type()->isa_vectmask() == nullptr &&
             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    bool merge = false;
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    switch (src1_elem_bt) {
      case T_INT: {
        __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      case T_LONG: {
        __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

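// AVX-512 integer compares produce their result in an opmask (k) register via
// evpcmp*. When the ideal result type is a mask (the rule below), the k-register
// is the final destination; when a boolean vector is expected instead (the 512-bit
// rule above), the k-register is expanded back into lanes of all ones/all zeroes
// by a zero-masked load of the all_bits_set constant (merge disabled, so unset
// lanes become zero). Roughly, per lane (illustrative scalar model only):
//
//   static inline int32_t lane_result(bool predicate_held) {
//     return predicate_held ? -1 : 0;   // -1 == 0xFFFFFFFF, i.e. all bits set
//   }
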
instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Compare lane-by-lane according to the element width.
    switch (src1_elem_bt) {
      case T_BYTE: {
        __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

// Extract

instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8); // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t!
using $vtmp as TEMP" %} 8111 ins_encode %{ 8112 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8113 8114 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8115 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 8116 %} 8117 ins_pipe( pipe_slow ); 8118 %} 8119 8120 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8121 predicate(Matcher::vector_length(n->in(1)) <= 4); 8122 match(Set dst (ExtractF src idx)); 8123 effect(TEMP dst, TEMP vtmp); 8124 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8125 ins_encode %{ 8126 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8127 8128 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 8129 %} 8130 ins_pipe( pipe_slow ); 8131 %} 8132 8133 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8134 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 8135 Matcher::vector_length(n->in(1)/*src*/) == 16); 8136 match(Set dst (ExtractF src idx)); 8137 effect(TEMP vtmp); 8138 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8139 ins_encode %{ 8140 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8141 8142 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8143 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 8144 %} 8145 ins_pipe( pipe_slow ); 8146 %} 8147 8148 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 8149 predicate(Matcher::vector_length(n->in(1)) == 2); // src 8150 match(Set dst (ExtractD src idx)); 8151 format %{ "extractD $dst,$src,$idx\t!" %} 8152 ins_encode %{ 8153 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8154 8155 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8156 %} 8157 ins_pipe( pipe_slow ); 8158 %} 8159 8160 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 8161 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8162 Matcher::vector_length(n->in(1)) == 8); // src 8163 match(Set dst (ExtractD src idx)); 8164 effect(TEMP vtmp); 8165 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 8166 ins_encode %{ 8167 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8168 8169 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8170 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 8171 %} 8172 ins_pipe( pipe_slow ); 8173 %} 8174 8175 // --------------------------------- Vector Blend -------------------------------------- 8176 8177 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 8178 predicate(UseAVX == 0); 8179 match(Set dst (VectorBlend (Binary dst src) mask)); 8180 format %{ "vector_blend $dst,$src,$mask\t! 
using $tmp as TEMP" %} 8181 effect(TEMP tmp); 8182 ins_encode %{ 8183 assert(UseSSE >= 4, "required"); 8184 8185 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 8186 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 8187 } 8188 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 8189 %} 8190 ins_pipe( pipe_slow ); 8191 %} 8192 8193 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8194 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8195 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8196 Matcher::vector_length_in_bytes(n) <= 32 && 8197 is_integral_type(Matcher::vector_element_basic_type(n))); 8198 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8199 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8200 ins_encode %{ 8201 int vlen_enc = vector_length_encoding(this); 8202 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8203 %} 8204 ins_pipe( pipe_slow ); 8205 %} 8206 8207 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8208 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8209 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8210 Matcher::vector_length_in_bytes(n) <= 32 && 8211 !is_integral_type(Matcher::vector_element_basic_type(n))); 8212 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8213 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8214 ins_encode %{ 8215 int vlen_enc = vector_length_encoding(this); 8216 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8217 %} 8218 ins_pipe( pipe_slow ); 8219 %} 8220 8221 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{ 8222 predicate(UseAVX > 0 && EnableX86ECoreOpts && 8223 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8224 Matcher::vector_length_in_bytes(n) <= 32); 8225 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8226 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %} 8227 effect(TEMP vtmp, TEMP dst); 8228 ins_encode %{ 8229 int vlen_enc = vector_length_encoding(this); 8230 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8231 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8232 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8233 %} 8234 ins_pipe( pipe_slow ); 8235 %} 8236 8237 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 8238 predicate(Matcher::vector_length_in_bytes(n) == 64 && 8239 n->in(2)->bottom_type()->isa_vectmask() == nullptr); 8240 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8241 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using k2 as TEMP" %} 8242 effect(TEMP ktmp); 8243 ins_encode %{ 8244 int vlen_enc = Assembler::AVX_512bit; 8245 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8246 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 8247 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8248 %} 8249 ins_pipe( pipe_slow ); 8250 %} 8251 8252 8253 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 8254 predicate(n->in(2)->bottom_type()->isa_vectmask() && 8255 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 8256 VM_Version::supports_avx512bw())); 8257 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8258 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 8259 ins_encode %{ 8260 int vlen_enc = vector_length_encoding(this); 8261 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8262 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8263 %} 8264 ins_pipe( pipe_slow ); 8265 %} 8266 8267 // --------------------------------- ABS -------------------------------------- 8268 // a = |a| 8269 instruct vabsB_reg(vec dst, vec src) %{ 8270 match(Set dst (AbsVB src)); 8271 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 8272 ins_encode %{ 8273 uint vlen = Matcher::vector_length(this); 8274 if (vlen <= 16) { 8275 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8276 } else { 8277 int vlen_enc = vector_length_encoding(this); 8278 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8279 } 8280 %} 8281 ins_pipe( pipe_slow ); 8282 %} 8283 8284 instruct vabsS_reg(vec dst, vec src) %{ 8285 match(Set dst (AbsVS src)); 8286 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 8287 ins_encode %{ 8288 uint vlen = Matcher::vector_length(this); 8289 if (vlen <= 8) { 8290 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8291 } else { 8292 int vlen_enc = vector_length_encoding(this); 8293 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8294 } 8295 %} 8296 ins_pipe( pipe_slow ); 8297 %} 8298 8299 instruct vabsI_reg(vec dst, vec src) %{ 8300 match(Set dst (AbsVI src)); 8301 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 8302 ins_encode %{ 8303 uint vlen = Matcher::vector_length(this); 8304 if (vlen <= 4) { 8305 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8306 } else { 8307 int vlen_enc = vector_length_encoding(this); 8308 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8309 } 8310 %} 8311 ins_pipe( pipe_slow ); 8312 %} 8313 8314 instruct vabsL_reg(vec dst, vec src) %{ 8315 match(Set dst (AbsVL src)); 8316 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 8317 ins_encode %{ 8318 assert(UseAVX > 2, "required"); 8319 int vlen_enc = vector_length_encoding(this); 8320 if (!VM_Version::supports_avx512vl()) { 8321 vlen_enc = Assembler::AVX_512bit; 8322 } 8323 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8324 %} 8325 ins_pipe( pipe_slow ); 8326 %} 8327 8328 // --------------------------------- ABSNEG -------------------------------------- 8329 8330 instruct vabsnegF(vec dst, vec src) %{ 8331 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 8332 match(Set dst (AbsVF src)); 8333 match(Set dst (NegVF src)); 8334 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 8335 ins_cost(150); 8336 ins_encode %{ 8337 int opcode = 
this->ideal_Opcode(); 8338 int vlen = Matcher::vector_length(this); 8339 if (vlen == 2) { 8340 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8341 } else { 8342 assert(vlen == 8 || vlen == 16, "required"); 8343 int vlen_enc = vector_length_encoding(this); 8344 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8345 } 8346 %} 8347 ins_pipe( pipe_slow ); 8348 %} 8349 8350 instruct vabsneg4F(vec dst) %{ 8351 predicate(Matcher::vector_length(n) == 4); 8352 match(Set dst (AbsVF dst)); 8353 match(Set dst (NegVF dst)); 8354 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 8355 ins_cost(150); 8356 ins_encode %{ 8357 int opcode = this->ideal_Opcode(); 8358 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 8359 %} 8360 ins_pipe( pipe_slow ); 8361 %} 8362 8363 instruct vabsnegD(vec dst, vec src) %{ 8364 match(Set dst (AbsVD src)); 8365 match(Set dst (NegVD src)); 8366 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 8367 ins_encode %{ 8368 int opcode = this->ideal_Opcode(); 8369 uint vlen = Matcher::vector_length(this); 8370 if (vlen == 2) { 8371 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8372 } else { 8373 int vlen_enc = vector_length_encoding(this); 8374 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8375 } 8376 %} 8377 ins_pipe( pipe_slow ); 8378 %} 8379 8380 //------------------------------------- VectorTest -------------------------------------------- 8381 8382 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ 8383 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); 8384 match(Set cr (VectorTest src1 src2)); 8385 effect(TEMP vtmp); 8386 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %} 8387 ins_encode %{ 8388 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8389 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8390 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen); 8391 %} 8392 ins_pipe( pipe_slow ); 8393 %} 8394 8395 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{ 8396 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16); 8397 match(Set cr (VectorTest src1 src2)); 8398 format %{ "vptest_ge16 $src1, $src2\n\t" %} 8399 ins_encode %{ 8400 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8401 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8402 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen); 8403 %} 8404 ins_pipe( pipe_slow ); 8405 %} 8406 8407 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8408 predicate((Matcher::vector_length(n->in(1)) < 8 || 8409 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8410 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 8411 match(Set cr (VectorTest src1 src2)); 8412 effect(TEMP tmp); 8413 format %{ "ktest_alltrue_le8 $src1, $src2\t! 
using $tmp as TEMP" %} 8414 ins_encode %{ 8415 uint masklen = Matcher::vector_length(this, $src1); 8416 __ kmovwl($tmp$$Register, $src1$$KRegister); 8417 __ andl($tmp$$Register, (1 << masklen) - 1); 8418 __ cmpl($tmp$$Register, (1 << masklen) - 1); 8419 %} 8420 ins_pipe( pipe_slow ); 8421 %} 8422 8423 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8424 predicate((Matcher::vector_length(n->in(1)) < 8 || 8425 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8426 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 8427 match(Set cr (VectorTest src1 src2)); 8428 effect(TEMP tmp); 8429 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %} 8430 ins_encode %{ 8431 uint masklen = Matcher::vector_length(this, $src1); 8432 __ kmovwl($tmp$$Register, $src1$$KRegister); 8433 __ andl($tmp$$Register, (1 << masklen) - 1); 8434 %} 8435 ins_pipe( pipe_slow ); 8436 %} 8437 8438 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ 8439 predicate(Matcher::vector_length(n->in(1)) >= 16 || 8440 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq())); 8441 match(Set cr (VectorTest src1 src2)); 8442 format %{ "ktest_ge8 $src1, $src2\n\t" %} 8443 ins_encode %{ 8444 uint masklen = Matcher::vector_length(this, $src1); 8445 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister); 8446 %} 8447 ins_pipe( pipe_slow ); 8448 %} 8449 8450 //------------------------------------- LoadMask -------------------------------------------- 8451 8452 instruct loadMask(legVec dst, legVec src) %{ 8453 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw()); 8454 match(Set dst (VectorLoadMask src)); 8455 effect(TEMP dst); 8456 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8457 ins_encode %{ 8458 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8459 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8460 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8461 %} 8462 ins_pipe( pipe_slow ); 8463 %} 8464 8465 instruct loadMask64(kReg dst, vec src, vec xtmp) %{ 8466 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8467 match(Set dst (VectorLoadMask src)); 8468 effect(TEMP xtmp); 8469 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} 8470 ins_encode %{ 8471 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8472 true, Assembler::AVX_512bit); 8473 %} 8474 ins_pipe( pipe_slow ); 8475 %} 8476 8477 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8478 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8479 match(Set dst (VectorLoadMask src)); 8480 effect(TEMP xtmp); 8481 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8482 ins_encode %{ 8483 int vlen_enc = vector_length_encoding(in(1)); 8484 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8485 false, vlen_enc); 8486 %} 8487 ins_pipe( pipe_slow ); 8488 %} 8489 8490 //------------------------------------- StoreMask -------------------------------------------- 8491 8492 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8493 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8494 match(Set dst (VectorStoreMask src size)); 8495 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8496 ins_encode %{ 8497 int vlen = Matcher::vector_length(this); 8498 if (vlen <= 16 && UseAVX <= 2) { 8499 assert(UseSSE >= 3, "required"); 8500 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8501 } else { 8502 assert(UseAVX > 0, "required"); 8503 int src_vlen_enc = vector_length_encoding(this, $src); 8504 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8505 } 8506 %} 8507 ins_pipe( pipe_slow ); 8508 %} 8509 8510 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8511 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8512 match(Set dst (VectorStoreMask src size)); 8513 effect(TEMP_DEF dst, TEMP xtmp); 8514 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8515 ins_encode %{ 8516 int vlen_enc = Assembler::AVX_128bit; 8517 int vlen = Matcher::vector_length(this); 8518 if (vlen <= 8) { 8519 assert(UseSSE >= 3, "required"); 8520 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8521 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8522 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8523 } else { 8524 assert(UseAVX > 0, "required"); 8525 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8526 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8527 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8528 } 8529 %} 8530 ins_pipe( pipe_slow ); 8531 %} 8532 8533 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8534 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8535 match(Set dst (VectorStoreMask src size)); 8536 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8537 effect(TEMP_DEF dst, TEMP xtmp); 8538 ins_encode %{ 8539 int vlen_enc = Assembler::AVX_128bit; 8540 int vlen = Matcher::vector_length(this); 8541 if (vlen <= 4) { 8542 assert(UseSSE >= 3, "required"); 8543 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8544 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8545 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8546 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8547 } else { 8548 assert(UseAVX > 0, "required"); 8549 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8550 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8551 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8552 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8553 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8554 } 8555 %} 8556 ins_pipe( pipe_slow ); 8557 %} 8558 8559 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8560 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8561 match(Set dst (VectorStoreMask src size)); 8562 effect(TEMP_DEF dst, TEMP xtmp); 8563 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8564 ins_encode %{ 8565 assert(UseSSE >= 3, "required"); 8566 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8567 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8568 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8569 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8570 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8571 %} 8572 ins_pipe( pipe_slow ); 8573 %} 8574 8575 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8576 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8577 match(Set dst (VectorStoreMask src size)); 8578 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %} 8579 effect(TEMP_DEF dst, TEMP vtmp); 8580 ins_encode %{ 8581 int vlen_enc = Assembler::AVX_128bit; 8582 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8583 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8584 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8585 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8586 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8587 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8588 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8589 %} 8590 ins_pipe( pipe_slow ); 8591 %} 8592 8593 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8594 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8595 match(Set dst (VectorStoreMask src size)); 8596 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8597 ins_encode %{ 8598 int src_vlen_enc = vector_length_encoding(this, $src); 8599 int dst_vlen_enc = vector_length_encoding(this); 8600 if (!VM_Version::supports_avx512vl()) { 8601 src_vlen_enc = Assembler::AVX_512bit; 8602 } 8603 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8604 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8605 %} 8606 ins_pipe( pipe_slow ); 8607 %} 8608 8609 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8610 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8611 match(Set dst (VectorStoreMask src size)); 8612 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8613 ins_encode %{ 8614 int src_vlen_enc = vector_length_encoding(this, $src); 8615 int dst_vlen_enc = vector_length_encoding(this); 8616 if (!VM_Version::supports_avx512vl()) { 8617 src_vlen_enc = Assembler::AVX_512bit; 8618 } 8619 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8620 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8621 %} 8622 ins_pipe( pipe_slow ); 8623 %} 8624 8625 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{ 8626 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8627 match(Set dst (VectorStoreMask mask size)); 8628 effect(TEMP_DEF dst); 8629 format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %} 8630 ins_encode %{ 8631 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8632 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8633 false, Assembler::AVX_512bit, noreg); 8634 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8635 %} 8636 ins_pipe( pipe_slow ); 8637 %} 8638 8639 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8640 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8641 match(Set dst (VectorStoreMask mask size)); 8642 effect(TEMP_DEF dst); 8643 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8644 ins_encode %{ 8645 int dst_vlen_enc = vector_length_encoding(this); 8646 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8647 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8648 %} 8649 ins_pipe( pipe_slow ); 8650 %} 8651 8652 instruct vmaskcast_evex(kReg dst) %{ 8653 match(Set dst (VectorMaskCast dst)); 8654 ins_cost(0); 8655 format %{ "vector_mask_cast $dst" %} 8656 ins_encode %{ 8657 // empty 8658 %} 8659 ins_pipe(empty); 8660 %} 8661 8662 instruct vmaskcast(vec dst) %{ 8663 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); 8664 match(Set dst (VectorMaskCast dst)); 8665 ins_cost(0); 8666 format %{ "vector_mask_cast $dst" %} 8667 ins_encode %{ 8668 // empty 8669 %} 8670 ins_pipe(empty); 8671 %} 8672 8673 instruct vmaskcast_avx(vec dst, vec src) %{ 8674 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1))); 8675 match(Set dst (VectorMaskCast src)); 8676 format %{ "vector_mask_cast $dst, $src" %} 8677 ins_encode %{ 8678 int vlen = Matcher::vector_length(this); 8679 BasicType src_bt = Matcher::vector_element_basic_type(this, $src); 8680 BasicType dst_bt = Matcher::vector_element_basic_type(this); 8681 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen); 8682 %} 8683 ins_pipe(pipe_slow); 8684 %} 8685 8686 //-------------------------------- Load Iota Indices ---------------------------------- 8687 8688 instruct loadIotaIndices(vec dst, immI_0 src) %{ 8689 match(Set dst (VectorLoadConst src)); 8690 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8691 ins_encode %{ 8692 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8693 BasicType bt = Matcher::vector_element_basic_type(this); 8694 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt); 8695 %} 8696 ins_pipe( pipe_slow ); 8697 %} 8698 8699 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ 8700 match(Set dst (PopulateIndex src1 src2)); 8701 effect(TEMP dst, TEMP vtmp); 8702 format %{ "vector_populate_index $dst $src1 $src2\t! 
using $vtmp as TEMP" %} 8703 ins_encode %{ 8704 assert($src2$$constant == 1, "required"); 8705 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8706 int vlen_enc = vector_length_encoding(this); 8707 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8708 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8709 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8710 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8711 %} 8712 ins_pipe( pipe_slow ); 8713 %} 8714 8715 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ 8716 match(Set dst (PopulateIndex src1 src2)); 8717 effect(TEMP dst, TEMP vtmp); 8718 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} 8719 ins_encode %{ 8720 assert($src2$$constant == 1, "required"); 8721 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8722 int vlen_enc = vector_length_encoding(this); 8723 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8724 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8725 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8726 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8727 %} 8728 ins_pipe( pipe_slow ); 8729 %} 8730 8731 //-------------------------------- Rearrange ---------------------------------- 8732 8733 // LoadShuffle/Rearrange for Byte 8734 instruct rearrangeB(vec dst, vec shuffle) %{ 8735 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8736 Matcher::vector_length(n) < 32); 8737 match(Set dst (VectorRearrange dst shuffle)); 8738 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8739 ins_encode %{ 8740 assert(UseSSE >= 4, "required"); 8741 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8742 %} 8743 ins_pipe( pipe_slow ); 8744 %} 8745 8746 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8747 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8748 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8749 match(Set dst (VectorRearrange src shuffle)); 8750 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8751 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8752 ins_encode %{ 8753 assert(UseAVX >= 2, "required"); 8754 // Swap src into vtmp1 8755 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8756 // Shuffle swapped src to get entries from other 128 bit lane 8757 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8758 // Shuffle original src to get entries from self 128 bit lane 8759 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8760 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8761 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8762 // Perform the blend 8763 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8764 %} 8765 ins_pipe( pipe_slow ); 8766 %} 8767 8768 8769 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{ 8770 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8771 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi()); 8772 match(Set dst (VectorRearrange src shuffle)); 8773 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 8774 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %} 8775 ins_encode %{ 8776 int vlen_enc = vector_length_encoding(this); 8777 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, 8778 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, 8779 $rtmp$$Register, $ktmp$$KRegister, vlen_enc); 8780 %} 8781 ins_pipe( pipe_slow ); 8782 %} 8783 8784 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{ 8785 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8786 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); 8787 match(Set dst (VectorRearrange src shuffle)); 8788 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8789 ins_encode %{ 8790 int vlen_enc = vector_length_encoding(this); 8791 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8792 %} 8793 ins_pipe( pipe_slow ); 8794 %} 8795 8796 // LoadShuffle/Rearrange for Short 8797 8798 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{ 8799 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8800 !VM_Version::supports_avx512bw()); 8801 match(Set dst (VectorLoadShuffle src)); 8802 effect(TEMP dst, TEMP vtmp); 8803 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8804 ins_encode %{ 8805 // Create a byte shuffle mask from short shuffle mask 8806 // only byte shuffle instruction available on these platforms 8807 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8808 if (UseAVX == 0) { 8809 assert(vlen_in_bytes <= 16, "required"); 8810 // Multiply each shuffle by two to get byte index 8811 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister); 8812 __ psllw($vtmp$$XMMRegister, 1); 8813 8814 // Duplicate to create 2 copies of byte index 8815 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8816 __ psllw($dst$$XMMRegister, 8); 8817 __ por($dst$$XMMRegister, $vtmp$$XMMRegister); 8818 8819 // Add one to get alternate byte index 8820 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg); 8821 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8822 } else { 8823 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); 8824 int vlen_enc = vector_length_encoding(this); 8825 // Multiply each shuffle by two to get byte index 8826 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc); 8827 8828 // Duplicate to create 2 copies of byte index 8829 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); 8830 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8831 8832 // Add one to get alternate byte index 8833 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg); 8834 } 8835 %} 8836 ins_pipe( pipe_slow ); 8837 %} 8838 8839 instruct rearrangeS(vec dst, vec shuffle) %{ 8840 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8841 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); 8842 match(Set dst (VectorRearrange dst shuffle)); 8843 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8844 ins_encode %{ 8845 assert(UseSSE >= 4, "required"); 8846 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8847 %} 8848 ins_pipe( pipe_slow ); 8849 %} 8850 8851 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8852 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8853 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 8854 match(Set dst (VectorRearrange src shuffle)); 8855 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8856 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8857 ins_encode %{ 8858 assert(UseAVX >= 2, "required"); 8859 // Swap src into vtmp1 8860 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8861 // Shuffle swapped src to get entries from other 128 bit lane 8862 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8863 // Shuffle original src to get entries from self 128 bit lane 8864 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8865 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8866 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8867 // Perform the blend 8868 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8869 %} 8870 ins_pipe( pipe_slow ); 8871 %} 8872 8873 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 8874 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8875 VM_Version::supports_avx512bw()); 8876 match(Set dst (VectorRearrange src shuffle)); 8877 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8878 ins_encode %{ 8879 int vlen_enc = vector_length_encoding(this); 8880 if (!VM_Version::supports_avx512vl()) { 8881 vlen_enc = Assembler::AVX_512bit; 8882 } 8883 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8884 %} 8885 ins_pipe( pipe_slow ); 8886 %} 8887 8888 // LoadShuffle/Rearrange for Integer and Float 8889 8890 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{ 8891 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8892 Matcher::vector_length(n) == 4 && UseAVX == 0); 8893 match(Set dst (VectorLoadShuffle src)); 8894 effect(TEMP dst, TEMP vtmp); 8895 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8896 ins_encode %{ 8897 assert(UseSSE >= 4, "required"); 8898 8899 // Create a byte shuffle mask from int shuffle mask 8900 // only byte shuffle instruction available on these platforms 8901 8902 // Duplicate and multiply each shuffle by 4 8903 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister); 8904 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8905 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8906 __ psllw($vtmp$$XMMRegister, 2); 8907 8908 // Duplicate again to create 4 copies of byte index 8909 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8910 __ psllw($dst$$XMMRegister, 8); 8911 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 8912 8913 // Add 3,2,1,0 to get alternate byte index 8914 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg); 8915 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8916 %} 8917 ins_pipe( pipe_slow ); 8918 %} 8919 8920 instruct rearrangeI(vec dst, vec shuffle) %{ 8921 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8922 UseAVX == 0); 8923 match(Set dst (VectorRearrange dst shuffle)); 8924 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8925 ins_encode %{ 8926 assert(UseSSE >= 4, "required"); 8927 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8928 %} 8929 ins_pipe( pipe_slow ); 8930 %} 8931 8932 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 8933 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8934 UseAVX > 0); 8935 match(Set dst (VectorRearrange src shuffle)); 8936 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8937 ins_encode %{ 8938 int vlen_enc = vector_length_encoding(this); 8939 BasicType bt = Matcher::vector_element_basic_type(this); 8940 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8941 %} 8942 ins_pipe( pipe_slow ); 8943 %} 8944 8945 // LoadShuffle/Rearrange for Long and Double 8946 8947 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{ 8948 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8949 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8950 match(Set dst (VectorLoadShuffle src)); 8951 effect(TEMP dst, TEMP vtmp); 8952 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8953 ins_encode %{ 8954 assert(UseAVX >= 2, "required"); 8955 8956 int vlen_enc = vector_length_encoding(this); 8957 // Create a double word shuffle mask from long shuffle mask 8958 // only double word shuffle instruction available on these platforms 8959 8960 // Multiply each shuffle by two to get double word index 8961 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc); 8962 8963 // Duplicate each double word shuffle 8964 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 8965 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8966 8967 // Add one to get alternate double word index 8968 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg); 8969 %} 8970 ins_pipe( pipe_slow ); 8971 %} 8972 8973 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 8974 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8975 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8976 match(Set dst (VectorRearrange src shuffle)); 8977 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8978 ins_encode %{ 8979 assert(UseAVX >= 2, "required"); 8980 8981 int vlen_enc = vector_length_encoding(this); 8982 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8983 %} 8984 ins_pipe( pipe_slow ); 8985 %} 8986 8987 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 8988 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8989 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 8990 match(Set dst (VectorRearrange src shuffle)); 8991 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8992 ins_encode %{ 8993 assert(UseAVX > 2, "required"); 8994 8995 int vlen_enc = vector_length_encoding(this); 8996 if (vlen_enc == Assembler::AVX_128bit) { 8997 vlen_enc = Assembler::AVX_256bit; 8998 } 8999 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9000 %} 9001 ins_pipe( pipe_slow ); 9002 %} 9003 9004 // --------------------------------- FMA -------------------------------------- 9005 // a * b + c 9006 9007 instruct vfmaF_reg(vec a, vec b, vec c) %{ 9008 match(Set c (FmaVF c (Binary a b))); 9009 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 9010 ins_cost(150); 9011 ins_encode %{ 9012 assert(UseFMA, "not enabled"); 9013 int vlen_enc = vector_length_encoding(this); 9014 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9015 %} 9016 ins_pipe( pipe_slow ); 9017 %} 9018 9019 instruct vfmaF_mem(vec a, memory b, vec c) %{ 9020 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9021 match(Set c (FmaVF c (Binary a (LoadVector b)))); 9022 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 9023 ins_cost(150); 9024 ins_encode %{ 9025 assert(UseFMA, "not enabled"); 9026 int vlen_enc = vector_length_encoding(this); 9027 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9028 %} 9029 ins_pipe( pipe_slow ); 9030 %} 9031 9032 instruct vfmaD_reg(vec a, vec b, vec c) %{ 9033 match(Set c (FmaVD c (Binary a b))); 9034 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9035 ins_cost(150); 9036 ins_encode %{ 9037 assert(UseFMA, "not enabled"); 9038 int vlen_enc = vector_length_encoding(this); 9039 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9040 %} 9041 ins_pipe( pipe_slow ); 9042 %} 
9043 9044 instruct vfmaD_mem(vec a, memory b, vec c) %{ 9045 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9046 match(Set c (FmaVD c (Binary a (LoadVector b)))); 9047 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9048 ins_cost(150); 9049 ins_encode %{ 9050 assert(UseFMA, "not enabled"); 9051 int vlen_enc = vector_length_encoding(this); 9052 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9053 %} 9054 ins_pipe( pipe_slow ); 9055 %} 9056 9057 // --------------------------------- Vector Multiply Add -------------------------------------- 9058 9059 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 9060 predicate(UseAVX == 0); 9061 match(Set dst (MulAddVS2VI dst src1)); 9062 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 9063 ins_encode %{ 9064 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 9065 %} 9066 ins_pipe( pipe_slow ); 9067 %} 9068 9069 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 9070 predicate(UseAVX > 0); 9071 match(Set dst (MulAddVS2VI src1 src2)); 9072 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 9073 ins_encode %{ 9074 int vlen_enc = vector_length_encoding(this); 9075 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9076 %} 9077 ins_pipe( pipe_slow ); 9078 %} 9079 9080 // --------------------------------- Vector Multiply Add Add ---------------------------------- 9081 9082 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 9083 predicate(VM_Version::supports_avx512_vnni()); 9084 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 9085 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %} 9086 ins_encode %{ 9087 assert(UseAVX > 2, "required"); 9088 int vlen_enc = vector_length_encoding(this); 9089 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9090 %} 9091 ins_pipe( pipe_slow ); 9092 ins_cost(10); 9093 %} 9094 9095 // --------------------------------- PopCount -------------------------------------- 9096 9097 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 9098 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9099 match(Set dst (PopCountVI src)); 9100 match(Set dst (PopCountVL src)); 9101 format %{ "vector_popcount_integral $dst, $src" %} 9102 ins_encode %{ 9103 int opcode = this->ideal_Opcode(); 9104 int vlen_enc = vector_length_encoding(this, $src); 9105 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9106 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 9107 %} 9108 ins_pipe( pipe_slow ); 9109 %} 9110 9111 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9112 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9113 match(Set dst (PopCountVI src mask)); 9114 match(Set dst (PopCountVL src mask)); 9115 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 9116 ins_encode %{ 9117 int vlen_enc = vector_length_encoding(this, $src); 9118 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9119 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9120 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 9121 %} 9122 ins_pipe( pipe_slow ); 9123 %} 9124 9125 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 9126 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9127 
match(Set dst (PopCountVI src)); 9128 match(Set dst (PopCountVL src)); 9129 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9130 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 9131 ins_encode %{ 9132 int opcode = this->ideal_Opcode(); 9133 int vlen_enc = vector_length_encoding(this, $src); 9134 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9135 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9136 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 9137 %} 9138 ins_pipe( pipe_slow ); 9139 %} 9140 9141 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 9142 9143 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 9144 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9145 Matcher::vector_length_in_bytes(n->in(1)))); 9146 match(Set dst (CountTrailingZerosV src)); 9147 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 9148 ins_cost(400); 9149 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 9150 ins_encode %{ 9151 int vlen_enc = vector_length_encoding(this, $src); 9152 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9153 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9154 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9155 %} 9156 ins_pipe( pipe_slow ); 9157 %} 9158 9159 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9160 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9161 VM_Version::supports_avx512cd() && 9162 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9163 match(Set dst (CountTrailingZerosV src)); 9164 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9165 ins_cost(400); 9166 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 9167 ins_encode %{ 9168 int vlen_enc = vector_length_encoding(this, $src); 9169 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9170 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9171 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9172 %} 9173 ins_pipe( pipe_slow ); 9174 %} 9175 9176 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{ 9177 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9178 match(Set dst (CountTrailingZerosV src)); 9179 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp); 9180 ins_cost(400); 9181 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %} 9182 ins_encode %{ 9183 int vlen_enc = vector_length_encoding(this, $src); 9184 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9185 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9186 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 9187 $ktmp$$KRegister, $rtmp$$Register, vlen_enc); 9188 %} 9189 ins_pipe( pipe_slow ); 9190 %} 9191 9192 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9193 
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9194 match(Set dst (CountTrailingZerosV src)); 9195 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9196 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9197 ins_encode %{ 9198 int vlen_enc = vector_length_encoding(this, $src); 9199 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9200 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9201 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9202 %} 9203 ins_pipe( pipe_slow ); 9204 %} 9205 9206 9207 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 9208 9209 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 9210 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 9211 effect(TEMP dst); 9212 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9213 ins_encode %{ 9214 int vector_len = vector_length_encoding(this); 9215 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 9216 %} 9217 ins_pipe( pipe_slow ); 9218 %} 9219 9220 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 9221 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 9222 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 9223 effect(TEMP dst); 9224 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9225 ins_encode %{ 9226 int vector_len = vector_length_encoding(this); 9227 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 9228 %} 9229 ins_pipe( pipe_slow ); 9230 %} 9231 9232 // --------------------------------- Rotation Operations ---------------------------------- 9233 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 9234 match(Set dst (RotateLeftV src shift)); 9235 match(Set dst (RotateRightV src shift)); 9236 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 9237 ins_encode %{ 9238 int opcode = this->ideal_Opcode(); 9239 int vector_len = vector_length_encoding(this); 9240 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9241 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 9242 %} 9243 ins_pipe( pipe_slow ); 9244 %} 9245 9246 instruct vprorate(vec dst, vec src, vec shift) %{ 9247 match(Set dst (RotateLeftV src shift)); 9248 match(Set dst (RotateRightV src shift)); 9249 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 9250 ins_encode %{ 9251 int opcode = this->ideal_Opcode(); 9252 int vector_len = vector_length_encoding(this); 9253 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9254 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9255 %} 9256 ins_pipe( pipe_slow ); 9257 %} 9258 9259 // ---------------------------------- Masked Operations ------------------------------------ 9260 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{ 9261 predicate(!n->in(3)->bottom_type()->isa_vectmask()); 9262 match(Set dst (LoadVectorMasked mem mask)); 9263 format %{ "vector_masked_load $dst, $mem, $mask \t! 
vector masked copy" %} 9264 ins_encode %{ 9265 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9266 int vlen_enc = vector_length_encoding(this); 9267 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc); 9268 %} 9269 ins_pipe( pipe_slow ); 9270 %} 9271 9272 9273 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{ 9274 predicate(n->in(3)->bottom_type()->isa_vectmask()); 9275 match(Set dst (LoadVectorMasked mem mask)); 9276 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 9277 ins_encode %{ 9278 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9279 int vector_len = vector_length_encoding(this); 9280 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len); 9281 %} 9282 ins_pipe( pipe_slow ); 9283 %} 9284 9285 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{ 9286 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9287 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9288 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9289 ins_encode %{ 9290 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9291 int vlen_enc = vector_length_encoding(src_node); 9292 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9293 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc); 9294 %} 9295 ins_pipe( pipe_slow ); 9296 %} 9297 9298 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{ 9299 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9300 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9301 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9302 ins_encode %{ 9303 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9304 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9305 int vlen_enc = vector_length_encoding(src_node); 9306 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc); 9307 %} 9308 ins_pipe( pipe_slow ); 9309 %} 9310 9311 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{ 9312 match(Set addr (VerifyVectorAlignment addr mask)); 9313 effect(KILL cr); 9314 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %} 9315 ins_encode %{ 9316 Label Lskip; 9317 // check if masked bits of addr are zero 9318 __ testq($addr$$Register, $mask$$constant); 9319 __ jccb(Assembler::equal, Lskip); 9320 __ stop("verify_vector_alignment found a misaligned vector memory access"); 9321 __ bind(Lskip); 9322 %} 9323 ins_pipe(pipe_slow); 9324 %} 9325 9326 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 9327 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 9328 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 9329 format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
vector mask comparison" %} 9330 ins_encode %{ 9331 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 9332 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 9333 9334 Label DONE; 9335 int vlen_enc = vector_length_encoding(this, $src1); 9336 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 9337 9338 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 9339 __ mov64($dst$$Register, -1L); 9340 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 9341 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 9342 __ jccb(Assembler::carrySet, DONE); 9343 __ kmovql($dst$$Register, $ktmp1$$KRegister); 9344 __ notq($dst$$Register); 9345 __ tzcntq($dst$$Register, $dst$$Register); 9346 __ bind(DONE); 9347 %} 9348 ins_pipe( pipe_slow ); 9349 %} 9350 9351 9352 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{ 9353 match(Set dst (VectorMaskGen len)); 9354 effect(TEMP temp, KILL cr); 9355 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9356 ins_encode %{ 9357 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9358 %} 9359 ins_pipe( pipe_slow ); 9360 %} 9361 9362 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9363 match(Set dst (VectorMaskGen len)); 9364 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9365 effect(TEMP temp); 9366 ins_encode %{ 9367 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9368 __ kmovql($dst$$KRegister, $temp$$Register); 9369 %} 9370 ins_pipe( pipe_slow ); 9371 %} 9372 9373 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9374 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9375 match(Set dst (VectorMaskToLong mask)); 9376 effect(TEMP dst, KILL cr); 9377 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9378 ins_encode %{ 9379 int opcode = this->ideal_Opcode(); 9380 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9381 int mask_len = Matcher::vector_length(this, $mask); 9382 int mask_size = mask_len * type2aelembytes(mbt); 9383 int vlen_enc = vector_length_encoding(this, $mask); 9384 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9385 $dst$$Register, mask_len, mask_size, vlen_enc); 9386 %} 9387 ins_pipe( pipe_slow ); 9388 %} 9389 9390 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9391 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9392 match(Set dst (VectorMaskToLong mask)); 9393 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9394 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9395 ins_encode %{ 9396 int opcode = this->ideal_Opcode(); 9397 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9398 int mask_len = Matcher::vector_length(this, $mask); 9399 int vlen_enc = vector_length_encoding(this, $mask); 9400 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9401 $dst$$Register, mask_len, mbt, vlen_enc); 9402 %} 9403 ins_pipe( pipe_slow ); 9404 %} 9405 9406 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9407 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9408 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9409 format %{ "vector_tolong_avx $dst, $mask \t! 
using $xtmp as TEMP" %} 9410 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9411 ins_encode %{ 9412 int opcode = this->ideal_Opcode(); 9413 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9414 int mask_len = Matcher::vector_length(this, $mask); 9415 int vlen_enc = vector_length_encoding(this, $mask); 9416 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9417 $dst$$Register, mask_len, mbt, vlen_enc); 9418 %} 9419 ins_pipe( pipe_slow ); 9420 %} 9421 9422 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9423 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9424 match(Set dst (VectorMaskTrueCount mask)); 9425 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9426 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9427 ins_encode %{ 9428 int opcode = this->ideal_Opcode(); 9429 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9430 int mask_len = Matcher::vector_length(this, $mask); 9431 int mask_size = mask_len * type2aelembytes(mbt); 9432 int vlen_enc = vector_length_encoding(this, $mask); 9433 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9434 $tmp$$Register, mask_len, mask_size, vlen_enc); 9435 %} 9436 ins_pipe( pipe_slow ); 9437 %} 9438 9439 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9440 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9441 match(Set dst (VectorMaskTrueCount mask)); 9442 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9443 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9444 ins_encode %{ 9445 int opcode = this->ideal_Opcode(); 9446 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9447 int mask_len = Matcher::vector_length(this, $mask); 9448 int vlen_enc = vector_length_encoding(this, $mask); 9449 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9450 $tmp$$Register, mask_len, mbt, vlen_enc); 9451 %} 9452 ins_pipe( pipe_slow ); 9453 %} 9454 9455 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9456 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9457 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9458 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9459 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9460 ins_encode %{ 9461 int opcode = this->ideal_Opcode(); 9462 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9463 int mask_len = Matcher::vector_length(this, $mask); 9464 int vlen_enc = vector_length_encoding(this, $mask); 9465 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9466 $tmp$$Register, mask_len, mbt, vlen_enc); 9467 %} 9468 ins_pipe( pipe_slow ); 9469 %} 9470 9471 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9472 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9473 match(Set dst (VectorMaskFirstTrue mask)); 9474 match(Set dst (VectorMaskLastTrue mask)); 9475 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9476 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! 
using $tmp as TEMP" %} 9477 ins_encode %{ 9478 int opcode = this->ideal_Opcode(); 9479 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9480 int mask_len = Matcher::vector_length(this, $mask); 9481 int mask_size = mask_len * type2aelembytes(mbt); 9482 int vlen_enc = vector_length_encoding(this, $mask); 9483 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9484 $tmp$$Register, mask_len, mask_size, vlen_enc); 9485 %} 9486 ins_pipe( pipe_slow ); 9487 %} 9488 9489 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9490 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9491 match(Set dst (VectorMaskFirstTrue mask)); 9492 match(Set dst (VectorMaskLastTrue mask)); 9493 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9494 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9495 ins_encode %{ 9496 int opcode = this->ideal_Opcode(); 9497 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9498 int mask_len = Matcher::vector_length(this, $mask); 9499 int vlen_enc = vector_length_encoding(this, $mask); 9500 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9501 $tmp$$Register, mask_len, mbt, vlen_enc); 9502 %} 9503 ins_pipe( pipe_slow ); 9504 %} 9505 9506 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9507 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9508 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9509 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9510 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9511 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! 
using $tmp, $xtmp as TEMP" %} 9512 ins_encode %{ 9513 int opcode = this->ideal_Opcode(); 9514 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9515 int mask_len = Matcher::vector_length(this, $mask); 9516 int vlen_enc = vector_length_encoding(this, $mask); 9517 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9518 $tmp$$Register, mask_len, mbt, vlen_enc); 9519 %} 9520 ins_pipe( pipe_slow ); 9521 %} 9522 9523 // --------------------------------- Compress/Expand Operations --------------------------- 9524 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{ 9525 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 9526 match(Set dst (CompressV src mask)); 9527 match(Set dst (ExpandV src mask)); 9528 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr); 9529 format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %} 9530 ins_encode %{ 9531 int opcode = this->ideal_Opcode(); 9532 int vlen_enc = vector_length_encoding(this); 9533 BasicType bt = Matcher::vector_element_basic_type(this); 9534 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register, 9535 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc); 9536 %} 9537 ins_pipe( pipe_slow ); 9538 %} 9539 9540 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9541 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 9542 match(Set dst (CompressV src mask)); 9543 match(Set dst (ExpandV src mask)); 9544 format %{ "vector_compress_expand $dst, $src, $mask" %} 9545 ins_encode %{ 9546 int opcode = this->ideal_Opcode(); 9547 int vector_len = vector_length_encoding(this); 9548 BasicType bt = Matcher::vector_element_basic_type(this); 9549 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9550 %} 9551 ins_pipe( pipe_slow ); 9552 %} 9553 9554 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9555 match(Set dst (CompressM mask)); 9556 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9557 format %{ "mask_compress_evex $dst, $mask\t! 
using $rtmp1 and $rtmp2 as TEMP" %} 9558 ins_encode %{ 9559 assert(this->in(1)->bottom_type()->isa_vectmask(), ""); 9560 int mask_len = Matcher::vector_length(this); 9561 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len); 9562 %} 9563 ins_pipe( pipe_slow ); 9564 %} 9565 9566 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ 9567 9568 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9569 predicate(!VM_Version::supports_gfni()); 9570 match(Set dst (ReverseV src)); 9571 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9572 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9573 ins_encode %{ 9574 int vec_enc = vector_length_encoding(this); 9575 BasicType bt = Matcher::vector_element_basic_type(this); 9576 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9577 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9578 %} 9579 ins_pipe( pipe_slow ); 9580 %} 9581 9582 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{ 9583 predicate(VM_Version::supports_gfni()); 9584 match(Set dst (ReverseV src)); 9585 effect(TEMP dst, TEMP xtmp); 9586 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %} 9587 ins_encode %{ 9588 int vec_enc = vector_length_encoding(this); 9589 BasicType bt = Matcher::vector_element_basic_type(this); 9590 InternalAddress addr = $constantaddress(jlong(0x8040201008040201)); 9591 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc, 9592 $xtmp$$XMMRegister); 9593 %} 9594 ins_pipe( pipe_slow ); 9595 %} 9596 9597 instruct vreverse_byte_reg(vec dst, vec src) %{ 9598 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); 9599 match(Set dst (ReverseBytesV src)); 9600 effect(TEMP dst); 9601 format %{ "vector_reverse_byte $dst, $src" %} 9602 ins_encode %{ 9603 int vec_enc = vector_length_encoding(this); 9604 BasicType bt = Matcher::vector_element_basic_type(this); 9605 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc); 9606 %} 9607 ins_pipe( pipe_slow ); 9608 %} 9609 9610 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9611 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64); 9612 match(Set dst (ReverseBytesV src)); 9613 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9614 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9615 ins_encode %{ 9616 int vec_enc = vector_length_encoding(this); 9617 BasicType bt = Matcher::vector_element_basic_type(this); 9618 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9619 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9620 %} 9621 ins_pipe( pipe_slow ); 9622 %} 9623 9624 // ---------------------------------- Vector Count Leading Zeros ----------------------------------- 9625 9626 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ 9627 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9628 Matcher::vector_length_in_bytes(n->in(1)))); 9629 match(Set dst (CountLeadingZerosV src)); 9630 format %{ "vector_count_leading_zeros $dst, $src" %} 9631 ins_encode %{ 9632 int vlen_enc = vector_length_encoding(this, $src); 9633 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9634 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, 
$src$$XMMRegister, xnoreg, 9635 xnoreg, xnoreg, k0, noreg, true, vlen_enc); 9636 %} 9637 ins_pipe( pipe_slow ); 9638 %} 9639 9640 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9641 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9642 Matcher::vector_length_in_bytes(n->in(1)))); 9643 match(Set dst (CountLeadingZerosV src mask)); 9644 format %{ "vector_count_leading_zeros $dst, $src, $mask" %} 9645 ins_encode %{ 9646 int vlen_enc = vector_length_encoding(this, $src); 9647 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9648 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9649 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, 9650 xnoreg, $mask$$KRegister, noreg, true, vlen_enc); 9651 %} 9652 ins_pipe( pipe_slow ); 9653 %} 9654 9655 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{ 9656 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9657 VM_Version::supports_avx512cd() && 9658 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9659 match(Set dst (CountLeadingZerosV src)); 9660 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 9661 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %} 9662 ins_encode %{ 9663 int vlen_enc = vector_length_encoding(this, $src); 9664 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9665 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9666 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc); 9667 %} 9668 ins_pipe( pipe_slow ); 9669 %} 9670 9671 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{ 9672 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9673 match(Set dst (CountLeadingZerosV src)); 9674 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 9675 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %} 9676 ins_encode %{ 9677 int vlen_enc = vector_length_encoding(this, $src); 9678 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9679 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9680 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister, 9681 $rtmp$$Register, true, vlen_enc); 9682 %} 9683 ins_pipe( pipe_slow ); 9684 %} 9685 9686 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{ 9687 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT && 9688 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9689 match(Set dst (CountLeadingZerosV src)); 9690 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 9691 format %{ "vector_count_leading_zeros $dst, $src\t! 
using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 9692 ins_encode %{ 9693 int vlen_enc = vector_length_encoding(this, $src); 9694 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9695 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9696 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc); 9697 %} 9698 ins_pipe( pipe_slow ); 9699 %} 9700 9701 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9702 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT && 9703 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9704 match(Set dst (CountLeadingZerosV src)); 9705 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9706 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9707 ins_encode %{ 9708 int vlen_enc = vector_length_encoding(this, $src); 9709 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9710 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9711 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9712 %} 9713 ins_pipe( pipe_slow ); 9714 %} 9715 9716 // ---------------------------------- Vector Masked Operations ------------------------------------ 9717 9718 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ 9719 match(Set dst (AddVB (Binary dst src2) mask)); 9720 match(Set dst (AddVS (Binary dst src2) mask)); 9721 match(Set dst (AddVI (Binary dst src2) mask)); 9722 match(Set dst (AddVL (Binary dst src2) mask)); 9723 match(Set dst (AddVF (Binary dst src2) mask)); 9724 match(Set dst (AddVD (Binary dst src2) mask)); 9725 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9726 ins_encode %{ 9727 int vlen_enc = vector_length_encoding(this); 9728 BasicType bt = Matcher::vector_element_basic_type(this); 9729 int opc = this->ideal_Opcode(); 9730 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9731 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9732 %} 9733 ins_pipe( pipe_slow ); 9734 %} 9735 9736 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ 9737 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); 9738 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); 9739 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); 9740 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); 9741 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); 9742 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); 9743 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9744 ins_encode %{ 9745 int vlen_enc = vector_length_encoding(this); 9746 BasicType bt = Matcher::vector_element_basic_type(this); 9747 int opc = this->ideal_Opcode(); 9748 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9749 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9750 %} 9751 ins_pipe( pipe_slow ); 9752 %} 9753 9754 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ 9755 match(Set dst (XorV (Binary dst src2) mask)); 9756 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! 
xor masked operation" %} 9757 ins_encode %{ 9758 int vlen_enc = vector_length_encoding(this); 9759 BasicType bt = Matcher::vector_element_basic_type(this); 9760 int opc = this->ideal_Opcode(); 9761 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9762 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9763 %} 9764 ins_pipe( pipe_slow ); 9765 %} 9766 9767 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9768 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9769 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9770 ins_encode %{ 9771 int vlen_enc = vector_length_encoding(this); 9772 BasicType bt = Matcher::vector_element_basic_type(this); 9773 int opc = this->ideal_Opcode(); 9774 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9775 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9776 %} 9777 ins_pipe( pipe_slow ); 9778 %} 9779 9780 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9781 match(Set dst (OrV (Binary dst src2) mask)); 9782 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9783 ins_encode %{ 9784 int vlen_enc = vector_length_encoding(this); 9785 BasicType bt = Matcher::vector_element_basic_type(this); 9786 int opc = this->ideal_Opcode(); 9787 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9788 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9789 %} 9790 ins_pipe( pipe_slow ); 9791 %} 9792 9793 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9794 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9795 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9796 ins_encode %{ 9797 int vlen_enc = vector_length_encoding(this); 9798 BasicType bt = Matcher::vector_element_basic_type(this); 9799 int opc = this->ideal_Opcode(); 9800 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9801 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9802 %} 9803 ins_pipe( pipe_slow ); 9804 %} 9805 9806 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9807 match(Set dst (AndV (Binary dst src2) mask)); 9808 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9809 ins_encode %{ 9810 int vlen_enc = vector_length_encoding(this); 9811 BasicType bt = Matcher::vector_element_basic_type(this); 9812 int opc = this->ideal_Opcode(); 9813 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9814 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9815 %} 9816 ins_pipe( pipe_slow ); 9817 %} 9818 9819 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 9820 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 9821 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9822 ins_encode %{ 9823 int vlen_enc = vector_length_encoding(this); 9824 BasicType bt = Matcher::vector_element_basic_type(this); 9825 int opc = this->ideal_Opcode(); 9826 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9827 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9828 %} 9829 ins_pipe( pipe_slow ); 9830 %} 9831 9832 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 9833 match(Set dst (SubVB (Binary dst src2) mask)); 9834 match(Set dst (SubVS (Binary dst src2) mask)); 9835 match(Set dst (SubVI (Binary dst src2) mask)); 9836 match(Set dst (SubVL (Binary dst src2) mask)); 9837 match(Set dst (SubVF (Binary dst src2) mask)); 9838 match(Set dst (SubVD (Binary dst src2) mask)); 9839 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! 
sub masked operation" %} 9840 ins_encode %{ 9841 int vlen_enc = vector_length_encoding(this); 9842 BasicType bt = Matcher::vector_element_basic_type(this); 9843 int opc = this->ideal_Opcode(); 9844 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9845 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9846 %} 9847 ins_pipe( pipe_slow ); 9848 %} 9849 9850 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{ 9851 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask)); 9852 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask)); 9853 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask)); 9854 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask)); 9855 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask)); 9856 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask)); 9857 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 9858 ins_encode %{ 9859 int vlen_enc = vector_length_encoding(this); 9860 BasicType bt = Matcher::vector_element_basic_type(this); 9861 int opc = this->ideal_Opcode(); 9862 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9863 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9864 %} 9865 ins_pipe( pipe_slow ); 9866 %} 9867 9868 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{ 9869 match(Set dst (MulVS (Binary dst src2) mask)); 9870 match(Set dst (MulVI (Binary dst src2) mask)); 9871 match(Set dst (MulVL (Binary dst src2) mask)); 9872 match(Set dst (MulVF (Binary dst src2) mask)); 9873 match(Set dst (MulVD (Binary dst src2) mask)); 9874 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9875 ins_encode %{ 9876 int vlen_enc = vector_length_encoding(this); 9877 BasicType bt = Matcher::vector_element_basic_type(this); 9878 int opc = this->ideal_Opcode(); 9879 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9880 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9881 %} 9882 ins_pipe( pipe_slow ); 9883 %} 9884 9885 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{ 9886 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask)); 9887 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask)); 9888 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask)); 9889 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask)); 9890 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask)); 9891 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9892 ins_encode %{ 9893 int vlen_enc = vector_length_encoding(this); 9894 BasicType bt = Matcher::vector_element_basic_type(this); 9895 int opc = this->ideal_Opcode(); 9896 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9897 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9898 %} 9899 ins_pipe( pipe_slow ); 9900 %} 9901 9902 instruct vsqrt_reg_masked(vec dst, kReg mask) %{ 9903 match(Set dst (SqrtVF dst mask)); 9904 match(Set dst (SqrtVD dst mask)); 9905 format %{ "vpsqrt_masked $dst, $mask\t! 
sqrt masked operation" %} 9906 ins_encode %{ 9907 int vlen_enc = vector_length_encoding(this); 9908 BasicType bt = Matcher::vector_element_basic_type(this); 9909 int opc = this->ideal_Opcode(); 9910 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9911 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 9912 %} 9913 ins_pipe( pipe_slow ); 9914 %} 9915 9916 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{ 9917 match(Set dst (DivVF (Binary dst src2) mask)); 9918 match(Set dst (DivVD (Binary dst src2) mask)); 9919 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 9920 ins_encode %{ 9921 int vlen_enc = vector_length_encoding(this); 9922 BasicType bt = Matcher::vector_element_basic_type(this); 9923 int opc = this->ideal_Opcode(); 9924 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9925 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9926 %} 9927 ins_pipe( pipe_slow ); 9928 %} 9929 9930 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{ 9931 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask)); 9932 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask)); 9933 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 9934 ins_encode %{ 9935 int vlen_enc = vector_length_encoding(this); 9936 BasicType bt = Matcher::vector_element_basic_type(this); 9937 int opc = this->ideal_Opcode(); 9938 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9939 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9940 %} 9941 ins_pipe( pipe_slow ); 9942 %} 9943 9944 9945 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9946 match(Set dst (RotateLeftV (Binary dst shift) mask)); 9947 match(Set dst (RotateRightV (Binary dst shift) mask)); 9948 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %} 9949 ins_encode %{ 9950 int vlen_enc = vector_length_encoding(this); 9951 BasicType bt = Matcher::vector_element_basic_type(this); 9952 int opc = this->ideal_Opcode(); 9953 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9954 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9955 %} 9956 ins_pipe( pipe_slow ); 9957 %} 9958 9959 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{ 9960 match(Set dst (RotateLeftV (Binary dst src2) mask)); 9961 match(Set dst (RotateRightV (Binary dst src2) mask)); 9962 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %} 9963 ins_encode %{ 9964 int vlen_enc = vector_length_encoding(this); 9965 BasicType bt = Matcher::vector_element_basic_type(this); 9966 int opc = this->ideal_Opcode(); 9967 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9968 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9969 %} 9970 ins_pipe( pipe_slow ); 9971 %} 9972 9973 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9974 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask)); 9975 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask)); 9976 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask)); 9977 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! 
lshift masked operation" %} 9978 ins_encode %{ 9979 int vlen_enc = vector_length_encoding(this); 9980 BasicType bt = Matcher::vector_element_basic_type(this); 9981 int opc = this->ideal_Opcode(); 9982 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9983 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9984 %} 9985 ins_pipe( pipe_slow ); 9986 %} 9987 9988 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9989 predicate(!n->as_ShiftV()->is_var_shift()); 9990 match(Set dst (LShiftVS (Binary dst src2) mask)); 9991 match(Set dst (LShiftVI (Binary dst src2) mask)); 9992 match(Set dst (LShiftVL (Binary dst src2) mask)); 9993 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9994 ins_encode %{ 9995 int vlen_enc = vector_length_encoding(this); 9996 BasicType bt = Matcher::vector_element_basic_type(this); 9997 int opc = this->ideal_Opcode(); 9998 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9999 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10000 %} 10001 ins_pipe( pipe_slow ); 10002 %} 10003 10004 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10005 predicate(n->as_ShiftV()->is_var_shift()); 10006 match(Set dst (LShiftVS (Binary dst src2) mask)); 10007 match(Set dst (LShiftVI (Binary dst src2) mask)); 10008 match(Set dst (LShiftVL (Binary dst src2) mask)); 10009 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 10010 ins_encode %{ 10011 int vlen_enc = vector_length_encoding(this); 10012 BasicType bt = Matcher::vector_element_basic_type(this); 10013 int opc = this->ideal_Opcode(); 10014 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10015 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10016 %} 10017 ins_pipe( pipe_slow ); 10018 %} 10019 10020 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10021 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask)); 10022 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask)); 10023 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask)); 10024 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %} 10025 ins_encode %{ 10026 int vlen_enc = vector_length_encoding(this); 10027 BasicType bt = Matcher::vector_element_basic_type(this); 10028 int opc = this->ideal_Opcode(); 10029 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10030 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10031 %} 10032 ins_pipe( pipe_slow ); 10033 %} 10034 10035 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10036 predicate(!n->as_ShiftV()->is_var_shift()); 10037 match(Set dst (RShiftVS (Binary dst src2) mask)); 10038 match(Set dst (RShiftVI (Binary dst src2) mask)); 10039 match(Set dst (RShiftVL (Binary dst src2) mask)); 10040 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! 
rshift masked operation" %} 10041 ins_encode %{ 10042 int vlen_enc = vector_length_encoding(this); 10043 BasicType bt = Matcher::vector_element_basic_type(this); 10044 int opc = this->ideal_Opcode(); 10045 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10046 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10047 %} 10048 ins_pipe( pipe_slow ); 10049 %} 10050 10051 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10052 predicate(n->as_ShiftV()->is_var_shift()); 10053 match(Set dst (RShiftVS (Binary dst src2) mask)); 10054 match(Set dst (RShiftVI (Binary dst src2) mask)); 10055 match(Set dst (RShiftVL (Binary dst src2) mask)); 10056 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 10057 ins_encode %{ 10058 int vlen_enc = vector_length_encoding(this); 10059 BasicType bt = Matcher::vector_element_basic_type(this); 10060 int opc = this->ideal_Opcode(); 10061 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10062 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10063 %} 10064 ins_pipe( pipe_slow ); 10065 %} 10066 10067 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10068 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask)); 10069 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask)); 10070 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask)); 10071 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %} 10072 ins_encode %{ 10073 int vlen_enc = vector_length_encoding(this); 10074 BasicType bt = Matcher::vector_element_basic_type(this); 10075 int opc = this->ideal_Opcode(); 10076 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10077 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10078 %} 10079 ins_pipe( pipe_slow ); 10080 %} 10081 10082 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10083 predicate(!n->as_ShiftV()->is_var_shift()); 10084 match(Set dst (URShiftVS (Binary dst src2) mask)); 10085 match(Set dst (URShiftVI (Binary dst src2) mask)); 10086 match(Set dst (URShiftVL (Binary dst src2) mask)); 10087 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 10088 ins_encode %{ 10089 int vlen_enc = vector_length_encoding(this); 10090 BasicType bt = Matcher::vector_element_basic_type(this); 10091 int opc = this->ideal_Opcode(); 10092 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10093 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10094 %} 10095 ins_pipe( pipe_slow ); 10096 %} 10097 10098 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10099 predicate(n->as_ShiftV()->is_var_shift()); 10100 match(Set dst (URShiftVS (Binary dst src2) mask)); 10101 match(Set dst (URShiftVI (Binary dst src2) mask)); 10102 match(Set dst (URShiftVL (Binary dst src2) mask)); 10103 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 10104 ins_encode %{ 10105 int vlen_enc = vector_length_encoding(this); 10106 BasicType bt = Matcher::vector_element_basic_type(this); 10107 int opc = this->ideal_Opcode(); 10108 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10109 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10110 %} 10111 ins_pipe( pipe_slow ); 10112 %} 10113 10114 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{ 10115 match(Set dst (MaxV (Binary dst src2) mask)); 10116 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! 
max masked operation" %} 10117 ins_encode %{ 10118 int vlen_enc = vector_length_encoding(this); 10119 BasicType bt = Matcher::vector_element_basic_type(this); 10120 int opc = this->ideal_Opcode(); 10121 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10122 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10123 %} 10124 ins_pipe( pipe_slow ); 10125 %} 10126 10127 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{ 10128 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask)); 10129 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} 10130 ins_encode %{ 10131 int vlen_enc = vector_length_encoding(this); 10132 BasicType bt = Matcher::vector_element_basic_type(this); 10133 int opc = this->ideal_Opcode(); 10134 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10135 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10136 %} 10137 ins_pipe( pipe_slow ); 10138 %} 10139 10140 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{ 10141 match(Set dst (MinV (Binary dst src2) mask)); 10142 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 10143 ins_encode %{ 10144 int vlen_enc = vector_length_encoding(this); 10145 BasicType bt = Matcher::vector_element_basic_type(this); 10146 int opc = this->ideal_Opcode(); 10147 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10148 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10149 %} 10150 ins_pipe( pipe_slow ); 10151 %} 10152 10153 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{ 10154 match(Set dst (MinV (Binary dst (LoadVector src2)) mask)); 10155 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 10156 ins_encode %{ 10157 int vlen_enc = vector_length_encoding(this); 10158 BasicType bt = Matcher::vector_element_basic_type(this); 10159 int opc = this->ideal_Opcode(); 10160 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10161 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10162 %} 10163 ins_pipe( pipe_slow ); 10164 %} 10165 10166 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{ 10167 match(Set dst (VectorRearrange (Binary dst src2) mask)); 10168 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %} 10169 ins_encode %{ 10170 int vlen_enc = vector_length_encoding(this); 10171 BasicType bt = Matcher::vector_element_basic_type(this); 10172 int opc = this->ideal_Opcode(); 10173 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10174 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 10175 %} 10176 ins_pipe( pipe_slow ); 10177 %} 10178 10179 instruct vabs_masked(vec dst, kReg mask) %{ 10180 match(Set dst (AbsVB dst mask)); 10181 match(Set dst (AbsVS dst mask)); 10182 match(Set dst (AbsVI dst mask)); 10183 match(Set dst (AbsVL dst mask)); 10184 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %} 10185 ins_encode %{ 10186 int vlen_enc = vector_length_encoding(this); 10187 BasicType bt = Matcher::vector_element_basic_type(this); 10188 int opc = this->ideal_Opcode(); 10189 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10190 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 10191 %} 10192 ins_pipe( pipe_slow ); 10193 %} 10194 10195 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{ 10196 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask))); 10197 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask))); 10198 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! 
vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Comparison is dispatched on the element type of src1. For integral types the
    // boolean test also selects between signed and unsigned comparison predicates.
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2 and $xtmp1 as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    assert(mask_len <= 32, "invalid mask length");
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!"
%} 10345 ins_encode %{ 10346 __ kmov($dst$$KRegister, $src$$Register); 10347 %} 10348 ins_pipe( pipe_slow ); 10349 %} 10350 10351 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{ 10352 match(Set dst (AndVMask src1 src2)); 10353 match(Set dst (OrVMask src1 src2)); 10354 match(Set dst (XorVMask src1 src2)); 10355 effect(TEMP kscratch); 10356 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %} 10357 ins_encode %{ 10358 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 10359 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 10360 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal"); 10361 uint masklen = Matcher::vector_length(this); 10362 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen; 10363 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister); 10364 %} 10365 ins_pipe( pipe_slow ); 10366 %} 10367 10368 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{ 10369 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 10370 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %} 10371 ins_encode %{ 10372 int vlen_enc = vector_length_encoding(this); 10373 BasicType bt = Matcher::vector_element_basic_type(this); 10374 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 10375 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc); 10376 %} 10377 ins_pipe( pipe_slow ); 10378 %} 10379 10380 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{ 10381 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 10382 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! 
vternlog masked operation" %} 10383 ins_encode %{ 10384 int vlen_enc = vector_length_encoding(this); 10385 BasicType bt = Matcher::vector_element_basic_type(this); 10386 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 10387 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc); 10388 %} 10389 ins_pipe( pipe_slow ); 10390 %} 10391 10392 instruct castMM(kReg dst) 10393 %{ 10394 match(Set dst (CastVV dst)); 10395 10396 size(0); 10397 format %{ "# castVV of $dst" %} 10398 ins_encode(/* empty encoding */); 10399 ins_cost(0); 10400 ins_pipe(empty); 10401 %} 10402 10403 instruct castVV(vec dst) 10404 %{ 10405 match(Set dst (CastVV dst)); 10406 10407 size(0); 10408 format %{ "# castVV of $dst" %} 10409 ins_encode(/* empty encoding */); 10410 ins_cost(0); 10411 ins_pipe(empty); 10412 %} 10413 10414 instruct castVVLeg(legVec dst) 10415 %{ 10416 match(Set dst (CastVV dst)); 10417 10418 size(0); 10419 format %{ "# castVV of $dst" %} 10420 ins_encode(/* empty encoding */); 10421 ins_cost(0); 10422 ins_pipe(empty); 10423 %} 10424 10425 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr) 10426 %{ 10427 match(Set dst (IsInfiniteF src)); 10428 effect(TEMP ktmp, KILL cr); 10429 format %{ "float_class_check $dst, $src" %} 10430 ins_encode %{ 10431 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18); 10432 __ kmovbl($dst$$Register, $ktmp$$KRegister); 10433 %} 10434 ins_pipe(pipe_slow); 10435 %} 10436 10437 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr) 10438 %{ 10439 match(Set dst (IsInfiniteD src)); 10440 effect(TEMP ktmp, KILL cr); 10441 format %{ "double_class_check $dst, $src" %} 10442 ins_encode %{ 10443 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18); 10444 __ kmovbl($dst$$Register, $ktmp$$KRegister); 10445 %} 10446 ins_pipe(pipe_slow); 10447 %} 10448 10449 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2) 10450 %{ 10451 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10452 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10453 match(Set dst (SaturatingAddV src1 src2)); 10454 match(Set dst (SaturatingSubV src1 src2)); 10455 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %} 10456 ins_encode %{ 10457 int vlen_enc = vector_length_encoding(this); 10458 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10459 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10460 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 10461 %} 10462 ins_pipe(pipe_slow); 10463 %} 10464 10465 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2) 10466 %{ 10467 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10468 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10469 match(Set dst (SaturatingAddV src1 src2)); 10470 match(Set dst (SaturatingSubV src1 src2)); 10471 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %} 10472 ins_encode %{ 10473 int vlen_enc = vector_length_encoding(this); 10474 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10475 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10476 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10477 %} 10478 ins_pipe(pipe_slow); 10479 %} 10480 10481 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2) 10482 %{ 10483 
predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10484 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() && 10485 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10486 match(Set dst (SaturatingAddV src1 src2)); 10487 match(Set dst (SaturatingSubV src1 src2)); 10488 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2); 10489 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 10490 ins_encode %{ 10491 int vlen_enc = vector_length_encoding(this); 10492 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10493 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10494 $src1$$XMMRegister, $src2$$XMMRegister, 10495 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 10496 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc); 10497 %} 10498 ins_pipe(pipe_slow); 10499 %} 10500 10501 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4) 10502 %{ 10503 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10504 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() && 10505 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10506 match(Set dst (SaturatingAddV src1 src2)); 10507 match(Set dst (SaturatingSubV src1 src2)); 10508 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4); 10509 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 10510 ins_encode %{ 10511 int vlen_enc = vector_length_encoding(this); 10512 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10513 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, 10514 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 10515 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc); 10516 %} 10517 ins_pipe(pipe_slow); 10518 %} 10519 10520 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp) 10521 %{ 10522 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10523 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10524 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10525 match(Set dst (SaturatingAddV src1 src2)); 10526 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp); 10527 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! 
using $xtmp1, $xtmp2 and $ktmp as TEMP" %} 10528 ins_encode %{ 10529 int vlen_enc = vector_length_encoding(this); 10530 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10531 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10532 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc); 10533 %} 10534 ins_pipe(pipe_slow); 10535 %} 10536 10537 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3) 10538 %{ 10539 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10540 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10541 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10542 match(Set dst (SaturatingAddV src1 src2)); 10543 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 10544 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 10545 ins_encode %{ 10546 int vlen_enc = vector_length_encoding(this); 10547 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10548 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10549 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc); 10550 %} 10551 ins_pipe(pipe_slow); 10552 %} 10553 10554 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp) 10555 %{ 10556 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10557 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10558 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10559 match(Set dst (SaturatingSubV src1 src2)); 10560 effect(TEMP ktmp); 10561 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %} 10562 ins_encode %{ 10563 int vlen_enc = vector_length_encoding(this); 10564 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10565 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, 10566 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc); 10567 %} 10568 ins_pipe(pipe_slow); 10569 %} 10570 10571 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) 10572 %{ 10573 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10574 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10575 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10576 match(Set dst (SaturatingSubV src1 src2)); 10577 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 10578 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! 
using $xtmp1 and $xtmp2 as TEMP" %} 10579 ins_encode %{ 10580 int vlen_enc = vector_length_encoding(this); 10581 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10582 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10583 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 10584 %} 10585 ins_pipe(pipe_slow); 10586 %} 10587 10588 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2) 10589 %{ 10590 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10591 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10592 match(Set dst (SaturatingAddV src1 (LoadVector src2))); 10593 match(Set dst (SaturatingSubV src1 (LoadVector src2))); 10594 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %} 10595 ins_encode %{ 10596 int vlen_enc = vector_length_encoding(this); 10597 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10598 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10599 $src1$$XMMRegister, $src2$$Address, false, vlen_enc); 10600 %} 10601 ins_pipe(pipe_slow); 10602 %} 10603 10604 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2) 10605 %{ 10606 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10607 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10608 match(Set dst (SaturatingAddV src1 (LoadVector src2))); 10609 match(Set dst (SaturatingSubV src1 (LoadVector src2))); 10610 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %} 10611 ins_encode %{ 10612 int vlen_enc = vector_length_encoding(this); 10613 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10614 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10615 $src1$$XMMRegister, $src2$$Address, true, vlen_enc); 10616 %} 10617 ins_pipe(pipe_slow); 10618 %} 10619 10620 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{ 10621 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10622 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10623 match(Set dst (SaturatingAddV (Binary dst src) mask)); 10624 match(Set dst (SaturatingSubV (Binary dst src) mask)); 10625 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %} 10626 ins_encode %{ 10627 int vlen_enc = vector_length_encoding(this); 10628 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10629 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10630 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc); 10631 %} 10632 ins_pipe( pipe_slow ); 10633 %} 10634 10635 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{ 10636 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10637 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10638 match(Set dst (SaturatingAddV (Binary dst src) mask)); 10639 match(Set dst (SaturatingSubV (Binary dst src) mask)); 10640 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %} 10641 ins_encode %{ 10642 int vlen_enc = vector_length_encoding(this); 10643 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10644 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10645 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc); 10646 %} 10647 ins_pipe( 
pipe_slow ); 10648 %} 10649 10650 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{ 10651 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10652 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10653 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask)); 10654 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask)); 10655 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %} 10656 ins_encode %{ 10657 int vlen_enc = vector_length_encoding(this); 10658 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10659 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10660 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc); 10661 %} 10662 ins_pipe( pipe_slow ); 10663 %} 10664 10665 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{ 10666 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10667 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10668 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask)); 10669 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask)); 10670 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %} 10671 ins_encode %{ 10672 int vlen_enc = vector_length_encoding(this); 10673 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10674 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10675 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc); 10676 %} 10677 ins_pipe( pipe_slow ); 10678 %} 10679 10680 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2) 10681 %{ 10682 match(Set index (SelectFromTwoVector (Binary index src1) src2)); 10683 format %{ "select_from_two_vector $index, $src1, $src2 \t!" 
%} 10684 ins_encode %{ 10685 int vlen_enc = vector_length_encoding(this); 10686 BasicType bt = Matcher::vector_element_basic_type(this); 10687 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 10688 %} 10689 ins_pipe(pipe_slow); 10690 %} 10691 10692 instruct reinterpretS2HF(regF dst, rRegI src) 10693 %{ 10694 match(Set dst (ReinterpretS2HF src)); 10695 format %{ "vmovw $dst, $src" %} 10696 ins_encode %{ 10697 __ vmovw($dst$$XMMRegister, $src$$Register); 10698 %} 10699 ins_pipe(pipe_slow); 10700 %} 10701 10702 instruct reinterpretHF2S(rRegI dst, regF src) 10703 %{ 10704 match(Set dst (ReinterpretHF2S src)); 10705 format %{ "vmovw $dst, $src" %} 10706 ins_encode %{ 10707 __ vmovw($dst$$Register, $src$$XMMRegister); 10708 %} 10709 ins_pipe(pipe_slow); 10710 %} 10711 10712 instruct convF2HFAndS2HF(regF dst, regF src) 10713 %{ 10714 match(Set dst (ReinterpretS2HF (ConvF2HF src))); 10715 format %{ "convF2HFAndS2HF $dst, $src" %} 10716 ins_encode %{ 10717 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 10718 %} 10719 ins_pipe(pipe_slow); 10720 %} 10721 10722 instruct convHF2SAndHF2F(regF dst, regF src) 10723 %{ 10724 match(Set dst (ConvHF2F (ReinterpretHF2S src))); 10725 format %{ "convHF2SAndHF2F $dst, $src" %} 10726 ins_encode %{ 10727 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit); 10728 %} 10729 ins_pipe(pipe_slow); 10730 %} 10731 10732 instruct scalar_sqrt_HF_reg(regF dst, regF src) 10733 %{ 10734 match(Set dst (SqrtHF src)); 10735 format %{ "scalar_sqrt_fp16 $dst, $src" %} 10736 ins_encode %{ 10737 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister); 10738 %} 10739 ins_pipe(pipe_slow); 10740 %} 10741 10742 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2) 10743 %{ 10744 match(Set dst (AddHF src1 src2)); 10745 match(Set dst (DivHF src1 src2)); 10746 match(Set dst (MulHF src1 src2)); 10747 match(Set dst (SubHF src1 src2)); 10748 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %} 10749 ins_encode %{ 10750 int opcode = this->ideal_Opcode(); 10751 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 10752 %} 10753 ins_pipe(pipe_slow); 10754 %} 10755 10756 instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2) 10757 %{ 10758 predicate(VM_Version::supports_avx10_2()); 10759 match(Set dst (MaxHF src1 src2)); 10760 match(Set dst (MinHF src1 src2)); 10761 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %} 10762 ins_encode %{ 10763 int function = this->ideal_Opcode() == Op_MinHF ? 
AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN; 10764 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function); 10765 %} 10766 ins_pipe( pipe_slow ); 10767 %} 10768 10769 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2) 10770 %{ 10771 predicate(!VM_Version::supports_avx10_2()); 10772 match(Set dst (MaxHF src1 src2)); 10773 match(Set dst (MinHF src1 src2)); 10774 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2); 10775 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %} 10776 ins_encode %{ 10777 int opcode = this->ideal_Opcode(); 10778 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister, 10779 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 10780 %} 10781 ins_pipe( pipe_slow ); 10782 %} 10783 10784 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2) 10785 %{ 10786 match(Set dst (FmaHF src2 (Binary dst src1))); 10787 effect(DEF dst); 10788 format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %} 10789 ins_encode %{ 10790 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister); 10791 %} 10792 ins_pipe( pipe_slow ); 10793 %} 10794 10795 10796 instruct vector_sqrt_HF_reg(vec dst, vec src) 10797 %{ 10798 match(Set dst (SqrtVHF src)); 10799 format %{ "vector_sqrt_fp16 $dst, $src" %} 10800 ins_encode %{ 10801 int vlen_enc = vector_length_encoding(this); 10802 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 10803 %} 10804 ins_pipe(pipe_slow); 10805 %} 10806 10807 instruct vector_sqrt_HF_mem(vec dst, memory src) 10808 %{ 10809 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src)))); 10810 format %{ "vector_sqrt_fp16_mem $dst, $src" %} 10811 ins_encode %{ 10812 int vlen_enc = vector_length_encoding(this); 10813 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc); 10814 %} 10815 ins_pipe(pipe_slow); 10816 %} 10817 10818 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2) 10819 %{ 10820 match(Set dst (AddVHF src1 src2)); 10821 match(Set dst (DivVHF src1 src2)); 10822 match(Set dst (MulVHF src1 src2)); 10823 match(Set dst (SubVHF src1 src2)); 10824 format %{ "vector_binop_fp16 $dst, $src1, $src2" %} 10825 ins_encode %{ 10826 int vlen_enc = vector_length_encoding(this); 10827 int opcode = this->ideal_Opcode(); 10828 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 10829 %} 10830 ins_pipe(pipe_slow); 10831 %} 10832 10833 10834 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2) 10835 %{ 10836 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2)))); 10837 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2)))); 10838 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2)))); 10839 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2)))); 10840 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %} 10841 ins_encode %{ 10842 int vlen_enc = vector_length_encoding(this); 10843 int opcode = this->ideal_Opcode(); 10844 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc); 10845 %} 10846 ins_pipe(pipe_slow); 10847 %} 10848 10849 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2) 10850 %{ 10851 match(Set dst (FmaVHF src2 (Binary dst src1))); 10852 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %} 10853 ins_encode %{ 10854 int vlen_enc = vector_length_encoding(this); 10855 
__ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc); 10856 %} 10857 ins_pipe( pipe_slow ); 10858 %} 10859 10860 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2) 10861 %{ 10862 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1))))); 10863 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %} 10864 ins_encode %{ 10865 int vlen_enc = vector_length_encoding(this); 10866 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc); 10867 %} 10868 ins_pipe( pipe_slow ); 10869 %} 10870 10871 instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2) 10872 %{ 10873 predicate(VM_Version::supports_avx10_2()); 10874 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2)))); 10875 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2)))); 10876 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %} 10877 ins_encode %{ 10878 int vlen_enc = vector_length_encoding(this); 10879 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN; 10880 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc); 10881 %} 10882 ins_pipe( pipe_slow ); 10883 %} 10884 10885 instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2) 10886 %{ 10887 predicate(VM_Version::supports_avx10_2()); 10888 match(Set dst (MinVHF src1 src2)); 10889 match(Set dst (MaxVHF src1 src2)); 10890 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %} 10891 ins_encode %{ 10892 int vlen_enc = vector_length_encoding(this); 10893 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN; 10894 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc); 10895 %} 10896 ins_pipe( pipe_slow ); 10897 %} 10898 10899 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2) 10900 %{ 10901 predicate(!VM_Version::supports_avx10_2()); 10902 match(Set dst (MinVHF src1 src2)); 10903 match(Set dst (MaxVHF src1 src2)); 10904 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2); 10905 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %} 10906 ins_encode %{ 10907 int vlen_enc = vector_length_encoding(this); 10908 int opcode = this->ideal_Opcode(); 10909 __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister, 10910 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 10911 %} 10912 ins_pipe( pipe_slow ); 10913 %}
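
// Note on the FP16 min/max rules above: on AVX10.2 targets a single min/max instruction is
// emitted (via eminmaxsh / evminmaxph), with the immediate selecting
// AVX10_MINMAX_MIN_COMPARE_SIGN or AVX10_MINMAX_MAX_COMPARE_SIGN. On pre-AVX10.2 targets the
// scalar_max_min_fp16 / vector_max_min_fp16 fallbacks expand into a longer sequence that needs
// a mask register and two XMM temporaries, presumably so that Java's Math.min/max semantics
// for NaN and for -0.0 vs +0.0 are preserved, which a plain FP16 min/max comparison alone does
// not guarantee.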