//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
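
// For example (an illustrative reading of the definitions that follow, not an
// additional definition): the entry
//   reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
// declares the second 32-bit slice of xmm0 as a save-on-call float register
// whose opcode encoding is 0; the sixteen slices XMM0..XMM0p together cover
// one full 512-bit register.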
// XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);
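
// The *_legacy / *_evex register class pairs defined further below are combined
// through reg_class_dynamic; as the predicates suggest, the EVEX variant
// (XMM0-XMM31) is chosen when the corresponding VM_Version check holds at
// runtime, while the legacy variant (XMM0-XMM15 only) is used otherwise.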

// flags allocation class should be last.
alloc_class chunk3(RFLAGS);


// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7,
                           XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15);

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7,
                         XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31);

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0,  XMM0b,
                            XMM1,  XMM1b,
                            XMM2,  XMM2b,
                            XMM3,  XMM3b,
                            XMM4,  XMM4b,
                            XMM5,  XMM5b,
                            XMM6,  XMM6b,
                            XMM7,  XMM7b,
                            XMM8,  XMM8b,
                            XMM9,  XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b);

// Class for evex double registers
reg_class double_reg_evex(XMM0,  XMM0b,
                          XMM1,  XMM1b,
                          XMM2,  XMM2b,
                          XMM3,  XMM3b,
                          XMM4,  XMM4b,
                          XMM5,  XMM5b,
                          XMM6,  XMM6b,
                          XMM7,  XMM7b,
                          XMM8,  XMM8b,
                          XMM9,  XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b);

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7,
                             XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15);

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7,
                           XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31);

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
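
// The vector class names encode the operand width handled by each class:
// vectors = 32-bit, vectord = 64-bit, vectorx = 128-bit (XMM), vectory =
// 256-bit (YMM) and vectorz = 512-bit (ZMM), i.e. 1, 2, 4, 8 or 16 of the
// 32-bit slices defined above.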

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0,  XMM0b,
                             XMM1,  XMM1b,
                             XMM2,  XMM2b,
                             XMM3,  XMM3b,
                             XMM4,  XMM4b,
                             XMM5,  XMM5b,
                             XMM6,  XMM6b,
                             XMM7,  XMM7b,
                             XMM8,  XMM8b,
                             XMM9,  XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b);

// Class for all 64bit vector registers
reg_class vectord_reg_evex(XMM0,  XMM0b,
                           XMM1,  XMM1b,
                           XMM2,  XMM2b,
                           XMM3,  XMM3b,
                           XMM4,  XMM4b,
                           XMM5,  XMM5b,
                           XMM6,  XMM6b,
                           XMM7,  XMM7b,
                           XMM8,  XMM8b,
                           XMM9,  XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b);

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 128bit vector registers
reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,
                             XMM7,  XMM7b,  XMM7c,  XMM7d,
                             XMM8,  XMM8b,  XMM8c,  XMM8d,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d);

// Class for all 128bit vector registers
reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,
                           XMM7,  XMM7b,  XMM7c,  XMM7d,
                           XMM8,  XMM8b,  XMM8c,  XMM8d,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d);

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 256bit vector registers
reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                             XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
                             XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);

// Class for all 256bit vector registers
reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                           XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
                           XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                           XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                           XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1076 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1077 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1078 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1079 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1080 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1081 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1082 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 1083 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1084 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1085 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1086 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1087 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1088 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1089 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1090 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1091 1092 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1093 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1094 1095 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1096 %} 1097 1098 1099 //----------SOURCE BLOCK------------------------------------------------------- 1100 // This is a block of C++ code which provides values, functions, and 1101 // definitions necessary in the rest of the architecture description 1102 1103 source_hpp %{ 1104 // Header information of the source block. 1105 // Method declarations/definitions which are used outside 1106 // the ad-scope can conveniently be defined here. 1107 // 1108 // To keep related declarations/definitions/uses close together, 1109 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1110 1111 #include "runtime/vm_version.hpp" 1112 1113 class NativeJump; 1114 1115 class CallStubImpl { 1116 1117 //-------------------------------------------------------------- 1118 //---< Used for optimization in Compile::shorten_branches >--- 1119 //-------------------------------------------------------------- 1120 1121 public: 1122 // Size of call trampoline stub. 
1123 static uint size_call_trampoline() { 1124 return 0; // no call trampolines on this platform 1125 } 1126 1127 // Number of relocations needed by a call trampoline stub 1128 static uint reloc_call_trampoline() { 1129 return 0; // no call trampolines on this platform 1130 } 1131 }; 1132 1133 class HandlerImpl { 1134 1135 public: 1136 1137 static int emit_exception_handler(C2_MacroAssembler *masm); 1138 static int emit_deopt_handler(C2_MacroAssembler* masm); 1139 1140 static uint size_exception_handler() { 1141 // NativeCall instruction size is the same as NativeJump. 1142 // The exception handler starts out as a jump and can be patched to 1143 // a call by deoptimization. (4932387) 1144 // Note that this value is also credited (in output.cpp) to 1145 // the size of the code section. 1146 return NativeJump::instruction_size; 1147 } 1148 1149 static uint size_deopt_handler() { 1150 // three 5-byte instructions plus one move for unreachable address. 1151 return 15+3; 1152 } 1153 }; 1154 1155 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { 1156 switch(bytes) { 1157 case 4: // fall-through 1158 case 8: // fall-through 1159 case 16: return Assembler::AVX_128bit; 1160 case 32: return Assembler::AVX_256bit; 1161 case 64: return Assembler::AVX_512bit; 1162 1163 default: { 1164 ShouldNotReachHere(); 1165 return Assembler::AVX_NoVec; 1166 } 1167 } 1168 } 1169 1170 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) { 1171 return vector_length_encoding(Matcher::vector_length_in_bytes(n)); 1172 } 1173 1174 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) { 1175 uint def_idx = use->operand_index(opnd); 1176 Node* def = use->in(def_idx); 1177 return vector_length_encoding(def); 1178 } 1179 1180 static inline bool is_vector_popcount_predicate(BasicType bt) { 1181 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1182 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1183 } 1184 1185 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) { 1186 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() && 1187 (VM_Version::supports_avx512vl() || vlen_bytes == 64); 1188 } 1189 1190 class Node::PD { 1191 public: 1192 enum NodeFlags { 1193 Flag_intel_jcc_erratum = Node::_last_flag << 1, 1194 Flag_sets_carry_flag = Node::_last_flag << 2, 1195 Flag_sets_parity_flag = Node::_last_flag << 3, 1196 Flag_sets_zero_flag = Node::_last_flag << 4, 1197 Flag_sets_overflow_flag = Node::_last_flag << 5, 1198 Flag_sets_sign_flag = Node::_last_flag << 6, 1199 Flag_clears_carry_flag = Node::_last_flag << 7, 1200 Flag_clears_parity_flag = Node::_last_flag << 8, 1201 Flag_clears_zero_flag = Node::_last_flag << 9, 1202 Flag_clears_overflow_flag = Node::_last_flag << 10, 1203 Flag_clears_sign_flag = Node::_last_flag << 11, 1204 _last_flag = Flag_clears_sign_flag 1205 }; 1206 }; 1207 1208 %} // end source_hpp 1209 1210 source %{ 1211 1212 #include "opto/addnode.hpp" 1213 #include "c2_intelJccErratum_x86.hpp" 1214 1215 void PhaseOutput::pd_perform_mach_node_analysis() { 1216 if (VM_Version::has_intel_jcc_erratum()) { 1217 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1218 _buf_sizes._code += extra_padding; 1219 } 1220 } 1221 1222 int MachNode::pd_alignment_required() const { 1223 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1224 // Conservatively add worst case
padding. We assume that relocInfo::addr_unit() is 1 on x86. 1225 return IntelJccErratum::largest_jcc_size() + 1; 1226 } else { 1227 return 1; 1228 } 1229 } 1230 1231 int MachNode::compute_padding(int current_offset) const { 1232 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1233 Compile* C = Compile::current(); 1234 PhaseOutput* output = C->output(); 1235 Block* block = output->block(); 1236 int index = output->index(); 1237 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1238 } else { 1239 return 0; 1240 } 1241 } 1242 1243 // Emit exception handler code. 1244 // Stuff framesize into a register and call a VM stub routine. 1245 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) { 1246 1247 // Note that the code buffer's insts_mark is always relative to insts. 1248 // That's why we must use the macroassembler to generate a handler. 1249 address base = __ start_a_stub(size_exception_handler()); 1250 if (base == nullptr) { 1251 ciEnv::current()->record_failure("CodeCache is full"); 1252 return 0; // CodeBuffer::expand failed 1253 } 1254 int offset = __ offset(); 1255 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1256 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1257 __ end_a_stub(); 1258 return offset; 1259 } 1260 1261 // Emit deopt handler code. 1262 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { 1263 1264 // Note that the code buffer's insts_mark is always relative to insts. 1265 // That's why we must use the macroassembler to generate a handler. 1266 address base = __ start_a_stub(size_deopt_handler()); 1267 if (base == nullptr) { 1268 ciEnv::current()->record_failure("CodeCache is full"); 1269 return 0; // CodeBuffer::expand failed 1270 } 1271 int offset = __ offset(); 1272 1273 address the_pc = (address) __ pc(); 1274 Label next; 1275 // push a "the_pc" on the stack without destroying any registers 1276 // as they all may be live. 1277 1278 // push address of "next" 1279 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1280 __ bind(next); 1281 // adjust it so it matches "the_pc" 1282 __ subptr(Address(rsp, 0), __ offset() - offset); 1283 1284 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1285 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1286 __ end_a_stub(); 1287 return offset; 1288 } 1289 1290 static Assembler::Width widthForType(BasicType bt) { 1291 if (bt == T_BYTE) { 1292 return Assembler::B; 1293 } else if (bt == T_SHORT) { 1294 return Assembler::W; 1295 } else if (bt == T_INT) { 1296 return Assembler::D; 1297 } else { 1298 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1299 return Assembler::Q; 1300 } 1301 } 1302 1303 //============================================================================= 1304 1305 // Float masks come from different places depending on platform. 
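// (Illustrative note, not taken from this file: these tables are the usual operands for sign
// manipulation. For example, an absolute-value rule would typically clear the sign bit with
// something like
//   __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));   // dst &= 0x7fffffff per lane
// while a negation rule would xorps against float_signflip() to flip the sign bit; the exact
// instruction forms are whatever the match rules further below choose.)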
1306 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1307 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1308 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1309 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1310 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1311 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1312 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1313 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1314 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1315 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1316 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1317 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1318 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1319 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1320 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1321 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1322 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1323 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1324 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1325 1326 //============================================================================= 1327 bool Matcher::match_rule_supported(int opcode) { 1328 if (!has_match_rule(opcode)) { 1329 return false; // no match rule present 1330 } 1331 switch (opcode) { 1332 case Op_AbsVL: 1333 case Op_StoreVectorScatter: 1334 if (UseAVX < 3) { 1335 return false; 1336 } 1337 break; 1338 case Op_PopCountI: 1339 case Op_PopCountL: 1340 if (!UsePopCountInstruction) { 1341 return false; 1342 } 1343 break; 1344 case Op_PopCountVI: 1345 if (UseAVX < 2) { 1346 return false; 1347 } 1348 break; 1349 case Op_CompressV: 1350 case Op_ExpandV: 1351 case Op_PopCountVL: 1352 if (UseAVX < 2) { 1353 return false; 1354 } 1355 break; 1356 case Op_MulVI: 1357 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1358 return false; 1359 } 1360 break; 1361 case Op_MulVL: 1362 if (UseSSE < 4) { // only with SSE4_1 or AVX 1363 return false; 1364 } 1365 break; 1366 case Op_MulReductionVL: 1367 if (VM_Version::supports_avx512dq() == false) { 1368 return false; 1369 } 1370 break; 1371 case Op_AbsVB: 1372 case Op_AbsVS: 1373 case Op_AbsVI: 1374 case Op_AddReductionVI: 1375 case Op_AndReductionV: 1376 case Op_OrReductionV: 1377 case Op_XorReductionV: 1378 if (UseSSE < 3) { // requires at least SSSE3 1379 return false; 1380 } 1381 break; 1382 case Op_MaxHF: 1383 case Op_MinHF: 1384 if (!VM_Version::supports_avx512vlbw()) { 1385 return false; 1386 } // fallthrough 1387 case Op_AddHF: 1388 case Op_DivHF: 1389 case Op_FmaHF: 1390 case Op_MulHF: 1391 case Op_ReinterpretS2HF: 1392 case Op_ReinterpretHF2S: 1393 case Op_SubHF: 1394 case Op_SqrtHF: 1395 if (!VM_Version::supports_avx512_fp16()) { 1396 return 
false; 1397 } 1398 break; 1399 case Op_VectorLoadShuffle: 1400 case Op_VectorRearrange: 1401 case Op_MulReductionVI: 1402 if (UseSSE < 4) { // requires at least SSE4 1403 return false; 1404 } 1405 break; 1406 case Op_IsInfiniteF: 1407 case Op_IsInfiniteD: 1408 if (!VM_Version::supports_avx512dq()) { 1409 return false; 1410 } 1411 break; 1412 case Op_SqrtVD: 1413 case Op_SqrtVF: 1414 case Op_VectorMaskCmp: 1415 case Op_VectorCastB2X: 1416 case Op_VectorCastS2X: 1417 case Op_VectorCastI2X: 1418 case Op_VectorCastL2X: 1419 case Op_VectorCastF2X: 1420 case Op_VectorCastD2X: 1421 case Op_VectorUCastB2X: 1422 case Op_VectorUCastS2X: 1423 case Op_VectorUCastI2X: 1424 case Op_VectorMaskCast: 1425 if (UseAVX < 1) { // enabled for AVX only 1426 return false; 1427 } 1428 break; 1429 case Op_PopulateIndex: 1430 if (UseAVX < 2) { 1431 return false; 1432 } 1433 break; 1434 case Op_RoundVF: 1435 if (UseAVX < 2) { // enabled for AVX2 only 1436 return false; 1437 } 1438 break; 1439 case Op_RoundVD: 1440 if (UseAVX < 3) { 1441 return false; // enabled for AVX3 only 1442 } 1443 break; 1444 case Op_CompareAndSwapL: 1445 case Op_CompareAndSwapP: 1446 break; 1447 case Op_StrIndexOf: 1448 if (!UseSSE42Intrinsics) { 1449 return false; 1450 } 1451 break; 1452 case Op_StrIndexOfChar: 1453 if (!UseSSE42Intrinsics) { 1454 return false; 1455 } 1456 break; 1457 case Op_OnSpinWait: 1458 if (VM_Version::supports_on_spin_wait() == false) { 1459 return false; 1460 } 1461 break; 1462 case Op_MulVB: 1463 case Op_LShiftVB: 1464 case Op_RShiftVB: 1465 case Op_URShiftVB: 1466 case Op_VectorInsert: 1467 case Op_VectorLoadMask: 1468 case Op_VectorStoreMask: 1469 case Op_VectorBlend: 1470 if (UseSSE < 4) { 1471 return false; 1472 } 1473 break; 1474 case Op_MaxD: 1475 case Op_MaxF: 1476 case Op_MinD: 1477 case Op_MinF: 1478 if (UseAVX < 1) { // enabled for AVX only 1479 return false; 1480 } 1481 break; 1482 case Op_CacheWB: 1483 case Op_CacheWBPreSync: 1484 case Op_CacheWBPostSync: 1485 if (!VM_Version::supports_data_cache_line_flush()) { 1486 return false; 1487 } 1488 break; 1489 case Op_ExtractB: 1490 case Op_ExtractL: 1491 case Op_ExtractI: 1492 case Op_RoundDoubleMode: 1493 if (UseSSE < 4) { 1494 return false; 1495 } 1496 break; 1497 case Op_RoundDoubleModeV: 1498 if (VM_Version::supports_avx() == false) { 1499 return false; // 128bit vroundpd is not available 1500 } 1501 break; 1502 case Op_LoadVectorGather: 1503 case Op_LoadVectorGatherMasked: 1504 if (UseAVX < 2) { 1505 return false; 1506 } 1507 break; 1508 case Op_FmaF: 1509 case Op_FmaD: 1510 case Op_FmaVD: 1511 case Op_FmaVF: 1512 if (!UseFMA) { 1513 return false; 1514 } 1515 break; 1516 case Op_MacroLogicV: 1517 if (UseAVX < 3 || !UseVectorMacroLogic) { 1518 return false; 1519 } 1520 break; 1521 1522 case Op_VectorCmpMasked: 1523 case Op_VectorMaskGen: 1524 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1525 return false; 1526 } 1527 break; 1528 case Op_VectorMaskFirstTrue: 1529 case Op_VectorMaskLastTrue: 1530 case Op_VectorMaskTrueCount: 1531 case Op_VectorMaskToLong: 1532 if (UseAVX < 1) { 1533 return false; 1534 } 1535 break; 1536 case Op_RoundF: 1537 case Op_RoundD: 1538 break; 1539 case Op_CopySignD: 1540 case Op_CopySignF: 1541 if (UseAVX < 3) { 1542 return false; 1543 } 1544 if (!VM_Version::supports_avx512vl()) { 1545 return false; 1546 } 1547 break; 1548 case Op_CompressBits: 1549 case Op_ExpandBits: 1550 if (!VM_Version::supports_bmi2()) { 1551 return false; 1552 } 1553 break; 1554 case Op_CompressM: 1555 if (!VM_Version::supports_avx512vl() || 
!VM_Version::supports_bmi2()) { 1556 return false; 1557 } 1558 break; 1559 case Op_ConvF2HF: 1560 case Op_ConvHF2F: 1561 if (!VM_Version::supports_float16()) { 1562 return false; 1563 } 1564 break; 1565 case Op_VectorCastF2HF: 1566 case Op_VectorCastHF2F: 1567 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1568 return false; 1569 } 1570 break; 1571 } 1572 return true; // Match rules are supported by default. 1573 } 1574 1575 //------------------------------------------------------------------------ 1576 1577 static inline bool is_pop_count_instr_target(BasicType bt) { 1578 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1579 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1580 } 1581 1582 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) { 1583 return match_rule_supported_vector(opcode, vlen, bt); 1584 } 1585 1586 // Identify extra cases that we might want to provide match rules for vector nodes and 1587 // other intrinsics guarded with vector length (vlen) and element type (bt). 1588 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1589 if (!match_rule_supported(opcode)) { 1590 return false; 1591 } 1592 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1593 // * SSE2 supports 128bit vectors for all types; 1594 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1595 // * AVX2 supports 256bit vectors for all types; 1596 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1597 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1598 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1599 // And MaxVectorSize is taken into account as well. 
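// Worked example of the rules above (illustrative only): with UseAVX == 1, a 256-bit request
// such as (bt == T_INT, vlen == 8) fails the size check below, because AVX1 covers 256-bit
// vectors only for FLOAT and DOUBLE, whereas (bt == T_FLOAT, vlen == 8) passes; either request
// is additionally clamped by MaxVectorSize.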
1600 if (!vector_size_supported(bt, vlen)) { 1601 return false; 1602 } 1603 // Special cases which require vector length follow: 1604 // * implementation limitations 1605 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1606 // * 128bit vroundpd instruction is present only in AVX1 1607 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1608 switch (opcode) { 1609 case Op_MaxVHF: 1610 case Op_MinVHF: 1611 if (!VM_Version::supports_avx512bw()) { 1612 return false; 1613 } 1614 case Op_AddVHF: 1615 case Op_DivVHF: 1616 case Op_FmaVHF: 1617 case Op_MulVHF: 1618 case Op_SubVHF: 1619 case Op_SqrtVHF: 1620 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1621 return false; 1622 } 1623 if (!VM_Version::supports_avx512_fp16()) { 1624 return false; 1625 } 1626 break; 1627 case Op_AbsVF: 1628 case Op_NegVF: 1629 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1630 return false; // 512bit vandps and vxorps are not available 1631 } 1632 break; 1633 case Op_AbsVD: 1634 case Op_NegVD: 1635 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1636 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1637 } 1638 break; 1639 case Op_RotateRightV: 1640 case Op_RotateLeftV: 1641 if (bt != T_INT && bt != T_LONG) { 1642 return false; 1643 } // fallthrough 1644 case Op_MacroLogicV: 1645 if (!VM_Version::supports_evex() || 1646 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1647 return false; 1648 } 1649 break; 1650 case Op_ClearArray: 1651 case Op_VectorMaskGen: 1652 case Op_VectorCmpMasked: 1653 if (!VM_Version::supports_avx512bw()) { 1654 return false; 1655 } 1656 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1657 return false; 1658 } 1659 break; 1660 case Op_LoadVectorMasked: 1661 case Op_StoreVectorMasked: 1662 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1663 return false; 1664 } 1665 break; 1666 case Op_UMinV: 1667 case Op_UMaxV: 1668 if (UseAVX == 0) { 1669 return false; 1670 } 1671 break; 1672 case Op_MaxV: 1673 case Op_MinV: 1674 if (UseSSE < 4 && is_integral_type(bt)) { 1675 return false; 1676 } 1677 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1678 // Float/Double intrinsics are enabled for AVX family currently. 1679 if (UseAVX == 0) { 1680 return false; 1681 } 1682 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1683 return false; 1684 } 1685 } 1686 break; 1687 case Op_CallLeafVector: 1688 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1689 return false; 1690 } 1691 break; 1692 case Op_AddReductionVI: 1693 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1694 return false; 1695 } 1696 // fallthrough 1697 case Op_AndReductionV: 1698 case Op_OrReductionV: 1699 case Op_XorReductionV: 1700 if (is_subword_type(bt) && (UseSSE < 4)) { 1701 return false; 1702 } 1703 break; 1704 case Op_MinReductionV: 1705 case Op_MaxReductionV: 1706 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1707 return false; 1708 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1709 return false; 1710 } 1711 // Float/Double intrinsics enabled for AVX family. 
1712 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1713 return false; 1714 } 1715 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1716 return false; 1717 } 1718 break; 1719 case Op_VectorTest: 1720 if (UseSSE < 4) { 1721 return false; // Implementation limitation 1722 } else if (size_in_bits < 32) { 1723 return false; // Implementation limitation 1724 } 1725 break; 1726 case Op_VectorLoadShuffle: 1727 case Op_VectorRearrange: 1728 if(vlen == 2) { 1729 return false; // Implementation limitation due to how shuffle is loaded 1730 } else if (size_in_bits == 256 && UseAVX < 2) { 1731 return false; // Implementation limitation 1732 } 1733 break; 1734 case Op_VectorLoadMask: 1735 case Op_VectorMaskCast: 1736 if (size_in_bits == 256 && UseAVX < 2) { 1737 return false; // Implementation limitation 1738 } 1739 // fallthrough 1740 case Op_VectorStoreMask: 1741 if (vlen == 2) { 1742 return false; // Implementation limitation 1743 } 1744 break; 1745 case Op_PopulateIndex: 1746 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1747 return false; 1748 } 1749 break; 1750 case Op_VectorCastB2X: 1751 case Op_VectorCastS2X: 1752 case Op_VectorCastI2X: 1753 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1754 return false; 1755 } 1756 break; 1757 case Op_VectorCastL2X: 1758 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1759 return false; 1760 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1761 return false; 1762 } 1763 break; 1764 case Op_VectorCastF2X: { 1765 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1766 // happen after intermediate conversion to integer and special handling 1767 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 
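// (Illustration of the sizing computed below, assuming an 8-element cast: float->byte goes
// through an 8 x 32-bit = 256-bit integer intermediate, so AVX2 is required even though the
// final byte vector occupies only 64 bits.)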
1768 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1769 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1770 return false; 1771 } 1772 } 1773 // fallthrough 1774 case Op_VectorCastD2X: 1775 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1776 return false; 1777 } 1778 break; 1779 case Op_VectorCastF2HF: 1780 case Op_VectorCastHF2F: 1781 if (!VM_Version::supports_f16c() && 1782 ((!VM_Version::supports_evex() || 1783 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1784 return false; 1785 } 1786 break; 1787 case Op_RoundVD: 1788 if (!VM_Version::supports_avx512dq()) { 1789 return false; 1790 } 1791 break; 1792 case Op_MulReductionVI: 1793 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1794 return false; 1795 } 1796 break; 1797 case Op_LoadVectorGatherMasked: 1798 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1799 return false; 1800 } 1801 if (is_subword_type(bt) && 1802 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) || 1803 (size_in_bits < 64) || 1804 (bt == T_SHORT && !VM_Version::supports_bmi2()))) { 1805 return false; 1806 } 1807 break; 1808 case Op_StoreVectorScatterMasked: 1809 case Op_StoreVectorScatter: 1810 if (is_subword_type(bt)) { 1811 return false; 1812 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1813 return false; 1814 } 1815 // fallthrough 1816 case Op_LoadVectorGather: 1817 if (!is_subword_type(bt) && size_in_bits == 64) { 1818 return false; 1819 } 1820 if (is_subword_type(bt) && size_in_bits < 64) { 1821 return false; 1822 } 1823 break; 1824 case Op_SaturatingAddV: 1825 case Op_SaturatingSubV: 1826 if (UseAVX < 1) { 1827 return false; // Implementation limitation 1828 } 1829 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1830 return false; 1831 } 1832 break; 1833 case Op_SelectFromTwoVector: 1834 if (size_in_bits < 128 || (size_in_bits < 512 && !VM_Version::supports_avx512vl())) { 1835 return false; 1836 } 1837 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 1838 return false; 1839 } 1840 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 1841 return false; 1842 } 1843 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) { 1844 return false; 1845 } 1846 break; 1847 case Op_MaskAll: 1848 if (!VM_Version::supports_evex()) { 1849 return false; 1850 } 1851 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1852 return false; 1853 } 1854 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1855 return false; 1856 } 1857 break; 1858 case Op_VectorMaskCmp: 1859 if (vlen < 2 || size_in_bits < 32) { 1860 return false; 1861 } 1862 break; 1863 case Op_CompressM: 1864 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1865 return false; 1866 } 1867 break; 1868 case Op_CompressV: 1869 case Op_ExpandV: 1870 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1871 return false; 1872 } 1873 if (size_in_bits < 128 ) { 1874 return false; 1875 } 1876 case Op_VectorLongToMask: 1877 if (UseAVX < 1) { 1878 return false; 1879 } 1880 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1881 return false; 1882 } 1883 break; 1884 case Op_SignumVD: 1885 case Op_SignumVF: 1886 if (UseAVX < 1) { 1887 return false; 1888 } 1889 break; 1890 case Op_PopCountVI: 1891 case Op_PopCountVL: { 1892 if (!is_pop_count_instr_target(bt) && 1893 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 1894 
return false; 1895 } 1896 } 1897 break; 1898 case Op_ReverseV: 1899 case Op_ReverseBytesV: 1900 if (UseAVX < 2) { 1901 return false; 1902 } 1903 break; 1904 case Op_CountTrailingZerosV: 1905 case Op_CountLeadingZerosV: 1906 if (UseAVX < 2) { 1907 return false; 1908 } 1909 break; 1910 } 1911 return true; // Per default match rules are supported. 1912 } 1913 1914 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 1915 // ADLC based match_rule_supported routine checks for the existence of pattern based 1916 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 1917 // of their non-masked counterpart with mask edge being the differentiator. 1918 // This routine does a strict check on the existence of masked operation patterns 1919 // by returning a default false value for all the other opcodes apart from the 1920 // ones whose masked instruction patterns are defined in this file. 1921 if (!match_rule_supported_vector(opcode, vlen, bt)) { 1922 return false; 1923 } 1924 1925 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1926 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 1927 return false; 1928 } 1929 switch(opcode) { 1930 // Unary masked operations 1931 case Op_AbsVB: 1932 case Op_AbsVS: 1933 if(!VM_Version::supports_avx512bw()) { 1934 return false; // Implementation limitation 1935 } 1936 case Op_AbsVI: 1937 case Op_AbsVL: 1938 return true; 1939 1940 // Ternary masked operations 1941 case Op_FmaVF: 1942 case Op_FmaVD: 1943 return true; 1944 1945 case Op_MacroLogicV: 1946 if(bt != T_INT && bt != T_LONG) { 1947 return false; 1948 } 1949 return true; 1950 1951 // Binary masked operations 1952 case Op_AddVB: 1953 case Op_AddVS: 1954 case Op_SubVB: 1955 case Op_SubVS: 1956 case Op_MulVS: 1957 case Op_LShiftVS: 1958 case Op_RShiftVS: 1959 case Op_URShiftVS: 1960 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1961 if (!VM_Version::supports_avx512bw()) { 1962 return false; // Implementation limitation 1963 } 1964 return true; 1965 1966 case Op_MulVL: 1967 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1968 if (!VM_Version::supports_avx512dq()) { 1969 return false; // Implementation limitation 1970 } 1971 return true; 1972 1973 case Op_AndV: 1974 case Op_OrV: 1975 case Op_XorV: 1976 case Op_RotateRightV: 1977 case Op_RotateLeftV: 1978 if (bt != T_INT && bt != T_LONG) { 1979 return false; // Implementation limitation 1980 } 1981 return true; 1982 1983 case Op_VectorLoadMask: 1984 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1985 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 1986 return false; 1987 } 1988 return true; 1989 1990 case Op_AddVI: 1991 case Op_AddVL: 1992 case Op_AddVF: 1993 case Op_AddVD: 1994 case Op_SubVI: 1995 case Op_SubVL: 1996 case Op_SubVF: 1997 case Op_SubVD: 1998 case Op_MulVI: 1999 case Op_MulVF: 2000 case Op_MulVD: 2001 case Op_DivVF: 2002 case Op_DivVD: 2003 case Op_SqrtVF: 2004 case Op_SqrtVD: 2005 case Op_LShiftVI: 2006 case Op_LShiftVL: 2007 case Op_RShiftVI: 2008 case Op_RShiftVL: 2009 case Op_URShiftVI: 2010 case Op_URShiftVL: 2011 case Op_LoadVectorMasked: 2012 case Op_StoreVectorMasked: 2013 case Op_LoadVectorGatherMasked: 2014 case Op_StoreVectorScatterMasked: 2015 return true; 2016 2017 case Op_UMinV: 2018 case Op_UMaxV: 2019 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 2020 return false; 2021 } // fallthrough 2022 case Op_MaxV: 2023 case Op_MinV: 2024 if (is_subword_type(bt) 
&& !VM_Version::supports_avx512bw()) { 2025 return false; // Implementation limitation 2026 } 2027 if (is_floating_point_type(bt)) { 2028 return false; // Implementation limitation 2029 } 2030 return true; 2031 case Op_SaturatingAddV: 2032 case Op_SaturatingSubV: 2033 if (!is_subword_type(bt)) { 2034 return false; 2035 } 2036 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) { 2037 return false; // Implementation limitation 2038 } 2039 return true; 2040 2041 case Op_VectorMaskCmp: 2042 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2043 return false; // Implementation limitation 2044 } 2045 return true; 2046 2047 case Op_VectorRearrange: 2048 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2049 return false; // Implementation limitation 2050 } 2051 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2052 return false; // Implementation limitation 2053 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2054 return false; // Implementation limitation 2055 } 2056 return true; 2057 2058 // Binary Logical operations 2059 case Op_AndVMask: 2060 case Op_OrVMask: 2061 case Op_XorVMask: 2062 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2063 return false; // Implementation limitation 2064 } 2065 return true; 2066 2067 case Op_PopCountVI: 2068 case Op_PopCountVL: 2069 if (!is_pop_count_instr_target(bt)) { 2070 return false; 2071 } 2072 return true; 2073 2074 case Op_MaskAll: 2075 return true; 2076 2077 case Op_CountLeadingZerosV: 2078 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2079 return true; 2080 } 2081 default: 2082 return false; 2083 } 2084 } 2085 2086 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2087 return false; 2088 } 2089 2090 // Return true if Vector::rearrange needs preparation of the shuffle argument 2091 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) { 2092 switch (elem_bt) { 2093 case T_BYTE: return false; 2094 case T_SHORT: return !VM_Version::supports_avx512bw(); 2095 case T_INT: return !VM_Version::supports_avx(); 2096 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl(); 2097 default: 2098 ShouldNotReachHere(); 2099 return false; 2100 } 2101 } 2102 2103 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2104 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2105 bool legacy = (generic_opnd->opcode() == LEGVEC); 2106 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2107 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2108 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 
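// (Reason, added for clarity: XMM16-XMM31 are reachable only with EVEX encodings, so a temp
// that may also be written by legacy/VEX-encoded instructions on such a CPU has to be
// confined to XMM0-XMM15.)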
2109 return new legVecZOper(); 2110 } 2111 if (legacy) { 2112 switch (ideal_reg) { 2113 case Op_VecS: return new legVecSOper(); 2114 case Op_VecD: return new legVecDOper(); 2115 case Op_VecX: return new legVecXOper(); 2116 case Op_VecY: return new legVecYOper(); 2117 case Op_VecZ: return new legVecZOper(); 2118 } 2119 } else { 2120 switch (ideal_reg) { 2121 case Op_VecS: return new vecSOper(); 2122 case Op_VecD: return new vecDOper(); 2123 case Op_VecX: return new vecXOper(); 2124 case Op_VecY: return new vecYOper(); 2125 case Op_VecZ: return new vecZOper(); 2126 } 2127 } 2128 ShouldNotReachHere(); 2129 return nullptr; 2130 } 2131 2132 bool Matcher::is_reg2reg_move(MachNode* m) { 2133 switch (m->rule()) { 2134 case MoveVec2Leg_rule: 2135 case MoveLeg2Vec_rule: 2136 case MoveF2VL_rule: 2137 case MoveF2LEG_rule: 2138 case MoveVL2F_rule: 2139 case MoveLEG2F_rule: 2140 case MoveD2VL_rule: 2141 case MoveD2LEG_rule: 2142 case MoveVL2D_rule: 2143 case MoveLEG2D_rule: 2144 return true; 2145 default: 2146 return false; 2147 } 2148 } 2149 2150 bool Matcher::is_generic_vector(MachOper* opnd) { 2151 switch (opnd->opcode()) { 2152 case VEC: 2153 case LEGVEC: 2154 return true; 2155 default: 2156 return false; 2157 } 2158 } 2159 2160 //------------------------------------------------------------------------ 2161 2162 const RegMask* Matcher::predicate_reg_mask(void) { 2163 return &_VECTMASK_REG_mask; 2164 } 2165 2166 // Max vector size in bytes. 0 if not supported. 2167 int Matcher::vector_width_in_bytes(BasicType bt) { 2168 assert(is_java_primitive(bt), "only primitive type vectors"); 2169 // SSE2 supports 128bit vectors for all types. 2170 // AVX2 supports 256bit vectors for all types. 2171 // AVX2/EVEX supports 512bit vectors for all types. 2172 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2173 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2174 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2175 size = (UseAVX > 2) ? 64 : 32; 2176 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2177 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2178 // Use flag to limit vector size. 2179 size = MIN2(size,(int)MaxVectorSize); 2180 // Minimum 2 values in vector (or 4 for bytes). 2181 switch (bt) { 2182 case T_DOUBLE: 2183 case T_LONG: 2184 if (size < 16) return 0; 2185 break; 2186 case T_FLOAT: 2187 case T_INT: 2188 if (size < 8) return 0; 2189 break; 2190 case T_BOOLEAN: 2191 if (size < 4) return 0; 2192 break; 2193 case T_CHAR: 2194 if (size < 4) return 0; 2195 break; 2196 case T_BYTE: 2197 if (size < 4) return 0; 2198 break; 2199 case T_SHORT: 2200 if (size < 4) return 0; 2201 break; 2202 default: 2203 ShouldNotReachHere(); 2204 } 2205 return size; 2206 } 2207 2208 // Limits on vector size (number of elements) loaded into vector. 2209 int Matcher::max_vector_size(const BasicType bt) { 2210 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2211 } 2212 int Matcher::min_vector_size(const BasicType bt) { 2213 int max_size = max_vector_size(bt); 2214 // Min size which can be loaded into vector is 4 bytes. 2215 int size = (type2aelembytes(bt) == 1) ? 
4 : 2; 2216 // Support for calling svml double64 vectors 2217 if (bt == T_DOUBLE) { 2218 size = 1; 2219 } 2220 return MIN2(size,max_size); 2221 } 2222 2223 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) { 2224 // Limit the max vector size for auto vectorization to 256 bits (32 bytes) 2225 // by default on Cascade Lake 2226 if (VM_Version::is_default_intel_cascade_lake()) { 2227 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt)); 2228 } 2229 return Matcher::max_vector_size(bt); 2230 } 2231 2232 int Matcher::scalable_vector_reg_size(const BasicType bt) { 2233 return -1; 2234 } 2235 2236 // Vector ideal reg corresponding to specified size in bytes 2237 uint Matcher::vector_ideal_reg(int size) { 2238 assert(MaxVectorSize >= size, ""); 2239 switch(size) { 2240 case 4: return Op_VecS; 2241 case 8: return Op_VecD; 2242 case 16: return Op_VecX; 2243 case 32: return Op_VecY; 2244 case 64: return Op_VecZ; 2245 } 2246 ShouldNotReachHere(); 2247 return 0; 2248 } 2249 2250 // Check for shift by small constant as well 2251 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2252 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2253 shift->in(2)->get_int() <= 3 && 2254 // Are there other uses besides address expressions? 2255 !matcher->is_visited(shift)) { 2256 address_visited.set(shift->_idx); // Flag as address_visited 2257 mstack.push(shift->in(2), Matcher::Visit); 2258 Node *conv = shift->in(1); 2259 // Allow Matcher to match the rule which bypass 2260 // ConvI2L operation for an array index on LP64 2261 // if the index value is positive. 2262 if (conv->Opcode() == Op_ConvI2L && 2263 conv->as_Type()->type()->is_long()->_lo >= 0 && 2264 // Are there other uses besides address expressions? 2265 !matcher->is_visited(conv)) { 2266 address_visited.set(conv->_idx); // Flag as address_visited 2267 mstack.push(conv->in(1), Matcher::Pre_Visit); 2268 } else { 2269 mstack.push(conv, Matcher::Pre_Visit); 2270 } 2271 return true; 2272 } 2273 return false; 2274 } 2275 2276 // This function identifies sub-graphs in which a 'load' node is 2277 // input to two different nodes, and such that it can be matched 2278 // with BMI instructions like blsi, blsr, etc. 2279 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2280 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2281 // refers to the same node. 2282 // 2283 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2284 // This is a temporary solution until we make DAGs expressible in ADL. 
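// Worked example (restating the case above for the int flavor): the blsi pattern
//   b = -a[i] & a[i]   i.e.   (AndI (SubI 0 LoadI*) LoadI*)
// is checked further down roughly as
//   FusedPatternMatcher<TypeInt> bmii(n /* AndI */, m /* LoadI */, Op_ConI);
//   bmii.match(Op_AndI, -1 /* AndI is commutative */,
//              Op_SubI,  1 /* constant is SubI input 1 */, 0 /* constant value */);
// and only succeeds when both LoadI inputs are the same node and the constant under the
// SubI is zero.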
2285 template<typename ConType> 2286 class FusedPatternMatcher { 2287 Node* _op1_node; 2288 Node* _mop_node; 2289 int _con_op; 2290 2291 static int match_next(Node* n, int next_op, int next_op_idx) { 2292 if (n->in(1) == nullptr || n->in(2) == nullptr) { 2293 return -1; 2294 } 2295 2296 if (next_op_idx == -1) { // n is commutative, try rotations 2297 if (n->in(1)->Opcode() == next_op) { 2298 return 1; 2299 } else if (n->in(2)->Opcode() == next_op) { 2300 return 2; 2301 } 2302 } else { 2303 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2304 if (n->in(next_op_idx)->Opcode() == next_op) { 2305 return next_op_idx; 2306 } 2307 } 2308 return -1; 2309 } 2310 2311 public: 2312 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2313 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2314 2315 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2316 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2317 typename ConType::NativeType con_value) { 2318 if (_op1_node->Opcode() != op1) { 2319 return false; 2320 } 2321 if (_mop_node->outcnt() > 2) { 2322 return false; 2323 } 2324 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2325 if (op1_op2_idx == -1) { 2326 return false; 2327 } 2328 // Memory operation must be the other edge 2329 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2330 2331 // Check that the mop node is really what we want 2332 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2333 Node* op2_node = _op1_node->in(op1_op2_idx); 2334 if (op2_node->outcnt() > 1) { 2335 return false; 2336 } 2337 assert(op2_node->Opcode() == op2, "Should be"); 2338 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2339 if (op2_con_idx == -1) { 2340 return false; 2341 } 2342 // Memory operation must be the other edge 2343 int op2_mop_idx = (op2_con_idx & 1) + 1; 2344 // Check that the memory operation is the same node 2345 if (op2_node->in(op2_mop_idx) == _mop_node) { 2346 // Now check the constant 2347 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2348 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2349 return true; 2350 } 2351 } 2352 } 2353 return false; 2354 } 2355 }; 2356 2357 static bool is_bmi_pattern(Node* n, Node* m) { 2358 assert(UseBMI1Instructions, "sanity"); 2359 if (n != nullptr && m != nullptr) { 2360 if (m->Opcode() == Op_LoadI) { 2361 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2362 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2363 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2364 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2365 } else if (m->Opcode() == Op_LoadL) { 2366 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2367 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2368 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2369 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2370 } 2371 } 2372 return false; 2373 } 2374 2375 // Should the matcher clone input 'm' of node 'n'? 2376 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2377 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
2378 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2379 mstack.push(m, Visit); 2380 return true; 2381 } 2382 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2383 mstack.push(m, Visit); // m = ShiftCntV 2384 return true; 2385 } 2386 if (is_encode_and_store_pattern(n, m)) { 2387 mstack.push(m, Visit); 2388 return true; 2389 } 2390 return false; 2391 } 2392 2393 // Should the Matcher clone shifts on addressing modes, expecting them 2394 // to be subsumed into complex addressing expressions or compute them 2395 // into registers? 2396 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2397 Node *off = m->in(AddPNode::Offset); 2398 if (off->is_Con()) { 2399 address_visited.test_set(m->_idx); // Flag as address_visited 2400 Node *adr = m->in(AddPNode::Address); 2401 2402 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset. 2403 // AtomicAdd is not an addressing expression. 2404 // Cheap to find it by looking for screwy base. 2405 if (adr->is_AddP() && 2406 !adr->in(AddPNode::Base)->is_top() && 2407 !adr->in(AddPNode::Offset)->is_Con() && 2408 off->get_long() == (int) (off->get_long()) && // immL32 2409 // Are there other uses besides address expressions? 2410 !is_visited(adr)) { 2411 address_visited.set(adr->_idx); // Flag as address_visited 2412 Node *shift = adr->in(AddPNode::Offset); 2413 if (!clone_shift(shift, this, mstack, address_visited)) { 2414 mstack.push(shift, Pre_Visit); 2415 } 2416 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2417 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2418 } else { 2419 mstack.push(adr, Pre_Visit); 2420 } 2421 2422 // Clone X+offset as it also folds into most addressing expressions 2423 mstack.push(off, Visit); 2424 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2425 return true; 2426 } else if (clone_shift(off, this, mstack, address_visited)) { 2427 address_visited.test_set(m->_idx); // Flag as address_visited 2428 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2429 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2430 return true; 2431 } 2432 return false; 2433 } 2434 2435 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2436 switch (bt) { 2437 case BoolTest::eq: 2438 return Assembler::eq; 2439 case BoolTest::ne: 2440 return Assembler::neq; 2441 case BoolTest::le: 2442 case BoolTest::ule: 2443 return Assembler::le; 2444 case BoolTest::ge: 2445 case BoolTest::uge: 2446 return Assembler::nlt; 2447 case BoolTest::lt: 2448 case BoolTest::ult: 2449 return Assembler::lt; 2450 case BoolTest::gt: 2451 case BoolTest::ugt: 2452 return Assembler::nle; 2453 default : ShouldNotReachHere(); return Assembler::_false; 2454 } 2455 } 2456 2457 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2458 switch (bt) { 2459 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2460 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
2461 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2462 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2463 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2464 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2465 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2466 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2467 } 2468 } 2469 2470 // Helper methods for MachSpillCopyNode::implementation(). 2471 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 2472 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2473 assert(ireg == Op_VecS || // 32bit vector 2474 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2475 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi), 2476 "no non-adjacent vector moves" ); 2477 if (masm) { 2478 switch (ireg) { 2479 case Op_VecS: // copy whole register 2480 case Op_VecD: 2481 case Op_VecX: 2482 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2483 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2484 } else { 2485 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2486 } 2487 break; 2488 case Op_VecY: 2489 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2490 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2491 } else { 2492 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2493 } 2494 break; 2495 case Op_VecZ: 2496 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2497 break; 2498 default: 2499 ShouldNotReachHere(); 2500 } 2501 #ifndef PRODUCT 2502 } else { 2503 switch (ireg) { 2504 case Op_VecS: 2505 case Op_VecD: 2506 case Op_VecX: 2507 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2508 break; 2509 case Op_VecY: 2510 case Op_VecZ: 2511 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2512 break; 2513 default: 2514 ShouldNotReachHere(); 2515 } 2516 #endif 2517 } 2518 } 2519 2520 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 2521 int stack_offset, int reg, uint ireg, outputStream* st) { 2522 if (masm) { 2523 if (is_load) { 2524 switch (ireg) { 2525 case Op_VecS: 2526 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2527 break; 2528 case Op_VecD: 2529 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2530 break; 2531 case Op_VecX: 2532 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2533 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2534 } else { 2535 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2536 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2537 } 2538 break; 2539 case Op_VecY: 2540 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2541 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2542 } else { 2543 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2544 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), 
as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2545 } 2546 break; 2547 case Op_VecZ: 2548 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2549 break; 2550 default: 2551 ShouldNotReachHere(); 2552 } 2553 } else { // store 2554 switch (ireg) { 2555 case Op_VecS: 2556 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2557 break; 2558 case Op_VecD: 2559 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2560 break; 2561 case Op_VecX: 2562 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2563 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2564 } 2565 else { 2566 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2567 } 2568 break; 2569 case Op_VecY: 2570 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2571 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2572 } 2573 else { 2574 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2575 } 2576 break; 2577 case Op_VecZ: 2578 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2579 break; 2580 default: 2581 ShouldNotReachHere(); 2582 } 2583 } 2584 #ifndef PRODUCT 2585 } else { 2586 if (is_load) { 2587 switch (ireg) { 2588 case Op_VecS: 2589 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2590 break; 2591 case Op_VecD: 2592 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2593 break; 2594 case Op_VecX: 2595 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2596 break; 2597 case Op_VecY: 2598 case Op_VecZ: 2599 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2600 break; 2601 default: 2602 ShouldNotReachHere(); 2603 } 2604 } else { // store 2605 switch (ireg) { 2606 case Op_VecS: 2607 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2608 break; 2609 case Op_VecD: 2610 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2611 break; 2612 case Op_VecX: 2613 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2614 break; 2615 case Op_VecY: 2616 case Op_VecZ: 2617 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2618 break; 2619 default: 2620 ShouldNotReachHere(); 2621 } 2622 } 2623 #endif 2624 } 2625 } 2626 2627 template <class T> 2628 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) { 2629 int size = type2aelembytes(bt) * len; 2630 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0); 2631 for (int i = 0; i < len; i++) { 2632 int offset = i * type2aelembytes(bt); 2633 switch (bt) { 2634 case T_BYTE: val->at(i) = con; break; 2635 case T_SHORT: { 2636 jshort c = con; 2637 memcpy(val->adr_at(offset), &c, sizeof(jshort)); 2638 break; 2639 } 2640 case T_INT: { 2641 jint c = con; 2642 memcpy(val->adr_at(offset), &c, sizeof(jint)); 2643 break; 2644 } 2645 case T_LONG: { 2646 jlong c = con; 2647 memcpy(val->adr_at(offset), &c, sizeof(jlong)); 2648 break; 2649 } 2650 case T_FLOAT: { 2651 jfloat c = con; 2652 memcpy(val->adr_at(offset), &c, sizeof(jfloat)); 2653 break; 2654 } 2655 case T_DOUBLE: { 2656 jdouble c = con; 2657 memcpy(val->adr_at(offset), &c, sizeof(jdouble)); 2658 break; 2659 } 2660 default: assert(false, "%s", type2name(bt)); 2661 } 2662 } 2663 
return val; 2664 } 2665 2666 static inline jlong high_bit_set(BasicType bt) { 2667 switch (bt) { 2668 case T_BYTE: return 0x8080808080808080; 2669 case T_SHORT: return 0x8000800080008000; 2670 case T_INT: return 0x8000000080000000; 2671 case T_LONG: return 0x8000000000000000; 2672 default: 2673 ShouldNotReachHere(); 2674 return 0; 2675 } 2676 } 2677 2678 #ifndef PRODUCT 2679 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2680 st->print("nop \t# %d bytes pad for loops and calls", _count); 2681 } 2682 #endif 2683 2684 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const { 2685 __ nop(_count); 2686 } 2687 2688 uint MachNopNode::size(PhaseRegAlloc*) const { 2689 return _count; 2690 } 2691 2692 #ifndef PRODUCT 2693 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2694 st->print("# breakpoint"); 2695 } 2696 #endif 2697 2698 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const { 2699 __ int3(); 2700 } 2701 2702 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2703 return MachNode::size(ra_); 2704 } 2705 2706 %} 2707 2708 encode %{ 2709 2710 enc_class call_epilog %{ 2711 if (VerifyStackAtCalls) { 2712 // Check that stack depth is unchanged: find majik cookie on stack 2713 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2714 Label L; 2715 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2716 __ jccb(Assembler::equal, L); 2717 // Die if stack mismatch 2718 __ int3(); 2719 __ bind(L); 2720 } 2721 %} 2722 2723 %} 2724 2725 // Operands for bound floating pointer register arguments 2726 operand rxmm0() %{ 2727 constraint(ALLOC_IN_RC(xmm0_reg)); 2728 match(VecX); 2729 format%{%} 2730 interface(REG_INTER); 2731 %} 2732 2733 //----------OPERANDS----------------------------------------------------------- 2734 // Operand definitions must precede instruction definitions for correct parsing 2735 // in the ADLC because operands constitute user defined types which are used in 2736 // instruction definitions. 2737 2738 // Vectors 2739 2740 // Dummy generic vector class. Should be used for all vector operands. 2741 // Replaced with vec[SDXYZ] during post-selection pass. 2742 operand vec() %{ 2743 constraint(ALLOC_IN_RC(dynamic)); 2744 match(VecX); 2745 match(VecY); 2746 match(VecZ); 2747 match(VecS); 2748 match(VecD); 2749 2750 format %{ %} 2751 interface(REG_INTER); 2752 %} 2753 2754 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2755 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2756 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2757 // runtime code generation via reg_class_dynamic. 2758 operand legVec() %{ 2759 constraint(ALLOC_IN_RC(dynamic)); 2760 match(VecX); 2761 match(VecY); 2762 match(VecZ); 2763 match(VecS); 2764 match(VecD); 2765 2766 format %{ %} 2767 interface(REG_INTER); 2768 %} 2769 2770 // Replaces vec during post-selection cleanup. See above. 2771 operand vecS() %{ 2772 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2773 match(VecS); 2774 2775 format %{ %} 2776 interface(REG_INTER); 2777 %} 2778 2779 // Replaces legVec during post-selection cleanup. See above. 2780 operand legVecS() %{ 2781 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2782 match(VecS); 2783 2784 format %{ %} 2785 interface(REG_INTER); 2786 %} 2787 2788 // Replaces vec during post-selection cleanup. See above. 
2789 operand vecD() %{ 2790 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2791 match(VecD); 2792 2793 format %{ %} 2794 interface(REG_INTER); 2795 %} 2796 2797 // Replaces legVec during post-selection cleanup. See above. 2798 operand legVecD() %{ 2799 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2800 match(VecD); 2801 2802 format %{ %} 2803 interface(REG_INTER); 2804 %} 2805 2806 // Replaces vec during post-selection cleanup. See above. 2807 operand vecX() %{ 2808 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2809 match(VecX); 2810 2811 format %{ %} 2812 interface(REG_INTER); 2813 %} 2814 2815 // Replaces legVec during post-selection cleanup. See above. 2816 operand legVecX() %{ 2817 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2818 match(VecX); 2819 2820 format %{ %} 2821 interface(REG_INTER); 2822 %} 2823 2824 // Replaces vec during post-selection cleanup. See above. 2825 operand vecY() %{ 2826 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2827 match(VecY); 2828 2829 format %{ %} 2830 interface(REG_INTER); 2831 %} 2832 2833 // Replaces legVec during post-selection cleanup. See above. 2834 operand legVecY() %{ 2835 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2836 match(VecY); 2837 2838 format %{ %} 2839 interface(REG_INTER); 2840 %} 2841 2842 // Replaces vec during post-selection cleanup. See above. 2843 operand vecZ() %{ 2844 constraint(ALLOC_IN_RC(vectorz_reg)); 2845 match(VecZ); 2846 2847 format %{ %} 2848 interface(REG_INTER); 2849 %} 2850 2851 // Replaces legVec during post-selection cleanup. See above. 2852 operand legVecZ() %{ 2853 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2854 match(VecZ); 2855 2856 format %{ %} 2857 interface(REG_INTER); 2858 %} 2859 2860 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2861 2862 // ============================================================================ 2863 2864 instruct ShouldNotReachHere() %{ 2865 match(Halt); 2866 format %{ "stop\t# ShouldNotReachHere" %} 2867 ins_encode %{ 2868 if (is_reachable()) { 2869 const char* str = __ code_string(_halt_reason); 2870 __ stop(str); 2871 } 2872 %} 2873 ins_pipe(pipe_slow); 2874 %} 2875 2876 // ============================================================================ 2877 2878 instruct addF_reg(regF dst, regF src) %{ 2879 predicate(UseAVX == 0); 2880 match(Set dst (AddF dst src)); 2881 2882 format %{ "addss $dst, $src" %} 2883 ins_cost(150); 2884 ins_encode %{ 2885 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2886 %} 2887 ins_pipe(pipe_slow); 2888 %} 2889 2890 instruct addF_mem(regF dst, memory src) %{ 2891 predicate(UseAVX == 0); 2892 match(Set dst (AddF dst (LoadF src))); 2893 2894 format %{ "addss $dst, $src" %} 2895 ins_cost(150); 2896 ins_encode %{ 2897 __ addss($dst$$XMMRegister, $src$$Address); 2898 %} 2899 ins_pipe(pipe_slow); 2900 %} 2901 2902 instruct addF_imm(regF dst, immF con) %{ 2903 predicate(UseAVX == 0); 2904 match(Set dst (AddF dst con)); 2905 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2906 ins_cost(150); 2907 ins_encode %{ 2908 __ addss($dst$$XMMRegister, $constantaddress($con)); 2909 %} 2910 ins_pipe(pipe_slow); 2911 %} 2912 2913 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2914 predicate(UseAVX > 0); 2915 match(Set dst (AddF src1 src2)); 2916 2917 format %{ "vaddss $dst, $src1, $src2" %} 2918 ins_cost(150); 2919 ins_encode %{ 2920 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2921 %} 2922 ins_pipe(pipe_slow); 2923 %} 2924 2925 instruct addF_reg_mem(regF dst, regF 
src1, memory src2) %{ 2926 predicate(UseAVX > 0); 2927 match(Set dst (AddF src1 (LoadF src2))); 2928 2929 format %{ "vaddss $dst, $src1, $src2" %} 2930 ins_cost(150); 2931 ins_encode %{ 2932 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2933 %} 2934 ins_pipe(pipe_slow); 2935 %} 2936 2937 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2938 predicate(UseAVX > 0); 2939 match(Set dst (AddF src con)); 2940 2941 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2942 ins_cost(150); 2943 ins_encode %{ 2944 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2945 %} 2946 ins_pipe(pipe_slow); 2947 %} 2948 2949 instruct addD_reg(regD dst, regD src) %{ 2950 predicate(UseAVX == 0); 2951 match(Set dst (AddD dst src)); 2952 2953 format %{ "addsd $dst, $src" %} 2954 ins_cost(150); 2955 ins_encode %{ 2956 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2957 %} 2958 ins_pipe(pipe_slow); 2959 %} 2960 2961 instruct addD_mem(regD dst, memory src) %{ 2962 predicate(UseAVX == 0); 2963 match(Set dst (AddD dst (LoadD src))); 2964 2965 format %{ "addsd $dst, $src" %} 2966 ins_cost(150); 2967 ins_encode %{ 2968 __ addsd($dst$$XMMRegister, $src$$Address); 2969 %} 2970 ins_pipe(pipe_slow); 2971 %} 2972 2973 instruct addD_imm(regD dst, immD con) %{ 2974 predicate(UseAVX == 0); 2975 match(Set dst (AddD dst con)); 2976 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2977 ins_cost(150); 2978 ins_encode %{ 2979 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2980 %} 2981 ins_pipe(pipe_slow); 2982 %} 2983 2984 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2985 predicate(UseAVX > 0); 2986 match(Set dst (AddD src1 src2)); 2987 2988 format %{ "vaddsd $dst, $src1, $src2" %} 2989 ins_cost(150); 2990 ins_encode %{ 2991 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2992 %} 2993 ins_pipe(pipe_slow); 2994 %} 2995 2996 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2997 predicate(UseAVX > 0); 2998 match(Set dst (AddD src1 (LoadD src2))); 2999 3000 format %{ "vaddsd $dst, $src1, $src2" %} 3001 ins_cost(150); 3002 ins_encode %{ 3003 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3004 %} 3005 ins_pipe(pipe_slow); 3006 %} 3007 3008 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3009 predicate(UseAVX > 0); 3010 match(Set dst (AddD src con)); 3011 3012 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3013 ins_cost(150); 3014 ins_encode %{ 3015 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3016 %} 3017 ins_pipe(pipe_slow); 3018 %} 3019 3020 instruct subF_reg(regF dst, regF src) %{ 3021 predicate(UseAVX == 0); 3022 match(Set dst (SubF dst src)); 3023 3024 format %{ "subss $dst, $src" %} 3025 ins_cost(150); 3026 ins_encode %{ 3027 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3028 %} 3029 ins_pipe(pipe_slow); 3030 %} 3031 3032 instruct subF_mem(regF dst, memory src) %{ 3033 predicate(UseAVX == 0); 3034 match(Set dst (SubF dst (LoadF src))); 3035 3036 format %{ "subss $dst, $src" %} 3037 ins_cost(150); 3038 ins_encode %{ 3039 __ subss($dst$$XMMRegister, $src$$Address); 3040 %} 3041 ins_pipe(pipe_slow); 3042 %} 3043 3044 instruct subF_imm(regF dst, immF con) %{ 3045 predicate(UseAVX == 0); 3046 match(Set dst (SubF dst con)); 3047 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3048 ins_cost(150); 3049 ins_encode %{ 3050 __ 
subss($dst$$XMMRegister, $constantaddress($con)); 3051 %} 3052 ins_pipe(pipe_slow); 3053 %} 3054 3055 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3056 predicate(UseAVX > 0); 3057 match(Set dst (SubF src1 src2)); 3058 3059 format %{ "vsubss $dst, $src1, $src2" %} 3060 ins_cost(150); 3061 ins_encode %{ 3062 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3063 %} 3064 ins_pipe(pipe_slow); 3065 %} 3066 3067 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3068 predicate(UseAVX > 0); 3069 match(Set dst (SubF src1 (LoadF src2))); 3070 3071 format %{ "vsubss $dst, $src1, $src2" %} 3072 ins_cost(150); 3073 ins_encode %{ 3074 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3075 %} 3076 ins_pipe(pipe_slow); 3077 %} 3078 3079 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3080 predicate(UseAVX > 0); 3081 match(Set dst (SubF src con)); 3082 3083 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3084 ins_cost(150); 3085 ins_encode %{ 3086 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3087 %} 3088 ins_pipe(pipe_slow); 3089 %} 3090 3091 instruct subD_reg(regD dst, regD src) %{ 3092 predicate(UseAVX == 0); 3093 match(Set dst (SubD dst src)); 3094 3095 format %{ "subsd $dst, $src" %} 3096 ins_cost(150); 3097 ins_encode %{ 3098 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3099 %} 3100 ins_pipe(pipe_slow); 3101 %} 3102 3103 instruct subD_mem(regD dst, memory src) %{ 3104 predicate(UseAVX == 0); 3105 match(Set dst (SubD dst (LoadD src))); 3106 3107 format %{ "subsd $dst, $src" %} 3108 ins_cost(150); 3109 ins_encode %{ 3110 __ subsd($dst$$XMMRegister, $src$$Address); 3111 %} 3112 ins_pipe(pipe_slow); 3113 %} 3114 3115 instruct subD_imm(regD dst, immD con) %{ 3116 predicate(UseAVX == 0); 3117 match(Set dst (SubD dst con)); 3118 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3119 ins_cost(150); 3120 ins_encode %{ 3121 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3122 %} 3123 ins_pipe(pipe_slow); 3124 %} 3125 3126 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3127 predicate(UseAVX > 0); 3128 match(Set dst (SubD src1 src2)); 3129 3130 format %{ "vsubsd $dst, $src1, $src2" %} 3131 ins_cost(150); 3132 ins_encode %{ 3133 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3134 %} 3135 ins_pipe(pipe_slow); 3136 %} 3137 3138 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3139 predicate(UseAVX > 0); 3140 match(Set dst (SubD src1 (LoadD src2))); 3141 3142 format %{ "vsubsd $dst, $src1, $src2" %} 3143 ins_cost(150); 3144 ins_encode %{ 3145 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3146 %} 3147 ins_pipe(pipe_slow); 3148 %} 3149 3150 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3151 predicate(UseAVX > 0); 3152 match(Set dst (SubD src con)); 3153 3154 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3155 ins_cost(150); 3156 ins_encode %{ 3157 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3158 %} 3159 ins_pipe(pipe_slow); 3160 %} 3161 3162 instruct mulF_reg(regF dst, regF src) %{ 3163 predicate(UseAVX == 0); 3164 match(Set dst (MulF dst src)); 3165 3166 format %{ "mulss $dst, $src" %} 3167 ins_cost(150); 3168 ins_encode %{ 3169 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3170 %} 3171 ins_pipe(pipe_slow); 3172 %} 3173 3174 instruct mulF_mem(regF dst, memory src) %{ 3175 predicate(UseAVX == 
0); 3176 match(Set dst (MulF dst (LoadF src))); 3177 3178 format %{ "mulss $dst, $src" %} 3179 ins_cost(150); 3180 ins_encode %{ 3181 __ mulss($dst$$XMMRegister, $src$$Address); 3182 %} 3183 ins_pipe(pipe_slow); 3184 %} 3185 3186 instruct mulF_imm(regF dst, immF con) %{ 3187 predicate(UseAVX == 0); 3188 match(Set dst (MulF dst con)); 3189 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3190 ins_cost(150); 3191 ins_encode %{ 3192 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3193 %} 3194 ins_pipe(pipe_slow); 3195 %} 3196 3197 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3198 predicate(UseAVX > 0); 3199 match(Set dst (MulF src1 src2)); 3200 3201 format %{ "vmulss $dst, $src1, $src2" %} 3202 ins_cost(150); 3203 ins_encode %{ 3204 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3205 %} 3206 ins_pipe(pipe_slow); 3207 %} 3208 3209 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3210 predicate(UseAVX > 0); 3211 match(Set dst (MulF src1 (LoadF src2))); 3212 3213 format %{ "vmulss $dst, $src1, $src2" %} 3214 ins_cost(150); 3215 ins_encode %{ 3216 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3217 %} 3218 ins_pipe(pipe_slow); 3219 %} 3220 3221 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3222 predicate(UseAVX > 0); 3223 match(Set dst (MulF src con)); 3224 3225 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3226 ins_cost(150); 3227 ins_encode %{ 3228 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3229 %} 3230 ins_pipe(pipe_slow); 3231 %} 3232 3233 instruct mulD_reg(regD dst, regD src) %{ 3234 predicate(UseAVX == 0); 3235 match(Set dst (MulD dst src)); 3236 3237 format %{ "mulsd $dst, $src" %} 3238 ins_cost(150); 3239 ins_encode %{ 3240 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3241 %} 3242 ins_pipe(pipe_slow); 3243 %} 3244 3245 instruct mulD_mem(regD dst, memory src) %{ 3246 predicate(UseAVX == 0); 3247 match(Set dst (MulD dst (LoadD src))); 3248 3249 format %{ "mulsd $dst, $src" %} 3250 ins_cost(150); 3251 ins_encode %{ 3252 __ mulsd($dst$$XMMRegister, $src$$Address); 3253 %} 3254 ins_pipe(pipe_slow); 3255 %} 3256 3257 instruct mulD_imm(regD dst, immD con) %{ 3258 predicate(UseAVX == 0); 3259 match(Set dst (MulD dst con)); 3260 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3261 ins_cost(150); 3262 ins_encode %{ 3263 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3264 %} 3265 ins_pipe(pipe_slow); 3266 %} 3267 3268 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3269 predicate(UseAVX > 0); 3270 match(Set dst (MulD src1 src2)); 3271 3272 format %{ "vmulsd $dst, $src1, $src2" %} 3273 ins_cost(150); 3274 ins_encode %{ 3275 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3276 %} 3277 ins_pipe(pipe_slow); 3278 %} 3279 3280 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3281 predicate(UseAVX > 0); 3282 match(Set dst (MulD src1 (LoadD src2))); 3283 3284 format %{ "vmulsd $dst, $src1, $src2" %} 3285 ins_cost(150); 3286 ins_encode %{ 3287 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3288 %} 3289 ins_pipe(pipe_slow); 3290 %} 3291 3292 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3293 predicate(UseAVX > 0); 3294 match(Set dst (MulD src con)); 3295 3296 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3297 ins_cost(150); 3298 ins_encode %{ 3299 __ 
vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3300 %} 3301 ins_pipe(pipe_slow); 3302 %} 3303 3304 instruct divF_reg(regF dst, regF src) %{ 3305 predicate(UseAVX == 0); 3306 match(Set dst (DivF dst src)); 3307 3308 format %{ "divss $dst, $src" %} 3309 ins_cost(150); 3310 ins_encode %{ 3311 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3312 %} 3313 ins_pipe(pipe_slow); 3314 %} 3315 3316 instruct divF_mem(regF dst, memory src) %{ 3317 predicate(UseAVX == 0); 3318 match(Set dst (DivF dst (LoadF src))); 3319 3320 format %{ "divss $dst, $src" %} 3321 ins_cost(150); 3322 ins_encode %{ 3323 __ divss($dst$$XMMRegister, $src$$Address); 3324 %} 3325 ins_pipe(pipe_slow); 3326 %} 3327 3328 instruct divF_imm(regF dst, immF con) %{ 3329 predicate(UseAVX == 0); 3330 match(Set dst (DivF dst con)); 3331 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3332 ins_cost(150); 3333 ins_encode %{ 3334 __ divss($dst$$XMMRegister, $constantaddress($con)); 3335 %} 3336 ins_pipe(pipe_slow); 3337 %} 3338 3339 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3340 predicate(UseAVX > 0); 3341 match(Set dst (DivF src1 src2)); 3342 3343 format %{ "vdivss $dst, $src1, $src2" %} 3344 ins_cost(150); 3345 ins_encode %{ 3346 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3347 %} 3348 ins_pipe(pipe_slow); 3349 %} 3350 3351 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3352 predicate(UseAVX > 0); 3353 match(Set dst (DivF src1 (LoadF src2))); 3354 3355 format %{ "vdivss $dst, $src1, $src2" %} 3356 ins_cost(150); 3357 ins_encode %{ 3358 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3359 %} 3360 ins_pipe(pipe_slow); 3361 %} 3362 3363 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3364 predicate(UseAVX > 0); 3365 match(Set dst (DivF src con)); 3366 3367 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3368 ins_cost(150); 3369 ins_encode %{ 3370 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3371 %} 3372 ins_pipe(pipe_slow); 3373 %} 3374 3375 instruct divD_reg(regD dst, regD src) %{ 3376 predicate(UseAVX == 0); 3377 match(Set dst (DivD dst src)); 3378 3379 format %{ "divsd $dst, $src" %} 3380 ins_cost(150); 3381 ins_encode %{ 3382 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3383 %} 3384 ins_pipe(pipe_slow); 3385 %} 3386 3387 instruct divD_mem(regD dst, memory src) %{ 3388 predicate(UseAVX == 0); 3389 match(Set dst (DivD dst (LoadD src))); 3390 3391 format %{ "divsd $dst, $src" %} 3392 ins_cost(150); 3393 ins_encode %{ 3394 __ divsd($dst$$XMMRegister, $src$$Address); 3395 %} 3396 ins_pipe(pipe_slow); 3397 %} 3398 3399 instruct divD_imm(regD dst, immD con) %{ 3400 predicate(UseAVX == 0); 3401 match(Set dst (DivD dst con)); 3402 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3403 ins_cost(150); 3404 ins_encode %{ 3405 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3406 %} 3407 ins_pipe(pipe_slow); 3408 %} 3409 3410 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3411 predicate(UseAVX > 0); 3412 match(Set dst (DivD src1 src2)); 3413 3414 format %{ "vdivsd $dst, $src1, $src2" %} 3415 ins_cost(150); 3416 ins_encode %{ 3417 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3418 %} 3419 ins_pipe(pipe_slow); 3420 %} 3421 3422 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3423 predicate(UseAVX > 0); 3424 match(Set dst (DivD src1 (LoadD src2))); 3425 3426 
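  // AVX form: non-destructive three-operand encoding (vdivsd dst, src1, [mem]),
  // so dst need not overlap src1, unlike the UseAVX == 0 rules earlier in this
  // section.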
format %{ "vdivsd $dst, $src1, $src2" %} 3427 ins_cost(150); 3428 ins_encode %{ 3429 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3430 %} 3431 ins_pipe(pipe_slow); 3432 %} 3433 3434 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3435 predicate(UseAVX > 0); 3436 match(Set dst (DivD src con)); 3437 3438 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3439 ins_cost(150); 3440 ins_encode %{ 3441 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3442 %} 3443 ins_pipe(pipe_slow); 3444 %} 3445 3446 instruct absF_reg(regF dst) %{ 3447 predicate(UseAVX == 0); 3448 match(Set dst (AbsF dst)); 3449 ins_cost(150); 3450 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3451 ins_encode %{ 3452 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3453 %} 3454 ins_pipe(pipe_slow); 3455 %} 3456 3457 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3458 predicate(UseAVX > 0); 3459 match(Set dst (AbsF src)); 3460 ins_cost(150); 3461 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3462 ins_encode %{ 3463 int vlen_enc = Assembler::AVX_128bit; 3464 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3465 ExternalAddress(float_signmask()), vlen_enc); 3466 %} 3467 ins_pipe(pipe_slow); 3468 %} 3469 3470 instruct absD_reg(regD dst) %{ 3471 predicate(UseAVX == 0); 3472 match(Set dst (AbsD dst)); 3473 ins_cost(150); 3474 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3475 "# abs double by sign masking" %} 3476 ins_encode %{ 3477 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3478 %} 3479 ins_pipe(pipe_slow); 3480 %} 3481 3482 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3483 predicate(UseAVX > 0); 3484 match(Set dst (AbsD src)); 3485 ins_cost(150); 3486 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3487 "# abs double by sign masking" %} 3488 ins_encode %{ 3489 int vlen_enc = Assembler::AVX_128bit; 3490 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3491 ExternalAddress(double_signmask()), vlen_enc); 3492 %} 3493 ins_pipe(pipe_slow); 3494 %} 3495 3496 instruct negF_reg(regF dst) %{ 3497 predicate(UseAVX == 0); 3498 match(Set dst (NegF dst)); 3499 ins_cost(150); 3500 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3501 ins_encode %{ 3502 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3503 %} 3504 ins_pipe(pipe_slow); 3505 %} 3506 3507 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3508 predicate(UseAVX > 0); 3509 match(Set dst (NegF src)); 3510 ins_cost(150); 3511 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3512 ins_encode %{ 3513 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3514 ExternalAddress(float_signflip())); 3515 %} 3516 ins_pipe(pipe_slow); 3517 %} 3518 3519 instruct negD_reg(regD dst) %{ 3520 predicate(UseAVX == 0); 3521 match(Set dst (NegD dst)); 3522 ins_cost(150); 3523 format %{ "xorpd $dst, [0x8000000000000000]\t" 3524 "# neg double by sign flipping" %} 3525 ins_encode %{ 3526 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3527 %} 3528 ins_pipe(pipe_slow); 3529 %} 3530 3531 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3532 predicate(UseAVX > 0); 3533 match(Set dst (NegD src)); 3534 ins_cost(150); 3535 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3536 "# neg double by sign flipping" %} 3537 ins_encode %{ 3538 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3539 ExternalAddress(double_signflip())); 3540 %} 3541 
ins_pipe(pipe_slow); 3542 %} 3543 3544 // sqrtss instruction needs destination register to be pre initialized for best performance 3545 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3546 instruct sqrtF_reg(regF dst) %{ 3547 match(Set dst (SqrtF dst)); 3548 format %{ "sqrtss $dst, $dst" %} 3549 ins_encode %{ 3550 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3551 %} 3552 ins_pipe(pipe_slow); 3553 %} 3554 3555 // sqrtsd instruction needs destination register to be pre initialized for best performance 3556 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3557 instruct sqrtD_reg(regD dst) %{ 3558 match(Set dst (SqrtD dst)); 3559 format %{ "sqrtsd $dst, $dst" %} 3560 ins_encode %{ 3561 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3562 %} 3563 ins_pipe(pipe_slow); 3564 %} 3565 3566 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{ 3567 effect(TEMP tmp); 3568 match(Set dst (ConvF2HF src)); 3569 ins_cost(125); 3570 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%} 3571 ins_encode %{ 3572 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister); 3573 %} 3574 ins_pipe( pipe_slow ); 3575 %} 3576 3577 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{ 3578 predicate((UseAVX > 2) && VM_Version::supports_avx512vl()); 3579 effect(TEMP ktmp, TEMP rtmp); 3580 match(Set mem (StoreC mem (ConvF2HF src))); 3581 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %} 3582 ins_encode %{ 3583 __ movl($rtmp$$Register, 0x1); 3584 __ kmovwl($ktmp$$KRegister, $rtmp$$Register); 3585 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 3586 %} 3587 ins_pipe( pipe_slow ); 3588 %} 3589 3590 instruct vconvF2HF(vec dst, vec src) %{ 3591 match(Set dst (VectorCastF2HF src)); 3592 format %{ "vector_conv_F2HF $dst $src" %} 3593 ins_encode %{ 3594 int vlen_enc = vector_length_encoding(this, $src); 3595 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc); 3596 %} 3597 ins_pipe( pipe_slow ); 3598 %} 3599 3600 instruct vconvF2HF_mem_reg(memory mem, vec src) %{ 3601 predicate(n->as_StoreVector()->memory_size() >= 16); 3602 match(Set mem (StoreVector mem (VectorCastF2HF src))); 3603 format %{ "vcvtps2ph $mem,$src" %} 3604 ins_encode %{ 3605 int vlen_enc = vector_length_encoding(this, $src); 3606 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc); 3607 %} 3608 ins_pipe( pipe_slow ); 3609 %} 3610 3611 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{ 3612 match(Set dst (ConvHF2F src)); 3613 format %{ "vcvtph2ps $dst,$src" %} 3614 ins_encode %{ 3615 __ flt16_to_flt($dst$$XMMRegister, $src$$Register); 3616 %} 3617 ins_pipe( pipe_slow ); 3618 %} 3619 3620 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{ 3621 match(Set dst (VectorCastHF2F (LoadVector mem))); 3622 format %{ "vcvtph2ps $dst,$mem" %} 3623 ins_encode %{ 3624 int vlen_enc = vector_length_encoding(this); 3625 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc); 3626 %} 3627 ins_pipe( pipe_slow ); 3628 %} 3629 3630 instruct vconvHF2F(vec dst, vec src) %{ 3631 match(Set dst (VectorCastHF2F src)); 3632 ins_cost(125); 3633 format %{ "vector_conv_HF2F $dst,$src" %} 3634 ins_encode %{ 3635 int vlen_enc = vector_length_encoding(this); 3636 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 3637 %} 3638 ins_pipe( pipe_slow ); 3639 %} 3640 3641 // ---------------------------------------- VectorReinterpret 
------------------------------------ 3642 instruct reinterpret_mask(kReg dst) %{ 3643 predicate(n->bottom_type()->isa_vectmask() && 3644 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3645 match(Set dst (VectorReinterpret dst)); 3646 ins_cost(125); 3647 format %{ "vector_reinterpret $dst\t!" %} 3648 ins_encode %{ 3649 // empty 3650 %} 3651 ins_pipe( pipe_slow ); 3652 %} 3653 3654 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3655 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3656 n->bottom_type()->isa_vectmask() && 3657 n->in(1)->bottom_type()->isa_vectmask() && 3658 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3659 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3660 match(Set dst (VectorReinterpret src)); 3661 effect(TEMP xtmp); 3662 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %} 3663 ins_encode %{ 3664 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3665 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3666 assert(src_sz == dst_sz , "src and dst size mismatch"); 3667 int vlen_enc = vector_length_encoding(src_sz); 3668 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3669 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3670 %} 3671 ins_pipe( pipe_slow ); 3672 %} 3673 3674 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3675 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3676 n->bottom_type()->isa_vectmask() && 3677 n->in(1)->bottom_type()->isa_vectmask() && 3678 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3679 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3680 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3681 match(Set dst (VectorReinterpret src)); 3682 effect(TEMP xtmp); 3683 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3684 ins_encode %{ 3685 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3686 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3687 assert(src_sz == dst_sz , "src and dst size mismatch"); 3688 int vlen_enc = vector_length_encoding(src_sz); 3689 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3690 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3691 %} 3692 ins_pipe( pipe_slow ); 3693 %} 3694 3695 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3696 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3697 n->bottom_type()->isa_vectmask() && 3698 n->in(1)->bottom_type()->isa_vectmask() && 3699 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3700 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3701 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3702 match(Set dst (VectorReinterpret src)); 3703 effect(TEMP xtmp); 3704 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" 
%} 3705 ins_encode %{ 3706 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3707 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3708 assert(src_sz == dst_sz , "src and dst size mismatch"); 3709 int vlen_enc = vector_length_encoding(src_sz); 3710 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3711 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3712 %} 3713 ins_pipe( pipe_slow ); 3714 %} 3715 3716 instruct reinterpret(vec dst) %{ 3717 predicate(!n->bottom_type()->isa_vectmask() && 3718 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3719 match(Set dst (VectorReinterpret dst)); 3720 ins_cost(125); 3721 format %{ "vector_reinterpret $dst\t!" %} 3722 ins_encode %{ 3723 // empty 3724 %} 3725 ins_pipe( pipe_slow ); 3726 %} 3727 3728 instruct reinterpret_expand(vec dst, vec src) %{ 3729 predicate(UseAVX == 0 && 3730 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3731 match(Set dst (VectorReinterpret src)); 3732 ins_cost(125); 3733 effect(TEMP dst); 3734 format %{ "vector_reinterpret_expand $dst,$src" %} 3735 ins_encode %{ 3736 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3737 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3738 3739 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3740 if (src_vlen_in_bytes == 4) { 3741 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3742 } else { 3743 assert(src_vlen_in_bytes == 8, ""); 3744 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3745 } 3746 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3747 %} 3748 ins_pipe( pipe_slow ); 3749 %} 3750 3751 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3752 predicate(UseAVX > 0 && 3753 !n->bottom_type()->isa_vectmask() && 3754 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3755 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3756 match(Set dst (VectorReinterpret src)); 3757 ins_cost(125); 3758 format %{ "vector_reinterpret_expand $dst,$src" %} 3759 ins_encode %{ 3760 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3761 %} 3762 ins_pipe( pipe_slow ); 3763 %} 3764 3765 3766 instruct vreinterpret_expand(legVec dst, vec src) %{ 3767 predicate(UseAVX > 0 && 3768 !n->bottom_type()->isa_vectmask() && 3769 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3770 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3771 match(Set dst (VectorReinterpret src)); 3772 ins_cost(125); 3773 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3774 ins_encode %{ 3775 switch (Matcher::vector_length_in_bytes(this, $src)) { 3776 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3777 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3778 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3779 default: ShouldNotReachHere(); 3780 } 3781 %} 3782 ins_pipe( pipe_slow ); 3783 %} 3784 3785 instruct reinterpret_shrink(vec dst, legVec src) %{ 3786 predicate(!n->bottom_type()->isa_vectmask() && 3787 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3788 match(Set dst (VectorReinterpret src)); 3789 ins_cost(125); 3790 format %{ "vector_reinterpret_shrink $dst,$src\t!" 
%} 3791 ins_encode %{ 3792 switch (Matcher::vector_length_in_bytes(this)) { 3793 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3794 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3795 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3796 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3797 default: ShouldNotReachHere(); 3798 } 3799 %} 3800 ins_pipe( pipe_slow ); 3801 %} 3802 3803 // ---------------------------------------------------------------------------------------------------- 3804 3805 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3806 match(Set dst (RoundDoubleMode src rmode)); 3807 format %{ "roundsd $dst,$src" %} 3808 ins_cost(150); 3809 ins_encode %{ 3810 assert(UseSSE >= 4, "required"); 3811 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) { 3812 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3813 } 3814 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3815 %} 3816 ins_pipe(pipe_slow); 3817 %} 3818 3819 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3820 match(Set dst (RoundDoubleMode con rmode)); 3821 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3822 ins_cost(150); 3823 ins_encode %{ 3824 assert(UseSSE >= 4, "required"); 3825 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3826 %} 3827 ins_pipe(pipe_slow); 3828 %} 3829 3830 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3831 predicate(Matcher::vector_length(n) < 8); 3832 match(Set dst (RoundDoubleModeV src rmode)); 3833 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3834 ins_encode %{ 3835 assert(UseAVX > 0, "required"); 3836 int vlen_enc = vector_length_encoding(this); 3837 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3838 %} 3839 ins_pipe( pipe_slow ); 3840 %} 3841 3842 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3843 predicate(Matcher::vector_length(n) == 8); 3844 match(Set dst (RoundDoubleModeV src rmode)); 3845 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3846 ins_encode %{ 3847 assert(UseAVX > 2, "required"); 3848 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3849 %} 3850 ins_pipe( pipe_slow ); 3851 %} 3852 3853 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3854 predicate(Matcher::vector_length(n) < 8); 3855 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3856 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3857 ins_encode %{ 3858 assert(UseAVX > 0, "required"); 3859 int vlen_enc = vector_length_encoding(this); 3860 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3861 %} 3862 ins_pipe( pipe_slow ); 3863 %} 3864 3865 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3866 predicate(Matcher::vector_length(n) == 8); 3867 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3868 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3869 ins_encode %{ 3870 assert(UseAVX > 2, "required"); 3871 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3872 %} 3873 ins_pipe( pipe_slow ); 3874 %} 3875 3876 instruct onspinwait() %{ 3877 match(OnSpinWait); 3878 ins_cost(200); 3879 3880 format %{ 3881 $$template 3882 $$emit$$"pause\t! 
membar_onspinwait" 3883 %} 3884 ins_encode %{ 3885 __ pause(); 3886 %} 3887 ins_pipe(pipe_slow); 3888 %} 3889 3890 // a * b + c 3891 instruct fmaD_reg(regD a, regD b, regD c) %{ 3892 match(Set c (FmaD c (Binary a b))); 3893 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3894 ins_cost(150); 3895 ins_encode %{ 3896 assert(UseFMA, "Needs FMA instructions support."); 3897 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3898 %} 3899 ins_pipe( pipe_slow ); 3900 %} 3901 3902 // a * b + c 3903 instruct fmaF_reg(regF a, regF b, regF c) %{ 3904 match(Set c (FmaF c (Binary a b))); 3905 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3906 ins_cost(150); 3907 ins_encode %{ 3908 assert(UseFMA, "Needs FMA instructions support."); 3909 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3910 %} 3911 ins_pipe( pipe_slow ); 3912 %} 3913 3914 // ====================VECTOR INSTRUCTIONS===================================== 3915 3916 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 3917 instruct MoveVec2Leg(legVec dst, vec src) %{ 3918 match(Set dst src); 3919 format %{ "" %} 3920 ins_encode %{ 3921 ShouldNotReachHere(); 3922 %} 3923 ins_pipe( fpu_reg_reg ); 3924 %} 3925 3926 instruct MoveLeg2Vec(vec dst, legVec src) %{ 3927 match(Set dst src); 3928 format %{ "" %} 3929 ins_encode %{ 3930 ShouldNotReachHere(); 3931 %} 3932 ins_pipe( fpu_reg_reg ); 3933 %} 3934 3935 // ============================================================================ 3936 3937 // Load vectors generic operand pattern 3938 instruct loadV(vec dst, memory mem) %{ 3939 match(Set dst (LoadVector mem)); 3940 ins_cost(125); 3941 format %{ "load_vector $dst,$mem" %} 3942 ins_encode %{ 3943 BasicType bt = Matcher::vector_element_basic_type(this); 3944 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 3945 %} 3946 ins_pipe( pipe_slow ); 3947 %} 3948 3949 // Store vectors generic operand pattern. 3950 instruct storeV(memory mem, vec src) %{ 3951 match(Set mem (StoreVector mem src)); 3952 ins_cost(145); 3953 format %{ "store_vector $mem,$src\n\t" %} 3954 ins_encode %{ 3955 switch (Matcher::vector_length_in_bytes(this, $src)) { 3956 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 3957 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 3958 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 3959 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 3960 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 3961 default: ShouldNotReachHere(); 3962 } 3963 %} 3964 ins_pipe( pipe_slow ); 3965 %} 3966 3967 // ---------------------------------------- Gather ------------------------------------ 3968 3969 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE 3970 3971 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 3972 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) && 3973 Matcher::vector_length_in_bytes(n) <= 32); 3974 match(Set dst (LoadVectorGather mem idx)); 3975 effect(TEMP dst, TEMP tmp, TEMP mask); 3976 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: the gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is moved to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegI rtmp) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGather mem (Binary idx_base offset)));
  effect(TEMP tmp, TEMP rtmp);
  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t!
using $tmp and $rtmp as TEMP" %} 4031 ins_encode %{ 4032 int vlen_enc = vector_length_encoding(this); 4033 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4034 __ lea($tmp$$Register, $mem$$Address); 4035 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp$$Register, vlen_enc); 4036 %} 4037 ins_pipe( pipe_slow ); 4038 %} 4039 4040 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegP idx_base_temp, 4041 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4042 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4043 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4044 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4045 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4046 ins_encode %{ 4047 int vlen_enc = vector_length_encoding(this); 4048 int vector_len = Matcher::vector_length(this); 4049 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4050 __ lea($tmp$$Register, $mem$$Address); 4051 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4052 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, noreg, $xtmp1$$XMMRegister, 4053 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4054 %} 4055 ins_pipe( pipe_slow ); 4056 %} 4057 4058 instruct vgather_subwordLE8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegI rtmp, rFlagsReg cr) %{ 4059 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4060 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4061 effect(TEMP tmp, TEMP rtmp, KILL cr); 4062 format %{ "vector_gatherLE8_off $dst, $mem, $idx_base, $offset\t! using $tmp and $rtmp as TEMP" %} 4063 ins_encode %{ 4064 int vlen_enc = vector_length_encoding(this); 4065 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4066 __ lea($tmp$$Register, $mem$$Address); 4067 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, $rtmp$$Register, vlen_enc); 4068 %} 4069 ins_pipe( pipe_slow ); 4070 %} 4071 4072 4073 instruct vgather_subwordGT8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegP idx_base_temp, 4074 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4075 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4076 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4077 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4078 format %{ "vector_gatherGT8_off $dst, $mem, $idx_base, $offset\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4079 ins_encode %{ 4080 int vlen_enc = vector_length_encoding(this); 4081 int vector_len = Matcher::vector_length(this); 4082 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4083 __ lea($tmp$$Register, $mem$$Address); 4084 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4085 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, noreg, $xtmp1$$XMMRegister, 4086 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4087 %} 4088 ins_pipe( pipe_slow ); 4089 %} 4090 4091 4092 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4093 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4094 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4095 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4096 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4097 ins_encode %{ 4098 int vlen_enc = vector_length_encoding(this); 4099 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4100 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4101 __ lea($tmp$$Register, $mem$$Address); 4102 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4103 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4104 %} 4105 ins_pipe( pipe_slow ); 4106 %} 4107 4108 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4109 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4110 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4111 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4112 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4113 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4114 ins_encode %{ 4115 int vlen_enc = vector_length_encoding(this); 4116 int vector_len = Matcher::vector_length(this); 4117 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4118 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4119 __ lea($tmp$$Register, $mem$$Address); 4120 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4121 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4122 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4123 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4124 %} 4125 ins_pipe( pipe_slow ); 4126 %} 4127 4128 instruct vgather_masked_subwordLE8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4129 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4130 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4131 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4132 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4133 ins_encode %{ 4134 int vlen_enc = vector_length_encoding(this); 4135 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4136 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4137 __ lea($tmp$$Register, $mem$$Address); 4138 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4139 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4140 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4141 %} 4142 ins_pipe( pipe_slow ); 4143 %} 4144 4145 instruct vgather_masked_subwordGT8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4146 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4147 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4148 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4149 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4150 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4151 ins_encode %{ 4152 int vlen_enc = vector_length_encoding(this); 4153 int vector_len = Matcher::vector_length(this); 4154 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4155 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4156 __ lea($tmp$$Register, $mem$$Address); 4157 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4158 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4159 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4160 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4161 %} 4162 ins_pipe( pipe_slow ); 4163 %} 4164 4165 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4166 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4167 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4168 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4169 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4170 ins_encode %{ 4171 int vlen_enc = vector_length_encoding(this); 4172 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4173 __ lea($tmp$$Register, $mem$$Address); 4174 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4175 if (elem_bt == T_SHORT) { 4176 __ movl($mask_idx$$Register, 0x55555555); 4177 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4178 } 4179 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4180 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4181 %} 4182 ins_pipe( pipe_slow ); 4183 %} 4184 4185 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4186 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4187 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4188 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4189 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4190 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4191 ins_encode %{ 4192 int vlen_enc = vector_length_encoding(this); 4193 int vector_len = Matcher::vector_length(this); 4194 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4195 __ lea($tmp$$Register, $mem$$Address); 4196 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4197 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4198 if (elem_bt == T_SHORT) { 4199 __ movl($mask_idx$$Register, 0x55555555); 4200 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4201 } 4202 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4203 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4204 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4205 %} 4206 ins_pipe( pipe_slow ); 4207 %} 4208 4209 instruct vgather_masked_subwordLE8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4210 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4211 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4212 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4213 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4214 ins_encode %{ 4215 int vlen_enc = vector_length_encoding(this); 4216 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4217 __ lea($tmp$$Register, $mem$$Address); 4218 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4219 if (elem_bt == T_SHORT) { 4220 __ movl($mask_idx$$Register, 0x55555555); 4221 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4222 } 4223 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4224 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4225 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4226 %} 4227 ins_pipe( pipe_slow ); 4228 %} 4229 4230 instruct vgather_masked_subwordGT8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4231 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4232 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4233 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4234 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4235 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================Scatter=======================================

// Scatter INT, LONG, FLOAT, DOUBLE

instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: the scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is moved to a temporary.
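    // Illustrative expansion (an assumption for this note, not taken from the
    // original comments): for a 512-bit T_INT scatter the sequence below emits
    // roughly
    //   kmovw        k_tmp, k_mask
    //   lea          r_tmp, [mem]
    //   vpscatterdd  [r_tmp + zmm_idx]{k_tmp}, zmm_src
    // with k_tmp/r_tmp standing for the $ktmp/$tmp temporaries.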
4289 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4290 __ lea($tmp$$Register, $mem$$Address); 4291 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4292 %} 4293 ins_pipe( pipe_slow ); 4294 %} 4295 4296 // ====================REPLICATE======================================= 4297 4298 // Replicate byte scalar to be vector 4299 instruct vReplB_reg(vec dst, rRegI src) %{ 4300 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 4301 match(Set dst (Replicate src)); 4302 format %{ "replicateB $dst,$src" %} 4303 ins_encode %{ 4304 uint vlen = Matcher::vector_length(this); 4305 if (UseAVX >= 2) { 4306 int vlen_enc = vector_length_encoding(this); 4307 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4308 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4309 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4310 } else { 4311 __ movdl($dst$$XMMRegister, $src$$Register); 4312 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4313 } 4314 } else { 4315 assert(UseAVX < 2, ""); 4316 __ movdl($dst$$XMMRegister, $src$$Register); 4317 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4318 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4319 if (vlen >= 16) { 4320 assert(vlen == 16, ""); 4321 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4322 } 4323 } 4324 %} 4325 ins_pipe( pipe_slow ); 4326 %} 4327 4328 instruct ReplB_mem(vec dst, memory mem) %{ 4329 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE); 4330 match(Set dst (Replicate (LoadB mem))); 4331 format %{ "replicateB $dst,$mem" %} 4332 ins_encode %{ 4333 int vlen_enc = vector_length_encoding(this); 4334 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4335 %} 4336 ins_pipe( pipe_slow ); 4337 %} 4338 4339 // ====================ReplicateS======================================= 4340 4341 instruct vReplS_reg(vec dst, rRegI src) %{ 4342 predicate(Matcher::vector_element_basic_type(n) == T_SHORT); 4343 match(Set dst (Replicate src)); 4344 format %{ "replicateS $dst,$src" %} 4345 ins_encode %{ 4346 uint vlen = Matcher::vector_length(this); 4347 int vlen_enc = vector_length_encoding(this); 4348 if (UseAVX >= 2) { 4349 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4350 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4351 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4352 } else { 4353 __ movdl($dst$$XMMRegister, $src$$Register); 4354 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4355 } 4356 } else { 4357 assert(UseAVX < 2, ""); 4358 __ movdl($dst$$XMMRegister, $src$$Register); 4359 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4360 if (vlen >= 8) { 4361 assert(vlen == 8, ""); 4362 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4363 } 4364 } 4365 %} 4366 ins_pipe( pipe_slow ); 4367 %} 4368 4369 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{ 4370 match(Set dst (Replicate con)); 4371 effect(TEMP rtmp); 4372 format %{ "replicateHF $dst, $con \t! 
using $rtmp as TEMP" %} 4373 ins_encode %{ 4374 int vlen_enc = vector_length_encoding(this); 4375 BasicType bt = Matcher::vector_element_basic_type(this); 4376 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, ""); 4377 __ movl($rtmp$$Register, $con$$constant); 4378 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4379 %} 4380 ins_pipe( pipe_slow ); 4381 %} 4382 4383 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{ 4384 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT); 4385 match(Set dst (Replicate src)); 4386 effect(TEMP rtmp); 4387 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %} 4388 ins_encode %{ 4389 int vlen_enc = vector_length_encoding(this); 4390 __ vmovw($rtmp$$Register, $src$$XMMRegister); 4391 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4392 %} 4393 ins_pipe( pipe_slow ); 4394 %} 4395 4396 instruct ReplS_mem(vec dst, memory mem) %{ 4397 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4398 match(Set dst (Replicate (LoadS mem))); 4399 format %{ "replicateS $dst,$mem" %} 4400 ins_encode %{ 4401 int vlen_enc = vector_length_encoding(this); 4402 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4403 %} 4404 ins_pipe( pipe_slow ); 4405 %} 4406 4407 // ====================ReplicateI======================================= 4408 4409 instruct ReplI_reg(vec dst, rRegI src) %{ 4410 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4411 match(Set dst (Replicate src)); 4412 format %{ "replicateI $dst,$src" %} 4413 ins_encode %{ 4414 uint vlen = Matcher::vector_length(this); 4415 int vlen_enc = vector_length_encoding(this); 4416 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4417 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4418 } else if (VM_Version::supports_avx2()) { 4419 __ movdl($dst$$XMMRegister, $src$$Register); 4420 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4421 } else { 4422 __ movdl($dst$$XMMRegister, $src$$Register); 4423 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4424 } 4425 %} 4426 ins_pipe( pipe_slow ); 4427 %} 4428 4429 instruct ReplI_mem(vec dst, memory mem) %{ 4430 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4431 match(Set dst (Replicate (LoadI mem))); 4432 format %{ "replicateI $dst,$mem" %} 4433 ins_encode %{ 4434 int vlen_enc = vector_length_encoding(this); 4435 if (VM_Version::supports_avx2()) { 4436 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4437 } else if (VM_Version::supports_avx()) { 4438 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4439 } else { 4440 __ movdl($dst$$XMMRegister, $mem$$Address); 4441 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4442 } 4443 %} 4444 ins_pipe( pipe_slow ); 4445 %} 4446 4447 instruct ReplI_imm(vec dst, immI con) %{ 4448 predicate(Matcher::is_non_long_integral_vector(n)); 4449 match(Set dst (Replicate con)); 4450 format %{ "replicateI $dst,$con" %} 4451 ins_encode %{ 4452 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4453 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 16) / 4454 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4455 BasicType bt = Matcher::vector_element_basic_type(this); 4456 int vlen = Matcher::vector_length_in_bytes(this); 4457 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4458 %} 4459 ins_pipe( pipe_slow ); 4460 %} 4461 4462 // Replicate scalar zero to be vector 4463 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4464 predicate(Matcher::is_non_long_integral_vector(n)); 4465 match(Set dst (Replicate zero)); 4466 format %{ "replicateI $dst,$zero" %} 4467 ins_encode %{ 4468 int vlen_enc = vector_length_encoding(this); 4469 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4470 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4471 } else { 4472 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4473 } 4474 %} 4475 ins_pipe( fpu_reg_reg ); 4476 %} 4477 4478 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4479 predicate(Matcher::is_non_long_integral_vector(n)); 4480 match(Set dst (Replicate con)); 4481 format %{ "vallones $dst" %} 4482 ins_encode %{ 4483 int vector_len = vector_length_encoding(this); 4484 __ vallones($dst$$XMMRegister, vector_len); 4485 %} 4486 ins_pipe( pipe_slow ); 4487 %} 4488 4489 // ====================ReplicateL======================================= 4490 4491 // Replicate long (8 byte) scalar to be vector 4492 instruct ReplL_reg(vec dst, rRegL src) %{ 4493 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4494 match(Set dst (Replicate src)); 4495 format %{ "replicateL $dst,$src" %} 4496 ins_encode %{ 4497 int vlen = Matcher::vector_length(this); 4498 int vlen_enc = vector_length_encoding(this); 4499 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4500 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4501 } else if (VM_Version::supports_avx2()) { 4502 __ movdq($dst$$XMMRegister, $src$$Register); 4503 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4504 } else { 4505 __ movdq($dst$$XMMRegister, $src$$Register); 4506 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4507 } 4508 %} 4509 ins_pipe( pipe_slow ); 4510 %} 4511 4512 instruct ReplL_mem(vec dst, memory mem) %{ 4513 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4514 match(Set dst (Replicate (LoadL mem))); 4515 format %{ "replicateL $dst,$mem" %} 4516 ins_encode %{ 4517 int vlen_enc = vector_length_encoding(this); 4518 if (VM_Version::supports_avx2()) { 4519 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4520 } else if (VM_Version::supports_sse3()) { 4521 __ movddup($dst$$XMMRegister, $mem$$Address); 4522 } else { 4523 __ movq($dst$$XMMRegister, $mem$$Address); 4524 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4525 } 4526 %} 4527 ins_pipe( pipe_slow ); 4528 %} 4529 4530 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4531 instruct ReplL_imm(vec dst, immL con) %{ 4532 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4533 match(Set dst (Replicate con)); 4534 format %{ "replicateL $dst,$con" %} 4535 ins_encode %{ 4536 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4537 int vlen = Matcher::vector_length_in_bytes(this); 4538 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4539 %} 4540 ins_pipe( pipe_slow ); 4541 %} 4542 4543 instruct ReplL_zero(vec dst, immL0 zero) %{ 4544 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4545 match(Set dst (Replicate zero)); 4546 format %{ "replicateL $dst,$zero" %} 4547 ins_encode %{ 4548 int vlen_enc = vector_length_encoding(this); 4549 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4550 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4551 } else { 4552 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4553 } 4554 %} 4555 ins_pipe( fpu_reg_reg ); 4556 %} 4557 4558 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4559 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4560 match(Set dst (Replicate con)); 4561 format %{ "vallones $dst" %} 4562 ins_encode %{ 4563 int vector_len = vector_length_encoding(this); 4564 __ vallones($dst$$XMMRegister, vector_len); 4565 %} 4566 ins_pipe( pipe_slow ); 4567 %} 4568 4569 // ====================ReplicateF======================================= 4570 4571 instruct vReplF_reg(vec dst, vlRegF src) %{ 4572 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4573 match(Set dst (Replicate src)); 4574 format %{ "replicateF $dst,$src" %} 4575 ins_encode %{ 4576 uint vlen = Matcher::vector_length(this); 4577 int vlen_enc = vector_length_encoding(this); 4578 if (vlen <= 4) { 4579 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4580 } else if (VM_Version::supports_avx2()) { 4581 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4582 } else { 4583 assert(vlen == 8, "sanity"); 4584 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4585 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4586 } 4587 %} 4588 ins_pipe( pipe_slow ); 4589 %} 4590 4591 instruct ReplF_reg(vec dst, vlRegF src) %{ 4592 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4593 match(Set dst (Replicate src)); 4594 format %{ "replicateF $dst,$src" %} 4595 ins_encode %{ 4596 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4597 %} 4598 ins_pipe( pipe_slow ); 4599 %} 4600 4601 instruct ReplF_mem(vec dst, memory mem) %{ 4602 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4603 match(Set dst (Replicate (LoadF mem))); 4604 format %{ "replicateF $dst,$mem" %} 4605 ins_encode %{ 4606 int vlen_enc = vector_length_encoding(this); 4607 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4608 %} 4609 ins_pipe( pipe_slow ); 4610 %} 4611 4612 // Replicate float scalar immediate to be vector by loading from const table. 4613 instruct ReplF_imm(vec dst, immF con) %{ 4614 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4615 match(Set dst (Replicate con)); 4616 format %{ "replicateF $dst,$con" %} 4617 ins_encode %{ 4618 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant, 4619 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
1 : 2) : 4)); 4620 int vlen = Matcher::vector_length_in_bytes(this); 4621 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4622 %} 4623 ins_pipe( pipe_slow ); 4624 %} 4625 4626 instruct ReplF_zero(vec dst, immF0 zero) %{ 4627 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4628 match(Set dst (Replicate zero)); 4629 format %{ "replicateF $dst,$zero" %} 4630 ins_encode %{ 4631 int vlen_enc = vector_length_encoding(this); 4632 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4633 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4634 } else { 4635 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4636 } 4637 %} 4638 ins_pipe( fpu_reg_reg ); 4639 %} 4640 4641 // ====================ReplicateD======================================= 4642 4643 // Replicate double (8 bytes) scalar to be vector 4644 instruct vReplD_reg(vec dst, vlRegD src) %{ 4645 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4646 match(Set dst (Replicate src)); 4647 format %{ "replicateD $dst,$src" %} 4648 ins_encode %{ 4649 uint vlen = Matcher::vector_length(this); 4650 int vlen_enc = vector_length_encoding(this); 4651 if (vlen <= 2) { 4652 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4653 } else if (VM_Version::supports_avx2()) { 4654 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4655 } else { 4656 assert(vlen == 4, "sanity"); 4657 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4658 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4659 } 4660 %} 4661 ins_pipe( pipe_slow ); 4662 %} 4663 4664 instruct ReplD_reg(vec dst, vlRegD src) %{ 4665 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4666 match(Set dst (Replicate src)); 4667 format %{ "replicateD $dst,$src" %} 4668 ins_encode %{ 4669 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4670 %} 4671 ins_pipe( pipe_slow ); 4672 %} 4673 4674 instruct ReplD_mem(vec dst, memory mem) %{ 4675 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4676 match(Set dst (Replicate (LoadD mem))); 4677 format %{ "replicateD $dst,$mem" %} 4678 ins_encode %{ 4679 if (Matcher::vector_length(this) >= 4) { 4680 int vlen_enc = vector_length_encoding(this); 4681 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4682 } else { 4683 __ movddup($dst$$XMMRegister, $mem$$Address); 4684 } 4685 %} 4686 ins_pipe( pipe_slow ); 4687 %} 4688 4689 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 4690 instruct ReplD_imm(vec dst, immD con) %{ 4691 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4692 match(Set dst (Replicate con)); 4693 format %{ "replicateD $dst,$con" %} 4694 ins_encode %{ 4695 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4696 int vlen = Matcher::vector_length_in_bytes(this); 4697 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen); 4698 %} 4699 ins_pipe( pipe_slow ); 4700 %} 4701 4702 instruct ReplD_zero(vec dst, immD0 zero) %{ 4703 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4704 match(Set dst (Replicate zero)); 4705 format %{ "replicateD $dst,$zero" %} 4706 ins_encode %{ 4707 int vlen_enc = vector_length_encoding(this); 4708 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4709 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4710 } else { 4711 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4712 } 4713 %} 4714 ins_pipe( fpu_reg_reg ); 4715 %} 4716 4717 // ====================VECTOR INSERT======================================= 4718 4719 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4720 predicate(Matcher::vector_length_in_bytes(n) < 32); 4721 match(Set dst (VectorInsert (Binary dst val) idx)); 4722 format %{ "vector_insert $dst,$val,$idx" %} 4723 ins_encode %{ 4724 assert(UseSSE >= 4, "required"); 4725 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4726 4727 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4728 4729 assert(is_integral_type(elem_bt), ""); 4730 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4731 4732 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4733 %} 4734 ins_pipe( pipe_slow ); 4735 %} 4736 4737 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4738 predicate(Matcher::vector_length_in_bytes(n) == 32); 4739 match(Set dst (VectorInsert (Binary src val) idx)); 4740 effect(TEMP vtmp); 4741 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4742 ins_encode %{ 4743 int vlen_enc = Assembler::AVX_256bit; 4744 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4745 int elem_per_lane = 16/type2aelembytes(elem_bt); 4746 int log2epr = log2(elem_per_lane); 4747 4748 assert(is_integral_type(elem_bt), "sanity"); 4749 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4750 4751 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4752 uint y_idx = ($idx$$constant >> log2epr) & 1; 4753 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4754 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4755 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4756 %} 4757 ins_pipe( pipe_slow ); 4758 %} 4759 4760 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4761 predicate(Matcher::vector_length_in_bytes(n) == 64); 4762 match(Set dst (VectorInsert (Binary src val) idx)); 4763 effect(TEMP vtmp); 4764 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4765 ins_encode %{ 4766 assert(UseAVX > 2, "sanity"); 4767 4768 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4769 int elem_per_lane = 16/type2aelembytes(elem_bt); 4770 int log2epr = log2(elem_per_lane); 4771 4772 assert(is_integral_type(elem_bt), ""); 4773 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4774 4775 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4776 uint y_idx = ($idx$$constant >> log2epr) & 3; 4777 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4778 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4779 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, 
y_idx); 4780 %} 4781 ins_pipe( pipe_slow ); 4782 %} 4783 4784 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4785 predicate(Matcher::vector_length(n) == 2); 4786 match(Set dst (VectorInsert (Binary dst val) idx)); 4787 format %{ "vector_insert $dst,$val,$idx" %} 4788 ins_encode %{ 4789 assert(UseSSE >= 4, "required"); 4790 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4791 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4792 4793 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4794 %} 4795 ins_pipe( pipe_slow ); 4796 %} 4797 4798 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4799 predicate(Matcher::vector_length(n) == 4); 4800 match(Set dst (VectorInsert (Binary src val) idx)); 4801 effect(TEMP vtmp); 4802 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4803 ins_encode %{ 4804 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4805 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4806 4807 uint x_idx = $idx$$constant & right_n_bits(1); 4808 uint y_idx = ($idx$$constant >> 1) & 1; 4809 int vlen_enc = Assembler::AVX_256bit; 4810 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4811 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4812 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4813 %} 4814 ins_pipe( pipe_slow ); 4815 %} 4816 4817 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 4818 predicate(Matcher::vector_length(n) == 8); 4819 match(Set dst (VectorInsert (Binary src val) idx)); 4820 effect(TEMP vtmp); 4821 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4822 ins_encode %{ 4823 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 4824 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4825 4826 uint x_idx = $idx$$constant & right_n_bits(1); 4827 uint y_idx = ($idx$$constant >> 1) & 3; 4828 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4829 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4830 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4831 %} 4832 ins_pipe( pipe_slow ); 4833 %} 4834 4835 instruct insertF(vec dst, regF val, immU8 idx) %{ 4836 predicate(Matcher::vector_length(n) < 8); 4837 match(Set dst (VectorInsert (Binary dst val) idx)); 4838 format %{ "vector_insert $dst,$val,$idx" %} 4839 ins_encode %{ 4840 assert(UseSSE >= 4, "sanity"); 4841 4842 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4843 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4844 4845 uint x_idx = $idx$$constant & right_n_bits(2); 4846 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4847 %} 4848 ins_pipe( pipe_slow ); 4849 %} 4850 4851 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 4852 predicate(Matcher::vector_length(n) >= 8); 4853 match(Set dst (VectorInsert (Binary src val) idx)); 4854 effect(TEMP vtmp); 4855 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4856 ins_encode %{ 4857 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4858 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4859 4860 int vlen = Matcher::vector_length(this); 4861 uint x_idx = $idx$$constant & right_n_bits(2); 4862 if (vlen == 8) { 4863 uint y_idx = ($idx$$constant >> 2) & 1; 4864 
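      // The 256-bit vector is viewed as two 128-bit lanes: y_idx ($idx / 4)
      // selects the lane and x_idx ($idx % 4) the float slot within it. The
      // lane is extracted, patched via insertps, and re-inserted below.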
int vlen_enc = Assembler::AVX_256bit; 4865 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4866 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4867 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4868 } else { 4869 assert(vlen == 16, "sanity"); 4870 uint y_idx = ($idx$$constant >> 2) & 3; 4871 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4872 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4873 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4874 } 4875 %} 4876 ins_pipe( pipe_slow ); 4877 %} 4878 4879 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 4880 predicate(Matcher::vector_length(n) == 2); 4881 match(Set dst (VectorInsert (Binary dst val) idx)); 4882 effect(TEMP tmp); 4883 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 4884 ins_encode %{ 4885 assert(UseSSE >= 4, "sanity"); 4886 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4887 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4888 4889 __ movq($tmp$$Register, $val$$XMMRegister); 4890 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 4891 %} 4892 ins_pipe( pipe_slow ); 4893 %} 4894 4895 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 4896 predicate(Matcher::vector_length(n) == 4); 4897 match(Set dst (VectorInsert (Binary src val) idx)); 4898 effect(TEMP vtmp, TEMP tmp); 4899 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 4900 ins_encode %{ 4901 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4902 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4903 4904 uint x_idx = $idx$$constant & right_n_bits(1); 4905 uint y_idx = ($idx$$constant >> 1) & 1; 4906 int vlen_enc = Assembler::AVX_256bit; 4907 __ movq($tmp$$Register, $val$$XMMRegister); 4908 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4909 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4910 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4911 %} 4912 ins_pipe( pipe_slow ); 4913 %} 4914 4915 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 4916 predicate(Matcher::vector_length(n) == 8); 4917 match(Set dst (VectorInsert (Binary src val) idx)); 4918 effect(TEMP tmp, TEMP vtmp); 4919 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4920 ins_encode %{ 4921 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4922 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4923 4924 uint x_idx = $idx$$constant & right_n_bits(1); 4925 uint y_idx = ($idx$$constant >> 1) & 3; 4926 __ movq($tmp$$Register, $val$$XMMRegister); 4927 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4928 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4929 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4930 %} 4931 ins_pipe( pipe_slow ); 4932 %} 4933 4934 // ====================REDUCTION ARITHMETIC======================================= 4935 4936 // =======================Int Reduction========================================== 4937 4938 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4939 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); 
// src2 4940 match(Set dst (AddReductionVI src1 src2)); 4941 match(Set dst (MulReductionVI src1 src2)); 4942 match(Set dst (AndReductionV src1 src2)); 4943 match(Set dst ( OrReductionV src1 src2)); 4944 match(Set dst (XorReductionV src1 src2)); 4945 match(Set dst (MinReductionV src1 src2)); 4946 match(Set dst (MaxReductionV src1 src2)); 4947 effect(TEMP vtmp1, TEMP vtmp2); 4948 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4949 ins_encode %{ 4950 int opcode = this->ideal_Opcode(); 4951 int vlen = Matcher::vector_length(this, $src2); 4952 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4953 %} 4954 ins_pipe( pipe_slow ); 4955 %} 4956 4957 // =======================Long Reduction========================================== 4958 4959 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4960 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); 4961 match(Set dst (AddReductionVL src1 src2)); 4962 match(Set dst (MulReductionVL src1 src2)); 4963 match(Set dst (AndReductionV src1 src2)); 4964 match(Set dst ( OrReductionV src1 src2)); 4965 match(Set dst (XorReductionV src1 src2)); 4966 match(Set dst (MinReductionV src1 src2)); 4967 match(Set dst (MaxReductionV src1 src2)); 4968 effect(TEMP vtmp1, TEMP vtmp2); 4969 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4970 ins_encode %{ 4971 int opcode = this->ideal_Opcode(); 4972 int vlen = Matcher::vector_length(this, $src2); 4973 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4974 %} 4975 ins_pipe( pipe_slow ); 4976 %} 4977 4978 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 4979 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); 4980 match(Set dst (AddReductionVL src1 src2)); 4981 match(Set dst (MulReductionVL src1 src2)); 4982 match(Set dst (AndReductionV src1 src2)); 4983 match(Set dst ( OrReductionV src1 src2)); 4984 match(Set dst (XorReductionV src1 src2)); 4985 match(Set dst (MinReductionV src1 src2)); 4986 match(Set dst (MaxReductionV src1 src2)); 4987 effect(TEMP vtmp1, TEMP vtmp2); 4988 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4989 ins_encode %{ 4990 int opcode = this->ideal_Opcode(); 4991 int vlen = Matcher::vector_length(this, $src2); 4992 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4993 %} 4994 ins_pipe( pipe_slow ); 4995 %} 4996 4997 // =======================Float Reduction========================================== 4998 4999 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 5000 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src 5001 match(Set dst (AddReductionVF dst src)); 5002 match(Set dst (MulReductionVF dst src)); 5003 effect(TEMP dst, TEMP vtmp); 5004 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 5005 ins_encode %{ 5006 int opcode = this->ideal_Opcode(); 5007 int vlen = Matcher::vector_length(this, $src); 5008 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5009 %} 5010 ins_pipe( pipe_slow ); 5011 %} 5012 5013 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 5014 
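  // Like reductionF128 above, this is the strict-order variant: requires_strict_order()
  // holds for reductions that must keep the source accumulation order (e.g. FP add/mul
  // reductions created by the auto-vectorizer), so reduce_fp combines the lanes
  // sequentially rather than with the pairwise scheme used by the unordered_reduction*
  // rules further below.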
predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5015 match(Set dst (AddReductionVF dst src)); 5016 match(Set dst (MulReductionVF dst src)); 5017 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5018 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5019 ins_encode %{ 5020 int opcode = this->ideal_Opcode(); 5021 int vlen = Matcher::vector_length(this, $src); 5022 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5023 %} 5024 ins_pipe( pipe_slow ); 5025 %} 5026 5027 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5028 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src 5029 match(Set dst (AddReductionVF dst src)); 5030 match(Set dst (MulReductionVF dst src)); 5031 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5032 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5033 ins_encode %{ 5034 int opcode = this->ideal_Opcode(); 5035 int vlen = Matcher::vector_length(this, $src); 5036 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5037 %} 5038 ins_pipe( pipe_slow ); 5039 %} 5040 5041 5042 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{ 5043 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5044 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5045 // src1 contains reduction identity 5046 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5047 match(Set dst (AddReductionVF src1 src2)); 5048 match(Set dst (MulReductionVF src1 src2)); 5049 effect(TEMP dst); 5050 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %} 5051 ins_encode %{ 5052 int opcode = this->ideal_Opcode(); 5053 int vlen = Matcher::vector_length(this, $src2); 5054 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5055 %} 5056 ins_pipe( pipe_slow ); 5057 %} 5058 5059 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{ 5060 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5061 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5062 // src1 contains reduction identity 5063 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5064 match(Set dst (AddReductionVF src1 src2)); 5065 match(Set dst (MulReductionVF src1 src2)); 5066 effect(TEMP dst, TEMP vtmp); 5067 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5068 ins_encode %{ 5069 int opcode = this->ideal_Opcode(); 5070 int vlen = Matcher::vector_length(this, $src2); 5071 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5072 %} 5073 ins_pipe( pipe_slow ); 5074 %} 5075 5076 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{ 5077 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5078 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
5079 // src1 contains reduction identity 5080 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5081 match(Set dst (AddReductionVF src1 src2)); 5082 match(Set dst (MulReductionVF src1 src2)); 5083 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5084 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5085 ins_encode %{ 5086 int opcode = this->ideal_Opcode(); 5087 int vlen = Matcher::vector_length(this, $src2); 5088 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5089 %} 5090 ins_pipe( pipe_slow ); 5091 %} 5092 5093 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5094 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5095 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5096 // src1 contains reduction identity 5097 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2 5098 match(Set dst (AddReductionVF src1 src2)); 5099 match(Set dst (MulReductionVF src1 src2)); 5100 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5101 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5102 ins_encode %{ 5103 int opcode = this->ideal_Opcode(); 5104 int vlen = Matcher::vector_length(this, $src2); 5105 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5106 %} 5107 ins_pipe( pipe_slow ); 5108 %} 5109 5110 // =======================Double Reduction========================================== 5111 5112 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 5113 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src 5114 match(Set dst (AddReductionVD dst src)); 5115 match(Set dst (MulReductionVD dst src)); 5116 effect(TEMP dst, TEMP vtmp); 5117 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 5118 ins_encode %{ 5119 int opcode = this->ideal_Opcode(); 5120 int vlen = Matcher::vector_length(this, $src); 5121 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5122 %} 5123 ins_pipe( pipe_slow ); 5124 %} 5125 5126 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 5127 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src 5128 match(Set dst (AddReductionVD dst src)); 5129 match(Set dst (MulReductionVD dst src)); 5130 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5131 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5132 ins_encode %{ 5133 int opcode = this->ideal_Opcode(); 5134 int vlen = Matcher::vector_length(this, $src); 5135 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5136 %} 5137 ins_pipe( pipe_slow ); 5138 %} 5139 5140 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5141 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5142 match(Set dst (AddReductionVD dst src)); 5143 match(Set dst (MulReductionVD dst src)); 5144 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5145 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5146 ins_encode %{ 5147 int opcode = this->ideal_Opcode(); 5148 int vlen = Matcher::vector_length(this, $src); 5149 __ 
reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5150 %} 5151 ins_pipe( pipe_slow ); 5152 %} 5153 5154 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{ 5155 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5156 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5157 // src1 contains reduction identity 5158 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5159 match(Set dst (AddReductionVD src1 src2)); 5160 match(Set dst (MulReductionVD src1 src2)); 5161 effect(TEMP dst); 5162 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %} 5163 ins_encode %{ 5164 int opcode = this->ideal_Opcode(); 5165 int vlen = Matcher::vector_length(this, $src2); 5166 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5167 %} 5168 ins_pipe( pipe_slow ); 5169 %} 5170 5171 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{ 5172 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5173 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5174 // src1 contains reduction identity 5175 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5176 match(Set dst (AddReductionVD src1 src2)); 5177 match(Set dst (MulReductionVD src1 src2)); 5178 effect(TEMP dst, TEMP vtmp); 5179 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5180 ins_encode %{ 5181 int opcode = this->ideal_Opcode(); 5182 int vlen = Matcher::vector_length(this, $src2); 5183 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5184 %} 5185 ins_pipe( pipe_slow ); 5186 %} 5187 5188 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5189 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5190 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
5191 // src1 contains reduction identity 5192 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5193 match(Set dst (AddReductionVD src1 src2)); 5194 match(Set dst (MulReductionVD src1 src2)); 5195 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5196 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5197 ins_encode %{ 5198 int opcode = this->ideal_Opcode(); 5199 int vlen = Matcher::vector_length(this, $src2); 5200 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5201 %} 5202 ins_pipe( pipe_slow ); 5203 %} 5204 5205 // =======================Byte Reduction========================================== 5206 5207 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5208 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 5209 match(Set dst (AddReductionVI src1 src2)); 5210 match(Set dst (AndReductionV src1 src2)); 5211 match(Set dst ( OrReductionV src1 src2)); 5212 match(Set dst (XorReductionV src1 src2)); 5213 match(Set dst (MinReductionV src1 src2)); 5214 match(Set dst (MaxReductionV src1 src2)); 5215 effect(TEMP vtmp1, TEMP vtmp2); 5216 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5217 ins_encode %{ 5218 int opcode = this->ideal_Opcode(); 5219 int vlen = Matcher::vector_length(this, $src2); 5220 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5221 %} 5222 ins_pipe( pipe_slow ); 5223 %} 5224 5225 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5226 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 5227 match(Set dst (AddReductionVI src1 src2)); 5228 match(Set dst (AndReductionV src1 src2)); 5229 match(Set dst ( OrReductionV src1 src2)); 5230 match(Set dst (XorReductionV src1 src2)); 5231 match(Set dst (MinReductionV src1 src2)); 5232 match(Set dst (MaxReductionV src1 src2)); 5233 effect(TEMP vtmp1, TEMP vtmp2); 5234 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5235 ins_encode %{ 5236 int opcode = this->ideal_Opcode(); 5237 int vlen = Matcher::vector_length(this, $src2); 5238 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5239 %} 5240 ins_pipe( pipe_slow ); 5241 %} 5242 5243 // =======================Short Reduction========================================== 5244 5245 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5246 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 5247 match(Set dst (AddReductionVI src1 src2)); 5248 match(Set dst (MulReductionVI src1 src2)); 5249 match(Set dst (AndReductionV src1 src2)); 5250 match(Set dst ( OrReductionV src1 src2)); 5251 match(Set dst (XorReductionV src1 src2)); 5252 match(Set dst (MinReductionV src1 src2)); 5253 match(Set dst (MaxReductionV src1 src2)); 5254 effect(TEMP vtmp1, TEMP vtmp2); 5255 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5256 ins_encode %{ 5257 int opcode = this->ideal_Opcode(); 5258 int vlen = Matcher::vector_length(this, $src2); 5259 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5260 %} 5261 ins_pipe( pipe_slow 
); 5262 %} 5263 5264 // =======================Mul Reduction========================================== 5265 5266 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5267 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5268 Matcher::vector_length(n->in(2)) <= 32); // src2 5269 match(Set dst (MulReductionVI src1 src2)); 5270 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5271 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5272 ins_encode %{ 5273 int opcode = this->ideal_Opcode(); 5274 int vlen = Matcher::vector_length(this, $src2); 5275 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5276 %} 5277 ins_pipe( pipe_slow ); 5278 %} 5279 5280 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5281 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5282 Matcher::vector_length(n->in(2)) == 64); // src2 5283 match(Set dst (MulReductionVI src1 src2)); 5284 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5285 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5286 ins_encode %{ 5287 int opcode = this->ideal_Opcode(); 5288 int vlen = Matcher::vector_length(this, $src2); 5289 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5290 %} 5291 ins_pipe( pipe_slow ); 5292 %} 5293 5294 //--------------------Min/Max Float Reduction -------------------- 5295 // Float Min Reduction 5296 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, 5297 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5298 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5299 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5300 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5301 Matcher::vector_length(n->in(2)) == 2); 5302 match(Set dst (MinReductionV src1 src2)); 5303 match(Set dst (MaxReductionV src1 src2)); 5304 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5305 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5306 ins_encode %{ 5307 assert(UseAVX > 0, "sanity"); 5308 5309 int opcode = this->ideal_Opcode(); 5310 int vlen = Matcher::vector_length(this, $src2); 5311 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5312 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5313 %} 5314 ins_pipe( pipe_slow ); 5315 %} 5316 5317 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5318 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5319 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5320 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5321 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5322 Matcher::vector_length(n->in(2)) >= 4); 5323 match(Set dst (MinReductionV src1 src2)); 5324 match(Set dst (MaxReductionV src1 src2)); 5325 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5326 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5327 ins_encode %{ 5328 assert(UseAVX > 0, "sanity"); 5329 5330 int opcode = this->ideal_Opcode(); 5331 int vlen = 
Matcher::vector_length(this, $src2); 5332 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5333 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5334 %} 5335 ins_pipe( pipe_slow ); 5336 %} 5337 5338 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, 5339 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5340 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5341 Matcher::vector_length(n->in(2)) == 2); 5342 match(Set dst (MinReductionV dst src)); 5343 match(Set dst (MaxReductionV dst src)); 5344 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5345 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5346 ins_encode %{ 5347 assert(UseAVX > 0, "sanity"); 5348 5349 int opcode = this->ideal_Opcode(); 5350 int vlen = Matcher::vector_length(this, $src); 5351 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5352 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5353 %} 5354 ins_pipe( pipe_slow ); 5355 %} 5356 5357 5358 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 5359 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5360 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5361 Matcher::vector_length(n->in(2)) >= 4); 5362 match(Set dst (MinReductionV dst src)); 5363 match(Set dst (MaxReductionV dst src)); 5364 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5365 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5366 ins_encode %{ 5367 assert(UseAVX > 0, "sanity"); 5368 5369 int opcode = this->ideal_Opcode(); 5370 int vlen = Matcher::vector_length(this, $src); 5371 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5372 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5373 %} 5374 ins_pipe( pipe_slow ); 5375 %} 5376 5377 5378 //--------------------Min Double Reduction -------------------- 5379 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 5380 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5381 rFlagsReg cr) %{ 5382 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5383 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5384 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5385 Matcher::vector_length(n->in(2)) == 2); 5386 match(Set dst (MinReductionV src1 src2)); 5387 match(Set dst (MaxReductionV src1 src2)); 5388 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5389 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5390 ins_encode %{ 5391 assert(UseAVX > 0, "sanity"); 5392 5393 int opcode = this->ideal_Opcode(); 5394 int vlen = Matcher::vector_length(this, $src2); 5395 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5396 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5397 %} 5398 ins_pipe( pipe_slow ); 5399 %} 5400 5401 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 5402 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5403 rFlagsReg cr) %{ 5404 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5405 
((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5406 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5407 Matcher::vector_length(n->in(2)) >= 4); 5408 match(Set dst (MinReductionV src1 src2)); 5409 match(Set dst (MaxReductionV src1 src2)); 5410 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5411 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5412 ins_encode %{ 5413 assert(UseAVX > 0, "sanity"); 5414 5415 int opcode = this->ideal_Opcode(); 5416 int vlen = Matcher::vector_length(this, $src2); 5417 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5418 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5419 %} 5420 ins_pipe( pipe_slow ); 5421 %} 5422 5423 5424 instruct minmax_reduction2D_av(legRegD dst, legVec src, 5425 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5426 rFlagsReg cr) %{ 5427 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5428 Matcher::vector_length(n->in(2)) == 2); 5429 match(Set dst (MinReductionV dst src)); 5430 match(Set dst (MaxReductionV dst src)); 5431 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5432 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5433 ins_encode %{ 5434 assert(UseAVX > 0, "sanity"); 5435 5436 int opcode = this->ideal_Opcode(); 5437 int vlen = Matcher::vector_length(this, $src); 5438 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5439 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5440 %} 5441 ins_pipe( pipe_slow ); 5442 %} 5443 5444 instruct minmax_reductionD_av(legRegD dst, legVec src, 5445 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5446 rFlagsReg cr) %{ 5447 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5448 Matcher::vector_length(n->in(2)) >= 4); 5449 match(Set dst (MinReductionV dst src)); 5450 match(Set dst (MaxReductionV dst src)); 5451 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5452 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5453 ins_encode %{ 5454 assert(UseAVX > 0, "sanity"); 5455 5456 int opcode = this->ideal_Opcode(); 5457 int vlen = Matcher::vector_length(this, $src); 5458 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5459 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5460 %} 5461 ins_pipe( pipe_slow ); 5462 %} 5463 5464 // ====================VECTOR ARITHMETIC======================================= 5465 5466 // --------------------------------- ADD -------------------------------------- 5467 5468 // Bytes vector add 5469 instruct vaddB(vec dst, vec src) %{ 5470 predicate(UseAVX == 0); 5471 match(Set dst (AddVB dst src)); 5472 format %{ "paddb $dst,$src\t! add packedB" %} 5473 ins_encode %{ 5474 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5475 %} 5476 ins_pipe( pipe_slow ); 5477 %} 5478 5479 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5480 predicate(UseAVX > 0); 5481 match(Set dst (AddVB src1 src2)); 5482 format %{ "vpaddb $dst,$src1,$src2\t! 
add packedB" %} 5483 ins_encode %{ 5484 int vlen_enc = vector_length_encoding(this); 5485 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5486 %} 5487 ins_pipe( pipe_slow ); 5488 %} 5489 5490 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5491 predicate((UseAVX > 0) && 5492 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5493 match(Set dst (AddVB src (LoadVector mem))); 5494 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5495 ins_encode %{ 5496 int vlen_enc = vector_length_encoding(this); 5497 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5498 %} 5499 ins_pipe( pipe_slow ); 5500 %} 5501 5502 // Shorts/Chars vector add 5503 instruct vaddS(vec dst, vec src) %{ 5504 predicate(UseAVX == 0); 5505 match(Set dst (AddVS dst src)); 5506 format %{ "paddw $dst,$src\t! add packedS" %} 5507 ins_encode %{ 5508 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5509 %} 5510 ins_pipe( pipe_slow ); 5511 %} 5512 5513 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5514 predicate(UseAVX > 0); 5515 match(Set dst (AddVS src1 src2)); 5516 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5517 ins_encode %{ 5518 int vlen_enc = vector_length_encoding(this); 5519 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5520 %} 5521 ins_pipe( pipe_slow ); 5522 %} 5523 5524 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5525 predicate((UseAVX > 0) && 5526 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5527 match(Set dst (AddVS src (LoadVector mem))); 5528 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5529 ins_encode %{ 5530 int vlen_enc = vector_length_encoding(this); 5531 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5532 %} 5533 ins_pipe( pipe_slow ); 5534 %} 5535 5536 // Integers vector add 5537 instruct vaddI(vec dst, vec src) %{ 5538 predicate(UseAVX == 0); 5539 match(Set dst (AddVI dst src)); 5540 format %{ "paddd $dst,$src\t! add packedI" %} 5541 ins_encode %{ 5542 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5543 %} 5544 ins_pipe( pipe_slow ); 5545 %} 5546 5547 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5548 predicate(UseAVX > 0); 5549 match(Set dst (AddVI src1 src2)); 5550 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5551 ins_encode %{ 5552 int vlen_enc = vector_length_encoding(this); 5553 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5554 %} 5555 ins_pipe( pipe_slow ); 5556 %} 5557 5558 5559 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5560 predicate((UseAVX > 0) && 5561 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5562 match(Set dst (AddVI src (LoadVector mem))); 5563 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5564 ins_encode %{ 5565 int vlen_enc = vector_length_encoding(this); 5566 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5567 %} 5568 ins_pipe( pipe_slow ); 5569 %} 5570 5571 // Longs vector add 5572 instruct vaddL(vec dst, vec src) %{ 5573 predicate(UseAVX == 0); 5574 match(Set dst (AddVL dst src)); 5575 format %{ "paddq $dst,$src\t! add packedL" %} 5576 ins_encode %{ 5577 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5578 %} 5579 ins_pipe( pipe_slow ); 5580 %} 5581 5582 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5583 predicate(UseAVX > 0); 5584 match(Set dst (AddVL src1 src2)); 5585 format %{ "vpaddq $dst,$src1,$src2\t! 
add packedL" %} 5586 ins_encode %{ 5587 int vlen_enc = vector_length_encoding(this); 5588 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5589 %} 5590 ins_pipe( pipe_slow ); 5591 %} 5592 5593 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5594 predicate((UseAVX > 0) && 5595 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5596 match(Set dst (AddVL src (LoadVector mem))); 5597 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5598 ins_encode %{ 5599 int vlen_enc = vector_length_encoding(this); 5600 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5601 %} 5602 ins_pipe( pipe_slow ); 5603 %} 5604 5605 // Floats vector add 5606 instruct vaddF(vec dst, vec src) %{ 5607 predicate(UseAVX == 0); 5608 match(Set dst (AddVF dst src)); 5609 format %{ "addps $dst,$src\t! add packedF" %} 5610 ins_encode %{ 5611 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5612 %} 5613 ins_pipe( pipe_slow ); 5614 %} 5615 5616 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5617 predicate(UseAVX > 0); 5618 match(Set dst (AddVF src1 src2)); 5619 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5620 ins_encode %{ 5621 int vlen_enc = vector_length_encoding(this); 5622 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5623 %} 5624 ins_pipe( pipe_slow ); 5625 %} 5626 5627 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5628 predicate((UseAVX > 0) && 5629 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5630 match(Set dst (AddVF src (LoadVector mem))); 5631 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5632 ins_encode %{ 5633 int vlen_enc = vector_length_encoding(this); 5634 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5635 %} 5636 ins_pipe( pipe_slow ); 5637 %} 5638 5639 // Doubles vector add 5640 instruct vaddD(vec dst, vec src) %{ 5641 predicate(UseAVX == 0); 5642 match(Set dst (AddVD dst src)); 5643 format %{ "addpd $dst,$src\t! add packedD" %} 5644 ins_encode %{ 5645 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5646 %} 5647 ins_pipe( pipe_slow ); 5648 %} 5649 5650 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5651 predicate(UseAVX > 0); 5652 match(Set dst (AddVD src1 src2)); 5653 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5654 ins_encode %{ 5655 int vlen_enc = vector_length_encoding(this); 5656 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5657 %} 5658 ins_pipe( pipe_slow ); 5659 %} 5660 5661 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5662 predicate((UseAVX > 0) && 5663 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5664 match(Set dst (AddVD src (LoadVector mem))); 5665 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5666 ins_encode %{ 5667 int vlen_enc = vector_length_encoding(this); 5668 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5669 %} 5670 ins_pipe( pipe_slow ); 5671 %} 5672 5673 // --------------------------------- SUB -------------------------------------- 5674 5675 // Bytes vector sub 5676 instruct vsubB(vec dst, vec src) %{ 5677 predicate(UseAVX == 0); 5678 match(Set dst (SubVB dst src)); 5679 format %{ "psubb $dst,$src\t! sub packedB" %} 5680 ins_encode %{ 5681 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5682 %} 5683 ins_pipe( pipe_slow ); 5684 %} 5685 5686 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5687 predicate(UseAVX > 0); 5688 match(Set dst (SubVB src1 src2)); 5689 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packedB" %} 5690 ins_encode %{ 5691 int vlen_enc = vector_length_encoding(this); 5692 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5693 %} 5694 ins_pipe( pipe_slow ); 5695 %} 5696 5697 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5698 predicate((UseAVX > 0) && 5699 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5700 match(Set dst (SubVB src (LoadVector mem))); 5701 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5702 ins_encode %{ 5703 int vlen_enc = vector_length_encoding(this); 5704 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5705 %} 5706 ins_pipe( pipe_slow ); 5707 %} 5708 5709 // Shorts/Chars vector sub 5710 instruct vsubS(vec dst, vec src) %{ 5711 predicate(UseAVX == 0); 5712 match(Set dst (SubVS dst src)); 5713 format %{ "psubw $dst,$src\t! sub packedS" %} 5714 ins_encode %{ 5715 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5716 %} 5717 ins_pipe( pipe_slow ); 5718 %} 5719 5720 5721 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5722 predicate(UseAVX > 0); 5723 match(Set dst (SubVS src1 src2)); 5724 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5725 ins_encode %{ 5726 int vlen_enc = vector_length_encoding(this); 5727 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5728 %} 5729 ins_pipe( pipe_slow ); 5730 %} 5731 5732 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5733 predicate((UseAVX > 0) && 5734 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5735 match(Set dst (SubVS src (LoadVector mem))); 5736 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5737 ins_encode %{ 5738 int vlen_enc = vector_length_encoding(this); 5739 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5740 %} 5741 ins_pipe( pipe_slow ); 5742 %} 5743 5744 // Integers vector sub 5745 instruct vsubI(vec dst, vec src) %{ 5746 predicate(UseAVX == 0); 5747 match(Set dst (SubVI dst src)); 5748 format %{ "psubd $dst,$src\t! sub packedI" %} 5749 ins_encode %{ 5750 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5751 %} 5752 ins_pipe( pipe_slow ); 5753 %} 5754 5755 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5756 predicate(UseAVX > 0); 5757 match(Set dst (SubVI src1 src2)); 5758 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5759 ins_encode %{ 5760 int vlen_enc = vector_length_encoding(this); 5761 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5762 %} 5763 ins_pipe( pipe_slow ); 5764 %} 5765 5766 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5767 predicate((UseAVX > 0) && 5768 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5769 match(Set dst (SubVI src (LoadVector mem))); 5770 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5771 ins_encode %{ 5772 int vlen_enc = vector_length_encoding(this); 5773 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5774 %} 5775 ins_pipe( pipe_slow ); 5776 %} 5777 5778 // Longs vector sub 5779 instruct vsubL(vec dst, vec src) %{ 5780 predicate(UseAVX == 0); 5781 match(Set dst (SubVL dst src)); 5782 format %{ "psubq $dst,$src\t! sub packedL" %} 5783 ins_encode %{ 5784 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5785 %} 5786 ins_pipe( pipe_slow ); 5787 %} 5788 5789 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5790 predicate(UseAVX > 0); 5791 match(Set dst (SubVL src1 src2)); 5792 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packedL" %} 5793 ins_encode %{ 5794 int vlen_enc = vector_length_encoding(this); 5795 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5796 %} 5797 ins_pipe( pipe_slow ); 5798 %} 5799 5800 5801 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5802 predicate((UseAVX > 0) && 5803 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5804 match(Set dst (SubVL src (LoadVector mem))); 5805 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5806 ins_encode %{ 5807 int vlen_enc = vector_length_encoding(this); 5808 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5809 %} 5810 ins_pipe( pipe_slow ); 5811 %} 5812 5813 // Floats vector sub 5814 instruct vsubF(vec dst, vec src) %{ 5815 predicate(UseAVX == 0); 5816 match(Set dst (SubVF dst src)); 5817 format %{ "subps $dst,$src\t! sub packedF" %} 5818 ins_encode %{ 5819 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5820 %} 5821 ins_pipe( pipe_slow ); 5822 %} 5823 5824 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5825 predicate(UseAVX > 0); 5826 match(Set dst (SubVF src1 src2)); 5827 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5828 ins_encode %{ 5829 int vlen_enc = vector_length_encoding(this); 5830 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5831 %} 5832 ins_pipe( pipe_slow ); 5833 %} 5834 5835 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5836 predicate((UseAVX > 0) && 5837 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5838 match(Set dst (SubVF src (LoadVector mem))); 5839 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5840 ins_encode %{ 5841 int vlen_enc = vector_length_encoding(this); 5842 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5843 %} 5844 ins_pipe( pipe_slow ); 5845 %} 5846 5847 // Doubles vector sub 5848 instruct vsubD(vec dst, vec src) %{ 5849 predicate(UseAVX == 0); 5850 match(Set dst (SubVD dst src)); 5851 format %{ "subpd $dst,$src\t! sub packedD" %} 5852 ins_encode %{ 5853 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 5854 %} 5855 ins_pipe( pipe_slow ); 5856 %} 5857 5858 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 5859 predicate(UseAVX > 0); 5860 match(Set dst (SubVD src1 src2)); 5861 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 5862 ins_encode %{ 5863 int vlen_enc = vector_length_encoding(this); 5864 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5865 %} 5866 ins_pipe( pipe_slow ); 5867 %} 5868 5869 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 5870 predicate((UseAVX > 0) && 5871 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5872 match(Set dst (SubVD src (LoadVector mem))); 5873 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 5874 ins_encode %{ 5875 int vlen_enc = vector_length_encoding(this); 5876 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5877 %} 5878 ins_pipe( pipe_slow ); 5879 %} 5880 5881 // --------------------------------- MUL -------------------------------------- 5882 5883 // Byte vector mul 5884 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 5885 predicate(Matcher::vector_length_in_bytes(n) <= 8); 5886 match(Set dst (MulVB src1 src2)); 5887 effect(TEMP dst, TEMP xtmp); 5888 format %{ "mulVB $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 5889 ins_encode %{ 5890 assert(UseSSE > 3, "required"); 5891 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 5892 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 5893 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 5894 __ psllw($dst$$XMMRegister, 8); 5895 __ psrlw($dst$$XMMRegister, 8); 5896 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 5897 %} 5898 ins_pipe( pipe_slow ); 5899 %} 5900 5901 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 5902 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 5903 match(Set dst (MulVB src1 src2)); 5904 effect(TEMP dst, TEMP xtmp); 5905 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 5906 ins_encode %{ 5907 assert(UseSSE > 3, "required"); 5908 // Odd-index elements 5909 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 5910 __ psrlw($dst$$XMMRegister, 8); 5911 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 5912 __ psrlw($xtmp$$XMMRegister, 8); 5913 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 5914 __ psllw($dst$$XMMRegister, 8); 5915 // Even-index elements 5916 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 5917 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 5918 __ psllw($xtmp$$XMMRegister, 8); 5919 __ psrlw($xtmp$$XMMRegister, 8); 5920 // Combine 5921 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 5922 %} 5923 ins_pipe( pipe_slow ); 5924 %} 5925 5926 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 5927 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 5928 match(Set dst (MulVB src1 src2)); 5929 effect(TEMP xtmp1, TEMP xtmp2); 5930 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 5931 ins_encode %{ 5932 int vlen_enc = vector_length_encoding(this); 5933 // Odd-index elements 5934 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 5935 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 5936 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5937 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 5938 // Even-index elements 5939 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5940 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 5941 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 5942 // Combine 5943 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5944 %} 5945 ins_pipe( pipe_slow ); 5946 %} 5947 5948 // Shorts/Chars vector mul 5949 instruct vmulS(vec dst, vec src) %{ 5950 predicate(UseAVX == 0); 5951 match(Set dst (MulVS dst src)); 5952 format %{ "pmullw $dst,$src\t! mul packedS" %} 5953 ins_encode %{ 5954 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 5955 %} 5956 ins_pipe( pipe_slow ); 5957 %} 5958 5959 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 5960 predicate(UseAVX > 0); 5961 match(Set dst (MulVS src1 src2)); 5962 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 5963 ins_encode %{ 5964 int vlen_enc = vector_length_encoding(this); 5965 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5966 %} 5967 ins_pipe( pipe_slow ); 5968 %} 5969 5970 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 5971 predicate((UseAVX > 0) && 5972 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5973 match(Set dst (MulVS src (LoadVector mem))); 5974 format %{ "vpmullw $dst,$src,$mem\t! 
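// Note on the MulVB patterns above (vmul8B/vmulB/vmulB_reg): x86 has no packed
// 8-bit multiply, so the byte products are formed with 16-bit multiplies and only
// the low byte of each product is kept, i.e. per lane roughly
//   dst[i] = (src1[i] * src2[i]) & 0xFF
// The odd/even split in the wider variants keeps the work within one register
// width instead of widening the whole vector to two word vectors.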
mul packedS" %} 5975 ins_encode %{ 5976 int vlen_enc = vector_length_encoding(this); 5977 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5978 %} 5979 ins_pipe( pipe_slow ); 5980 %} 5981 5982 // Integers vector mul 5983 instruct vmulI(vec dst, vec src) %{ 5984 predicate(UseAVX == 0); 5985 match(Set dst (MulVI dst src)); 5986 format %{ "pmulld $dst,$src\t! mul packedI" %} 5987 ins_encode %{ 5988 assert(UseSSE > 3, "required"); 5989 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 5990 %} 5991 ins_pipe( pipe_slow ); 5992 %} 5993 5994 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 5995 predicate(UseAVX > 0); 5996 match(Set dst (MulVI src1 src2)); 5997 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 5998 ins_encode %{ 5999 int vlen_enc = vector_length_encoding(this); 6000 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6001 %} 6002 ins_pipe( pipe_slow ); 6003 %} 6004 6005 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 6006 predicate((UseAVX > 0) && 6007 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6008 match(Set dst (MulVI src (LoadVector mem))); 6009 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 6010 ins_encode %{ 6011 int vlen_enc = vector_length_encoding(this); 6012 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6013 %} 6014 ins_pipe( pipe_slow ); 6015 %} 6016 6017 // Longs vector mul 6018 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 6019 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6020 VM_Version::supports_avx512dq()) || 6021 VM_Version::supports_avx512vldq()); 6022 match(Set dst (MulVL src1 src2)); 6023 ins_cost(500); 6024 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %} 6025 ins_encode %{ 6026 assert(UseAVX > 2, "required"); 6027 int vlen_enc = vector_length_encoding(this); 6028 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6029 %} 6030 ins_pipe( pipe_slow ); 6031 %} 6032 6033 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 6034 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6035 VM_Version::supports_avx512dq()) || 6036 (Matcher::vector_length_in_bytes(n) > 8 && 6037 VM_Version::supports_avx512vldq())); 6038 match(Set dst (MulVL src (LoadVector mem))); 6039 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 6040 ins_cost(500); 6041 ins_encode %{ 6042 assert(UseAVX > 2, "required"); 6043 int vlen_enc = vector_length_encoding(this); 6044 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6045 %} 6046 ins_pipe( pipe_slow ); 6047 %} 6048 6049 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 6050 predicate(UseAVX == 0); 6051 match(Set dst (MulVL src1 src2)); 6052 ins_cost(500); 6053 effect(TEMP dst, TEMP xtmp); 6054 format %{ "mulVL $dst, $src1, $src2\t! 
using $xtmp as TEMP" %}
6055 ins_encode %{
6056 assert(VM_Version::supports_sse4_1(), "required");
6057 // Get the lo-hi products; only the lower 32 bits are of concern
6058 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
6059 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
6060 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
6061 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
6062 __ psllq($dst$$XMMRegister, 32);
6063 // Get the lo-lo products
6064 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
6065 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
6066 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
6067 %}
6068 ins_pipe( pipe_slow );
6069 %}
6070
6071 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
6072 predicate(UseAVX > 0 &&
6073 ((Matcher::vector_length_in_bytes(n) == 64 &&
6074 !VM_Version::supports_avx512dq()) ||
6075 (Matcher::vector_length_in_bytes(n) < 64 &&
6076 !VM_Version::supports_avx512vldq())));
6077 match(Set dst (MulVL src1 src2));
6078 effect(TEMP xtmp1, TEMP xtmp2);
6079 ins_cost(500);
6080 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
6081 ins_encode %{
6082 int vlen_enc = vector_length_encoding(this);
6083 // Get the lo-hi products; only the lower 32 bits are of concern
6084 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
6085 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
6086 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
6087 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
6088 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
6089 // Get the lo-lo products
6090 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6091 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
6092 %}
6093 ins_pipe( pipe_slow );
6094 %}
6095
6096 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
6097 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
6098 match(Set dst (MulVL src1 src2));
6099 ins_cost(100);
6100 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
6101 ins_encode %{
6102 int vlen_enc = vector_length_encoding(this);
6103 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6104 %}
6105 ins_pipe( pipe_slow );
6106 %}
6107
6108 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
6109 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
6110 match(Set dst (MulVL src1 src2));
6111 ins_cost(100);
6112 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
6113 ins_encode %{
6114 int vlen_enc = vector_length_encoding(this);
6115 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6116 %}
6117 ins_pipe( pipe_slow );
6118 %}
6119
6120 // Floats vector mul
6121 instruct vmulF(vec dst, vec src) %{
6122 predicate(UseAVX == 0);
6123 match(Set dst (MulVF dst src));
6124 format %{ "mulps $dst,$src\t! mul packedF" %}
6125 ins_encode %{
6126 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
6127 %}
6128 ins_pipe( pipe_slow );
6129 %}
6130
6131 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
6132 predicate(UseAVX > 0);
6133 match(Set dst (MulVF src1 src2));
6134 format %{ "vmulps $dst,$src1,$src2\t!
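// Note on vmulL/vmulL_reg above: without AVX-512DQ there is no packed 64x64-bit
// multiply, so each lane's product is assembled from 32-bit halves. Writing
// a = a_hi*2^32 + a_lo and b = b_hi*2^32 + b_lo, the low 64 bits of a*b are
//   ((a_hi*b_lo + a_lo*b_hi) << 32) + a_lo*b_lo   (mod 2^64)
// The pshufd/pmulld sequence forms the two cross products, pmuludq forms the
// lo*lo product, and the adds combine them. vmuludq_reg/vmuldq_reg are the
// cheaper forms used when the matcher knows both inputs fit in 32 bits.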
mul packedF" %} 6135 ins_encode %{ 6136 int vlen_enc = vector_length_encoding(this); 6137 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6138 %} 6139 ins_pipe( pipe_slow ); 6140 %} 6141 6142 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 6143 predicate((UseAVX > 0) && 6144 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6145 match(Set dst (MulVF src (LoadVector mem))); 6146 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 6147 ins_encode %{ 6148 int vlen_enc = vector_length_encoding(this); 6149 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6150 %} 6151 ins_pipe( pipe_slow ); 6152 %} 6153 6154 // Doubles vector mul 6155 instruct vmulD(vec dst, vec src) %{ 6156 predicate(UseAVX == 0); 6157 match(Set dst (MulVD dst src)); 6158 format %{ "mulpd $dst,$src\t! mul packedD" %} 6159 ins_encode %{ 6160 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 6161 %} 6162 ins_pipe( pipe_slow ); 6163 %} 6164 6165 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 6166 predicate(UseAVX > 0); 6167 match(Set dst (MulVD src1 src2)); 6168 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} 6169 ins_encode %{ 6170 int vlen_enc = vector_length_encoding(this); 6171 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6172 %} 6173 ins_pipe( pipe_slow ); 6174 %} 6175 6176 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 6177 predicate((UseAVX > 0) && 6178 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6179 match(Set dst (MulVD src (LoadVector mem))); 6180 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 6181 ins_encode %{ 6182 int vlen_enc = vector_length_encoding(this); 6183 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6184 %} 6185 ins_pipe( pipe_slow ); 6186 %} 6187 6188 // --------------------------------- DIV -------------------------------------- 6189 6190 // Floats vector div 6191 instruct vdivF(vec dst, vec src) %{ 6192 predicate(UseAVX == 0); 6193 match(Set dst (DivVF dst src)); 6194 format %{ "divps $dst,$src\t! div packedF" %} 6195 ins_encode %{ 6196 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6197 %} 6198 ins_pipe( pipe_slow ); 6199 %} 6200 6201 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 6202 predicate(UseAVX > 0); 6203 match(Set dst (DivVF src1 src2)); 6204 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 6205 ins_encode %{ 6206 int vlen_enc = vector_length_encoding(this); 6207 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6208 %} 6209 ins_pipe( pipe_slow ); 6210 %} 6211 6212 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6213 predicate((UseAVX > 0) && 6214 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6215 match(Set dst (DivVF src (LoadVector mem))); 6216 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6217 ins_encode %{ 6218 int vlen_enc = vector_length_encoding(this); 6219 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6220 %} 6221 ins_pipe( pipe_slow ); 6222 %} 6223 6224 // Doubles vector div 6225 instruct vdivD(vec dst, vec src) %{ 6226 predicate(UseAVX == 0); 6227 match(Set dst (DivVD dst src)); 6228 format %{ "divpd $dst,$src\t! div packedD" %} 6229 ins_encode %{ 6230 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6231 %} 6232 ins_pipe( pipe_slow ); 6233 %} 6234 6235 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6236 predicate(UseAVX > 0); 6237 match(Set dst (DivVD src1 src2)); 6238 format %{ "vdivpd $dst,$src1,$src2\t! 
div packedD" %} 6239 ins_encode %{ 6240 int vlen_enc = vector_length_encoding(this); 6241 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6242 %} 6243 ins_pipe( pipe_slow ); 6244 %} 6245 6246 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6247 predicate((UseAVX > 0) && 6248 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6249 match(Set dst (DivVD src (LoadVector mem))); 6250 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 6251 ins_encode %{ 6252 int vlen_enc = vector_length_encoding(this); 6253 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6254 %} 6255 ins_pipe( pipe_slow ); 6256 %} 6257 6258 // ------------------------------ MinMax --------------------------------------- 6259 6260 // Byte, Short, Int vector Min/Max 6261 instruct minmax_reg_sse(vec dst, vec src) %{ 6262 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6263 UseAVX == 0); 6264 match(Set dst (MinV dst src)); 6265 match(Set dst (MaxV dst src)); 6266 format %{ "vector_minmax $dst,$src\t! " %} 6267 ins_encode %{ 6268 assert(UseSSE >= 4, "required"); 6269 6270 int opcode = this->ideal_Opcode(); 6271 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6272 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6273 %} 6274 ins_pipe( pipe_slow ); 6275 %} 6276 6277 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6278 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6279 UseAVX > 0); 6280 match(Set dst (MinV src1 src2)); 6281 match(Set dst (MaxV src1 src2)); 6282 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 6283 ins_encode %{ 6284 int opcode = this->ideal_Opcode(); 6285 int vlen_enc = vector_length_encoding(this); 6286 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6287 6288 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6289 %} 6290 ins_pipe( pipe_slow ); 6291 %} 6292 6293 // Long vector Min/Max 6294 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6295 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6296 UseAVX == 0); 6297 match(Set dst (MinV dst src)); 6298 match(Set dst (MaxV src dst)); 6299 effect(TEMP dst, TEMP tmp); 6300 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6301 ins_encode %{ 6302 assert(UseSSE >= 4, "required"); 6303 6304 int opcode = this->ideal_Opcode(); 6305 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6306 assert(elem_bt == T_LONG, "sanity"); 6307 6308 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6309 %} 6310 ins_pipe( pipe_slow ); 6311 %} 6312 6313 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6314 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6315 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6316 match(Set dst (MinV src1 src2)); 6317 match(Set dst (MaxV src1 src2)); 6318 effect(TEMP dst); 6319 format %{ "vector_minmaxL $dst,$src1,$src2\t! 
" %} 6320 ins_encode %{ 6321 int vlen_enc = vector_length_encoding(this); 6322 int opcode = this->ideal_Opcode(); 6323 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6324 assert(elem_bt == T_LONG, "sanity"); 6325 6326 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6327 %} 6328 ins_pipe( pipe_slow ); 6329 %} 6330 6331 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6332 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6333 Matcher::vector_element_basic_type(n) == T_LONG); 6334 match(Set dst (MinV src1 src2)); 6335 match(Set dst (MaxV src1 src2)); 6336 format %{ "vector_minmaxL $dst,$src1,src2\t! " %} 6337 ins_encode %{ 6338 assert(UseAVX > 2, "required"); 6339 6340 int vlen_enc = vector_length_encoding(this); 6341 int opcode = this->ideal_Opcode(); 6342 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6343 assert(elem_bt == T_LONG, "sanity"); 6344 6345 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6346 %} 6347 ins_pipe( pipe_slow ); 6348 %} 6349 6350 // Float/Double vector Min/Max 6351 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6352 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6353 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6354 UseAVX > 0); 6355 match(Set dst (MinV a b)); 6356 match(Set dst (MaxV a b)); 6357 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6358 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6359 ins_encode %{ 6360 assert(UseAVX > 0, "required"); 6361 6362 int opcode = this->ideal_Opcode(); 6363 int vlen_enc = vector_length_encoding(this); 6364 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6365 6366 __ vminmax_fp(opcode, elem_bt, 6367 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6368 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6369 %} 6370 ins_pipe( pipe_slow ); 6371 %} 6372 6373 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6374 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6375 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6376 match(Set dst (MinV a b)); 6377 match(Set dst (MaxV a b)); 6378 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6379 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6380 ins_encode %{ 6381 assert(UseAVX > 2, "required"); 6382 6383 int opcode = this->ideal_Opcode(); 6384 int vlen_enc = vector_length_encoding(this); 6385 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6386 6387 __ evminmax_fp(opcode, elem_bt, 6388 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6389 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6390 %} 6391 ins_pipe( pipe_slow ); 6392 %} 6393 6394 // ------------------------------ Unsigned vector Min/Max ---------------------- 6395 6396 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{ 6397 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6398 match(Set dst (UMinV a b)); 6399 match(Set dst (UMaxV a b)); 6400 format %{ "vector_uminmax $dst,$a,$b\t!" 
%} 6401 ins_encode %{ 6402 int opcode = this->ideal_Opcode(); 6403 int vlen_enc = vector_length_encoding(this); 6404 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6405 assert(is_integral_type(elem_bt), ""); 6406 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); 6407 %} 6408 ins_pipe( pipe_slow ); 6409 %} 6410 6411 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{ 6412 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6413 match(Set dst (UMinV a (LoadVector b))); 6414 match(Set dst (UMaxV a (LoadVector b))); 6415 format %{ "vector_uminmax $dst,$a,$b\t!" %} 6416 ins_encode %{ 6417 int opcode = this->ideal_Opcode(); 6418 int vlen_enc = vector_length_encoding(this); 6419 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6420 assert(is_integral_type(elem_bt), ""); 6421 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc); 6422 %} 6423 ins_pipe( pipe_slow ); 6424 %} 6425 6426 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{ 6427 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG); 6428 match(Set dst (UMinV a b)); 6429 match(Set dst (UMaxV a b)); 6430 effect(TEMP xtmp1, TEMP xtmp2); 6431 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %} 6432 ins_encode %{ 6433 int opcode = this->ideal_Opcode(); 6434 int vlen_enc = vector_length_encoding(this); 6435 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6436 %} 6437 ins_pipe( pipe_slow ); 6438 %} 6439 6440 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{ 6441 match(Set dst (UMinV (Binary dst src2) mask)); 6442 match(Set dst (UMaxV (Binary dst src2) mask)); 6443 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %} 6444 ins_encode %{ 6445 int vlen_enc = vector_length_encoding(this); 6446 BasicType bt = Matcher::vector_element_basic_type(this); 6447 int opc = this->ideal_Opcode(); 6448 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 6449 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 6450 %} 6451 ins_pipe( pipe_slow ); 6452 %} 6453 6454 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{ 6455 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask)); 6456 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask)); 6457 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! 
umin/max masked operation" %}
6458 ins_encode %{
6459 int vlen_enc = vector_length_encoding(this);
6460 BasicType bt = Matcher::vector_element_basic_type(this);
6461 int opc = this->ideal_Opcode();
6462 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
6463 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
6464 %}
6465 ins_pipe( pipe_slow );
6466 %}
6467
6468 // --------------------------------- Signum/CopySign ---------------------------
6469
6470 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
6471 match(Set dst (SignumF dst (Binary zero one)));
6472 effect(KILL cr);
6473 format %{ "signumF $dst, $dst" %}
6474 ins_encode %{
6475 int opcode = this->ideal_Opcode();
6476 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
6477 %}
6478 ins_pipe( pipe_slow );
6479 %}
6480
6481 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
6482 match(Set dst (SignumD dst (Binary zero one)));
6483 effect(KILL cr);
6484 format %{ "signumD $dst, $dst" %}
6485 ins_encode %{
6486 int opcode = this->ideal_Opcode();
6487 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
6488 %}
6489 ins_pipe( pipe_slow );
6490 %}
6491
6492 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
6493 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
6494 match(Set dst (SignumVF src (Binary zero one)));
6495 match(Set dst (SignumVD src (Binary zero one)));
6496 effect(TEMP dst, TEMP xtmp1);
6497 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
6498 ins_encode %{
6499 int opcode = this->ideal_Opcode();
6500 int vec_enc = vector_length_encoding(this);
6501 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
6502 $xtmp1$$XMMRegister, vec_enc);
6503 %}
6504 ins_pipe( pipe_slow );
6505 %}
6506
6507 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
6508 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
6509 match(Set dst (SignumVF src (Binary zero one)));
6510 match(Set dst (SignumVD src (Binary zero one)));
6511 effect(TEMP dst, TEMP ktmp1);
6512 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
6513 ins_encode %{
6514 int opcode = this->ideal_Opcode();
6515 int vec_enc = vector_length_encoding(this);
6516 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
6517 $ktmp1$$KRegister, vec_enc);
6518 %}
6519 ins_pipe( pipe_slow );
6520 %}
6521
6522 // ---------------------------------------
6523 // For copySign use 0xE4 as writemask for vpternlog
6524 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
6525 // C (xmm2) is set to 0x7FFFFFFF
6526 // Wherever xmm2 is 0, we want to pick from B (sign)
6527 // Wherever xmm2 is 1, we want to pick from A (magnitude)
6528 //
6529 // A B C Result
6530 // 0 0 0 0
6531 // 0 0 1 0
6532 // 0 1 0 1
6533 // 0 1 1 0
6534 // 1 0 0 0
6535 // 1 0 1 1
6536 // 1 1 0 1
6537 // 1 1 1 1
6538 //
6539 // Result going from high bit to low bit is 0b11100100 = 0xe4
6540 // ---------------------------------------
6541
6542 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
6543 match(Set dst (CopySignF dst src));
6544 effect(TEMP tmp1, TEMP tmp2);
6545 format %{ "CopySignF $dst, $src\t!
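// Worked example for the truth table above (hypothetical single-precision
// values): for copySign(-2.0f, 3.0f) the magnitude operand is 0xC0000000, the
// sign operand 0x40400000 and the mask 0x7FFFFFFF. Bit by bit, imm8 0xE4 keeps
// the exponent/mantissa bits of the magnitude operand (mask bit 1) and takes
// only the sign bit from the sign operand (mask bit 0), giving 0x40000000 == 2.0f.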
using $tmp1 and $tmp2 as TEMP" %} 6546 ins_encode %{ 6547 __ movl($tmp2$$Register, 0x7FFFFFFF); 6548 __ movdl($tmp1$$XMMRegister, $tmp2$$Register); 6549 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6550 %} 6551 ins_pipe( pipe_slow ); 6552 %} 6553 6554 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ 6555 match(Set dst (CopySignD dst (Binary src zero))); 6556 ins_cost(100); 6557 effect(TEMP tmp1, TEMP tmp2); 6558 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6559 ins_encode %{ 6560 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); 6561 __ movq($tmp1$$XMMRegister, $tmp2$$Register); 6562 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6563 %} 6564 ins_pipe( pipe_slow ); 6565 %} 6566 6567 //----------------------------- CompressBits/ExpandBits ------------------------ 6568 6569 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6570 predicate(n->bottom_type()->isa_int()); 6571 match(Set dst (CompressBits src mask)); 6572 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6573 ins_encode %{ 6574 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 6575 %} 6576 ins_pipe( pipe_slow ); 6577 %} 6578 6579 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6580 predicate(n->bottom_type()->isa_int()); 6581 match(Set dst (ExpandBits src mask)); 6582 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6583 ins_encode %{ 6584 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 6585 %} 6586 ins_pipe( pipe_slow ); 6587 %} 6588 6589 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6590 predicate(n->bottom_type()->isa_int()); 6591 match(Set dst (CompressBits src (LoadI mask))); 6592 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6593 ins_encode %{ 6594 __ pextl($dst$$Register, $src$$Register, $mask$$Address); 6595 %} 6596 ins_pipe( pipe_slow ); 6597 %} 6598 6599 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6600 predicate(n->bottom_type()->isa_int()); 6601 match(Set dst (ExpandBits src (LoadI mask))); 6602 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6603 ins_encode %{ 6604 __ pdepl($dst$$Register, $src$$Register, $mask$$Address); 6605 %} 6606 ins_pipe( pipe_slow ); 6607 %} 6608 6609 // --------------------------------- Sqrt -------------------------------------- 6610 6611 instruct vsqrtF_reg(vec dst, vec src) %{ 6612 match(Set dst (SqrtVF src)); 6613 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6614 ins_encode %{ 6615 assert(UseAVX > 0, "required"); 6616 int vlen_enc = vector_length_encoding(this); 6617 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6618 %} 6619 ins_pipe( pipe_slow ); 6620 %} 6621 6622 instruct vsqrtF_mem(vec dst, memory mem) %{ 6623 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6624 match(Set dst (SqrtVF (LoadVector mem))); 6625 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6626 ins_encode %{ 6627 assert(UseAVX > 0, "required"); 6628 int vlen_enc = vector_length_encoding(this); 6629 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6630 %} 6631 ins_pipe( pipe_slow ); 6632 %} 6633 6634 // Floating point vector sqrt 6635 instruct vsqrtD_reg(vec dst, vec src) %{ 6636 match(Set dst (SqrtVD src)); 6637 format %{ "vsqrtpd $dst,$src\t! 
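// Worked example for the pextl/pdepl patterns above (illustrative 8-bit values):
// pextl with src = 0b10110010 and mask = 0b11001100 gathers the src bits
// selected by the mask into the low bits, giving 0b1000; pdepl is the inverse,
// scattering the low bits of src into the mask positions. These back Java's
// Integer.compress/expand intrinsics.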
sqrt packedD" %} 6638 ins_encode %{ 6639 assert(UseAVX > 0, "required"); 6640 int vlen_enc = vector_length_encoding(this); 6641 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6642 %} 6643 ins_pipe( pipe_slow ); 6644 %} 6645 6646 instruct vsqrtD_mem(vec dst, memory mem) %{ 6647 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6648 match(Set dst (SqrtVD (LoadVector mem))); 6649 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6650 ins_encode %{ 6651 assert(UseAVX > 0, "required"); 6652 int vlen_enc = vector_length_encoding(this); 6653 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6654 %} 6655 ins_pipe( pipe_slow ); 6656 %} 6657 6658 // ------------------------------ Shift --------------------------------------- 6659 6660 // Left and right shift count vectors are the same on x86 6661 // (only lowest bits of xmm reg are used for count). 6662 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6663 match(Set dst (LShiftCntV cnt)); 6664 match(Set dst (RShiftCntV cnt)); 6665 format %{ "movdl $dst,$cnt\t! load shift count" %} 6666 ins_encode %{ 6667 __ movdl($dst$$XMMRegister, $cnt$$Register); 6668 %} 6669 ins_pipe( pipe_slow ); 6670 %} 6671 6672 // Byte vector shift 6673 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6674 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6675 match(Set dst ( LShiftVB src shift)); 6676 match(Set dst ( RShiftVB src shift)); 6677 match(Set dst (URShiftVB src shift)); 6678 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6679 format %{"vector_byte_shift $dst,$src,$shift" %} 6680 ins_encode %{ 6681 assert(UseSSE > 3, "required"); 6682 int opcode = this->ideal_Opcode(); 6683 bool sign = (opcode != Op_URShiftVB); 6684 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6685 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6686 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6687 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6688 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6689 %} 6690 ins_pipe( pipe_slow ); 6691 %} 6692 6693 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6694 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6695 UseAVX <= 1); 6696 match(Set dst ( LShiftVB src shift)); 6697 match(Set dst ( RShiftVB src shift)); 6698 match(Set dst (URShiftVB src shift)); 6699 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6700 format %{"vector_byte_shift $dst,$src,$shift" %} 6701 ins_encode %{ 6702 assert(UseSSE > 3, "required"); 6703 int opcode = this->ideal_Opcode(); 6704 bool sign = (opcode != Op_URShiftVB); 6705 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6706 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6707 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6708 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6709 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6710 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6711 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6712 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6713 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6714 %} 6715 ins_pipe( pipe_slow ); 6716 %} 6717 6718 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6719 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6720 UseAVX > 1); 6721 match(Set dst ( LShiftVB src shift)); 6722 match(Set dst ( RShiftVB src shift)); 6723 match(Set 
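// Note on the byte-shift patterns above and below: x86 has no packed 8-bit
// shift, so the bytes are sign- or zero-extended to words (vextendbw), shifted
// as words (vshiftw), masked back into byte range with
// vector_short_to_byte_mask(), and then re-packed with packuswb/vpackuswb.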
dst (URShiftVB src shift)); 6724 effect(TEMP dst, TEMP tmp); 6725 format %{"vector_byte_shift $dst,$src,$shift" %} 6726 ins_encode %{ 6727 int opcode = this->ideal_Opcode(); 6728 bool sign = (opcode != Op_URShiftVB); 6729 int vlen_enc = Assembler::AVX_256bit; 6730 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6731 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6732 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6733 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6734 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6735 %} 6736 ins_pipe( pipe_slow ); 6737 %} 6738 6739 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6740 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6741 match(Set dst ( LShiftVB src shift)); 6742 match(Set dst ( RShiftVB src shift)); 6743 match(Set dst (URShiftVB src shift)); 6744 effect(TEMP dst, TEMP tmp); 6745 format %{"vector_byte_shift $dst,$src,$shift" %} 6746 ins_encode %{ 6747 assert(UseAVX > 1, "required"); 6748 int opcode = this->ideal_Opcode(); 6749 bool sign = (opcode != Op_URShiftVB); 6750 int vlen_enc = Assembler::AVX_256bit; 6751 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 6752 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6753 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6754 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6755 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6756 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6757 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6758 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6759 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6760 %} 6761 ins_pipe( pipe_slow ); 6762 %} 6763 6764 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6765 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift()); 6766 match(Set dst ( LShiftVB src shift)); 6767 match(Set dst (RShiftVB src shift)); 6768 match(Set dst (URShiftVB src shift)); 6769 effect(TEMP dst, TEMP tmp1, TEMP tmp2); 6770 format %{"vector_byte_shift $dst,$src,$shift" %} 6771 ins_encode %{ 6772 assert(UseAVX > 2, "required"); 6773 int opcode = this->ideal_Opcode(); 6774 bool sign = (opcode != Op_URShiftVB); 6775 int vlen_enc = Assembler::AVX_512bit; 6776 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 6777 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 6778 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6779 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6780 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6781 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6782 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6783 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6784 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6785 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 6786 __ evmovdquq($tmp2$$XMMRegister, 
ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
6787 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
6788 %}
6789 ins_pipe( pipe_slow );
6790 %}
6791
6792 // Shorts vector logical right shift produces an incorrect Java result
6793 // for negative data because Java code converts a short value to an int with
6794 // sign extension before the shift. But char vectors are fine since chars are
6795 // unsigned values.
6796 // Shorts/Chars vector shift
6797 instruct vshiftS(vec dst, vec src, vec shift) %{
6798 predicate(!n->as_ShiftV()->is_var_shift());
6799 match(Set dst ( LShiftVS src shift));
6800 match(Set dst ( RShiftVS src shift));
6801 match(Set dst (URShiftVS src shift));
6802 effect(TEMP dst, USE src, USE shift);
6803 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
6804 ins_encode %{
6805 int opcode = this->ideal_Opcode();
6806 if (UseAVX > 0) {
6807 int vlen_enc = vector_length_encoding(this);
6808 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6809 } else {
6810 int vlen = Matcher::vector_length(this);
6811 if (vlen == 2) {
6812 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6813 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6814 } else if (vlen == 4) {
6815 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6816 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6817 } else {
6818 assert (vlen == 8, "sanity");
6819 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6820 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6821 }
6822 }
6823 %}
6824 ins_pipe( pipe_slow );
6825 %}
6826
6827 // Integers vector shift
6828 instruct vshiftI(vec dst, vec src, vec shift) %{
6829 predicate(!n->as_ShiftV()->is_var_shift());
6830 match(Set dst ( LShiftVI src shift));
6831 match(Set dst ( RShiftVI src shift));
6832 match(Set dst (URShiftVI src shift));
6833 effect(TEMP dst, USE src, USE shift);
6834 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
6835 ins_encode %{
6836 int opcode = this->ideal_Opcode();
6837 if (UseAVX > 0) {
6838 int vlen_enc = vector_length_encoding(this);
6839 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6840 } else {
6841 int vlen = Matcher::vector_length(this);
6842 if (vlen == 2) {
6843 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6844 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6845 } else {
6846 assert(vlen == 4, "sanity");
6847 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6848 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6849 }
6850 }
6851 %}
6852 ins_pipe( pipe_slow );
6853 %}
6854
6855 // Integers vector constant shift
6856 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
6857 match(Set dst (LShiftVI src (LShiftCntV shift)));
6858 match(Set dst (RShiftVI src (RShiftCntV shift)));
6859 match(Set dst (URShiftVI src (RShiftCntV shift)));
6860 format %{ "vshiftd_imm $dst,$src,$shift\t!
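// Example for the note above: for a Java short s = -2 (0xFFFE), (s >>> 1)
// promotes s to int first, so the value stored back to a short is 0xFFFF,
// whereas a packed 16-bit logical shift of 0xFFFE would give 0x7FFF. That is
// why the comment above restricts the packed logical right shift to char
// (unsigned) data.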
shift packedI" %} 6861 ins_encode %{ 6862 int opcode = this->ideal_Opcode(); 6863 if (UseAVX > 0) { 6864 int vector_len = vector_length_encoding(this); 6865 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6866 } else { 6867 int vlen = Matcher::vector_length(this); 6868 if (vlen == 2) { 6869 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6870 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6871 } else { 6872 assert(vlen == 4, "sanity"); 6873 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6874 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6875 } 6876 } 6877 %} 6878 ins_pipe( pipe_slow ); 6879 %} 6880 6881 // Longs vector shift 6882 instruct vshiftL(vec dst, vec src, vec shift) %{ 6883 predicate(!n->as_ShiftV()->is_var_shift()); 6884 match(Set dst ( LShiftVL src shift)); 6885 match(Set dst (URShiftVL src shift)); 6886 effect(TEMP dst, USE src, USE shift); 6887 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %} 6888 ins_encode %{ 6889 int opcode = this->ideal_Opcode(); 6890 if (UseAVX > 0) { 6891 int vlen_enc = vector_length_encoding(this); 6892 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6893 } else { 6894 assert(Matcher::vector_length(this) == 2, ""); 6895 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6896 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6897 } 6898 %} 6899 ins_pipe( pipe_slow ); 6900 %} 6901 6902 // Longs vector constant shift 6903 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 6904 match(Set dst (LShiftVL src (LShiftCntV shift))); 6905 match(Set dst (URShiftVL src (RShiftCntV shift))); 6906 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 6907 ins_encode %{ 6908 int opcode = this->ideal_Opcode(); 6909 if (UseAVX > 0) { 6910 int vector_len = vector_length_encoding(this); 6911 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6912 } else { 6913 assert(Matcher::vector_length(this) == 2, ""); 6914 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6915 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6916 } 6917 %} 6918 ins_pipe( pipe_slow ); 6919 %} 6920 6921 // -------------------ArithmeticRightShift ----------------------------------- 6922 // Long vector arithmetic right shift 6923 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 6924 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 6925 match(Set dst (RShiftVL src shift)); 6926 effect(TEMP dst, TEMP tmp); 6927 format %{ "vshiftq $dst,$src,$shift" %} 6928 ins_encode %{ 6929 uint vlen = Matcher::vector_length(this); 6930 if (vlen == 2) { 6931 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6932 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6933 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6934 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6935 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6936 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 6937 } else { 6938 assert(vlen == 4, "sanity"); 6939 assert(UseAVX > 1, "required"); 6940 int vlen_enc = Assembler::AVX_256bit; 6941 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6942 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6943 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6944 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6945 __ vpsubq($dst$$XMMRegister, 
$dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6946 } 6947 %} 6948 ins_pipe( pipe_slow ); 6949 %} 6950 6951 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 6952 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 6953 match(Set dst (RShiftVL src shift)); 6954 format %{ "vshiftq $dst,$src,$shift" %} 6955 ins_encode %{ 6956 int vlen_enc = vector_length_encoding(this); 6957 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6958 %} 6959 ins_pipe( pipe_slow ); 6960 %} 6961 6962 // ------------------- Variable Shift ----------------------------- 6963 // Byte variable shift 6964 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 6965 predicate(Matcher::vector_length(n) <= 8 && 6966 n->as_ShiftV()->is_var_shift() && 6967 !VM_Version::supports_avx512bw()); 6968 match(Set dst ( LShiftVB src shift)); 6969 match(Set dst ( RShiftVB src shift)); 6970 match(Set dst (URShiftVB src shift)); 6971 effect(TEMP dst, TEMP vtmp); 6972 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 6973 ins_encode %{ 6974 assert(UseAVX >= 2, "required"); 6975 6976 int opcode = this->ideal_Opcode(); 6977 int vlen_enc = Assembler::AVX_128bit; 6978 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 6979 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 6980 %} 6981 ins_pipe( pipe_slow ); 6982 %} 6983 6984 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 6985 predicate(Matcher::vector_length(n) == 16 && 6986 n->as_ShiftV()->is_var_shift() && 6987 !VM_Version::supports_avx512bw()); 6988 match(Set dst ( LShiftVB src shift)); 6989 match(Set dst ( RShiftVB src shift)); 6990 match(Set dst (URShiftVB src shift)); 6991 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 6992 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
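// Note on vshiftL_arith_reg above: before AVX-512 there is no packed 64-bit
// arithmetic right shift, so it is emulated with the usual identity
//   (x >> n) == ((x >>> n) ^ m) - m,  where m = 0x8000000000000000 >>> n
// i.e. shift logically, then restore the sign bits by xor-ing and subtracting
// the shifted sign mask (vector_long_sign_mask() above).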
using $vtmp1, $vtmp2 as TEMP" %} 6993 ins_encode %{ 6994 assert(UseAVX >= 2, "required"); 6995 6996 int opcode = this->ideal_Opcode(); 6997 int vlen_enc = Assembler::AVX_128bit; 6998 // Shift lower half and get word result in dst 6999 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7000 7001 // Shift upper half and get word result in vtmp1 7002 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7003 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7004 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7005 7006 // Merge and down convert the two word results to byte in dst 7007 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7008 %} 7009 ins_pipe( pipe_slow ); 7010 %} 7011 7012 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 7013 predicate(Matcher::vector_length(n) == 32 && 7014 n->as_ShiftV()->is_var_shift() && 7015 !VM_Version::supports_avx512bw()); 7016 match(Set dst ( LShiftVB src shift)); 7017 match(Set dst ( RShiftVB src shift)); 7018 match(Set dst (URShiftVB src shift)); 7019 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 7020 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 7021 ins_encode %{ 7022 assert(UseAVX >= 2, "required"); 7023 7024 int opcode = this->ideal_Opcode(); 7025 int vlen_enc = Assembler::AVX_128bit; 7026 // Process lower 128 bits and get result in dst 7027 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7028 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7029 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7030 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7031 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7032 7033 // Process higher 128 bits and get result in vtmp3 7034 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7035 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7036 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 7037 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 7038 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 7039 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7040 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 7041 7042 // Merge the two results in dst 7043 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7044 %} 7045 ins_pipe( pipe_slow ); 7046 %} 7047 7048 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ 7049 predicate(Matcher::vector_length(n) <= 32 && 7050 n->as_ShiftV()->is_var_shift() && 7051 VM_Version::supports_avx512bw()); 7052 match(Set dst ( LShiftVB src shift)); 7053 match(Set dst ( RShiftVB src shift)); 7054 match(Set dst (URShiftVB src shift)); 7055 effect(TEMP dst, TEMP vtmp); 7056 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp as TEMP" %} 7057 ins_encode %{ 7058 assert(UseAVX > 2, "required"); 7059 7060 int opcode = this->ideal_Opcode(); 7061 int vlen_enc = vector_length_encoding(this); 7062 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7063 %} 7064 ins_pipe( pipe_slow ); 7065 %} 7066 7067 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7068 predicate(Matcher::vector_length(n) == 64 && 7069 n->as_ShiftV()->is_var_shift() && 7070 VM_Version::supports_avx512bw()); 7071 match(Set dst ( LShiftVB src shift)); 7072 match(Set dst ( RShiftVB src shift)); 7073 match(Set dst (URShiftVB src shift)); 7074 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7075 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7076 ins_encode %{ 7077 assert(UseAVX > 2, "required"); 7078 7079 int opcode = this->ideal_Opcode(); 7080 int vlen_enc = Assembler::AVX_256bit; 7081 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7082 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7083 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7084 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7085 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7086 %} 7087 ins_pipe( pipe_slow ); 7088 %} 7089 7090 // Short variable shift 7091 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7092 predicate(Matcher::vector_length(n) <= 8 && 7093 n->as_ShiftV()->is_var_shift() && 7094 !VM_Version::supports_avx512bw()); 7095 match(Set dst ( LShiftVS src shift)); 7096 match(Set dst ( RShiftVS src shift)); 7097 match(Set dst (URShiftVS src shift)); 7098 effect(TEMP dst, TEMP vtmp); 7099 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7100 ins_encode %{ 7101 assert(UseAVX >= 2, "required"); 7102 7103 int opcode = this->ideal_Opcode(); 7104 bool sign = (opcode != Op_URShiftVS); 7105 int vlen_enc = Assembler::AVX_256bit; 7106 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 7107 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 7108 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 7109 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7110 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 7111 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7112 %} 7113 ins_pipe( pipe_slow ); 7114 %} 7115 7116 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7117 predicate(Matcher::vector_length(n) == 16 && 7118 n->as_ShiftV()->is_var_shift() && 7119 !VM_Version::supports_avx512bw()); 7120 match(Set dst ( LShiftVS src shift)); 7121 match(Set dst ( RShiftVS src shift)); 7122 match(Set dst (URShiftVS src shift)); 7123 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7124 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7125 ins_encode %{ 7126 assert(UseAVX >= 2, "required"); 7127 7128 int opcode = this->ideal_Opcode(); 7129 bool sign = (opcode != Op_URShiftVS); 7130 int vlen_enc = Assembler::AVX_256bit; 7131 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP 7132 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 7133 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7134 __ 
varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7135 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7136 7137 // Shift upper half, with result in dst using vtmp1 as TEMP 7138 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 7139 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 7140 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7141 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7142 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7143 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7144 7145 // Merge lower and upper half result into dst 7146 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7147 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 7148 %} 7149 ins_pipe( pipe_slow ); 7150 %} 7151 7152 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 7153 predicate(n->as_ShiftV()->is_var_shift() && 7154 VM_Version::supports_avx512bw()); 7155 match(Set dst ( LShiftVS src shift)); 7156 match(Set dst ( RShiftVS src shift)); 7157 match(Set dst (URShiftVS src shift)); 7158 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 7159 ins_encode %{ 7160 assert(UseAVX > 2, "required"); 7161 7162 int opcode = this->ideal_Opcode(); 7163 int vlen_enc = vector_length_encoding(this); 7164 if (!VM_Version::supports_avx512vl()) { 7165 vlen_enc = Assembler::AVX_512bit; 7166 } 7167 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7168 %} 7169 ins_pipe( pipe_slow ); 7170 %} 7171 7172 //Integer variable shift 7173 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 7174 predicate(n->as_ShiftV()->is_var_shift()); 7175 match(Set dst ( LShiftVI src shift)); 7176 match(Set dst ( RShiftVI src shift)); 7177 match(Set dst (URShiftVI src shift)); 7178 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 7179 ins_encode %{ 7180 assert(UseAVX >= 2, "required"); 7181 7182 int opcode = this->ideal_Opcode(); 7183 int vlen_enc = vector_length_encoding(this); 7184 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7185 %} 7186 ins_pipe( pipe_slow ); 7187 %} 7188 7189 //Long variable shift 7190 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 7191 predicate(n->as_ShiftV()->is_var_shift()); 7192 match(Set dst ( LShiftVL src shift)); 7193 match(Set dst (URShiftVL src shift)); 7194 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 7195 ins_encode %{ 7196 assert(UseAVX >= 2, "required"); 7197 7198 int opcode = this->ideal_Opcode(); 7199 int vlen_enc = vector_length_encoding(this); 7200 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7201 %} 7202 ins_pipe( pipe_slow ); 7203 %} 7204 7205 //Long variable right shift arithmetic 7206 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 7207 predicate(Matcher::vector_length(n) <= 4 && 7208 n->as_ShiftV()->is_var_shift() && 7209 UseAVX == 2); 7210 match(Set dst (RShiftVL src shift)); 7211 effect(TEMP dst, TEMP vtmp); 7212 format %{ "vector_varshift_long $dst,$src,$shift\n\t! 
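// Note on the *_var_nobw patterns above: per-element (variable) 16-bit shifts
// (vpsllvw/vpsravw/vpsrlvw) are AVX-512BW instructions, so without BW the short
// variants widen each lane to 32 bits (vextendwd/vpmovzxwd), use the AVX2
// variable dword shifts (varshiftd), and narrow back; the byte variants go
// through the varshiftbw helper to do the equivalent.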
using $vtmp as TEMP" %} 7213 ins_encode %{ 7214 int opcode = this->ideal_Opcode(); 7215 int vlen_enc = vector_length_encoding(this); 7216 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 7217 $vtmp$$XMMRegister); 7218 %} 7219 ins_pipe( pipe_slow ); 7220 %} 7221 7222 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 7223 predicate(n->as_ShiftV()->is_var_shift() && 7224 UseAVX > 2); 7225 match(Set dst (RShiftVL src shift)); 7226 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 7227 ins_encode %{ 7228 int opcode = this->ideal_Opcode(); 7229 int vlen_enc = vector_length_encoding(this); 7230 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7231 %} 7232 ins_pipe( pipe_slow ); 7233 %} 7234 7235 // --------------------------------- AND -------------------------------------- 7236 7237 instruct vand(vec dst, vec src) %{ 7238 predicate(UseAVX == 0); 7239 match(Set dst (AndV dst src)); 7240 format %{ "pand $dst,$src\t! and vectors" %} 7241 ins_encode %{ 7242 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7243 %} 7244 ins_pipe( pipe_slow ); 7245 %} 7246 7247 instruct vand_reg(vec dst, vec src1, vec src2) %{ 7248 predicate(UseAVX > 0); 7249 match(Set dst (AndV src1 src2)); 7250 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 7251 ins_encode %{ 7252 int vlen_enc = vector_length_encoding(this); 7253 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7254 %} 7255 ins_pipe( pipe_slow ); 7256 %} 7257 7258 instruct vand_mem(vec dst, vec src, memory mem) %{ 7259 predicate((UseAVX > 0) && 7260 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7261 match(Set dst (AndV src (LoadVector mem))); 7262 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 7263 ins_encode %{ 7264 int vlen_enc = vector_length_encoding(this); 7265 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7266 %} 7267 ins_pipe( pipe_slow ); 7268 %} 7269 7270 // --------------------------------- OR --------------------------------------- 7271 7272 instruct vor(vec dst, vec src) %{ 7273 predicate(UseAVX == 0); 7274 match(Set dst (OrV dst src)); 7275 format %{ "por $dst,$src\t! or vectors" %} 7276 ins_encode %{ 7277 __ por($dst$$XMMRegister, $src$$XMMRegister); 7278 %} 7279 ins_pipe( pipe_slow ); 7280 %} 7281 7282 instruct vor_reg(vec dst, vec src1, vec src2) %{ 7283 predicate(UseAVX > 0); 7284 match(Set dst (OrV src1 src2)); 7285 format %{ "vpor $dst,$src1,$src2\t! or vectors" %} 7286 ins_encode %{ 7287 int vlen_enc = vector_length_encoding(this); 7288 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7289 %} 7290 ins_pipe( pipe_slow ); 7291 %} 7292 7293 instruct vor_mem(vec dst, vec src, memory mem) %{ 7294 predicate((UseAVX > 0) && 7295 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7296 match(Set dst (OrV src (LoadVector mem))); 7297 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 7298 ins_encode %{ 7299 int vlen_enc = vector_length_encoding(this); 7300 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7301 %} 7302 ins_pipe( pipe_slow ); 7303 %} 7304 7305 // --------------------------------- XOR -------------------------------------- 7306 7307 instruct vxor(vec dst, vec src) %{ 7308 predicate(UseAVX == 0); 7309 match(Set dst (XorV dst src)); 7310 format %{ "pxor $dst,$src\t!
xor vectors" %} 7311 ins_encode %{ 7312 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7313 %} 7314 ins_pipe( pipe_slow ); 7315 %} 7316 7317 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7318 predicate(UseAVX > 0); 7319 match(Set dst (XorV src1 src2)); 7320 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7321 ins_encode %{ 7322 int vlen_enc = vector_length_encoding(this); 7323 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7324 %} 7325 ins_pipe( pipe_slow ); 7326 %} 7327 7328 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7329 predicate((UseAVX > 0) && 7330 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7331 match(Set dst (XorV src (LoadVector mem))); 7332 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7333 ins_encode %{ 7334 int vlen_enc = vector_length_encoding(this); 7335 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7336 %} 7337 ins_pipe( pipe_slow ); 7338 %} 7339 7340 // --------------------------------- VectorCast -------------------------------------- 7341 7342 instruct vcastBtoX(vec dst, vec src) %{ 7343 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE); 7344 match(Set dst (VectorCastB2X src)); 7345 format %{ "vector_cast_b2x $dst,$src\t!" %} 7346 ins_encode %{ 7347 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7348 int vlen_enc = vector_length_encoding(this); 7349 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7350 %} 7351 ins_pipe( pipe_slow ); 7352 %} 7353 7354 instruct vcastBtoD(legVec dst, legVec src) %{ 7355 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE); 7356 match(Set dst (VectorCastB2X src)); 7357 format %{ "vector_cast_b2x $dst,$src\t!" %} 7358 ins_encode %{ 7359 int vlen_enc = vector_length_encoding(this); 7360 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7361 %} 7362 ins_pipe( pipe_slow ); 7363 %} 7364 7365 instruct castStoX(vec dst, vec src) %{ 7366 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7367 Matcher::vector_length(n->in(1)) <= 8 && // src 7368 Matcher::vector_element_basic_type(n) == T_BYTE); 7369 match(Set dst (VectorCastS2X src)); 7370 format %{ "vector_cast_s2x $dst,$src" %} 7371 ins_encode %{ 7372 assert(UseAVX > 0, "required"); 7373 7374 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7375 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7376 %} 7377 ins_pipe( pipe_slow ); 7378 %} 7379 7380 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7381 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7382 Matcher::vector_length(n->in(1)) == 16 && // src 7383 Matcher::vector_element_basic_type(n) == T_BYTE); 7384 effect(TEMP dst, TEMP vtmp); 7385 match(Set dst (VectorCastS2X src)); 7386 format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp as TEMP" %} 7387 ins_encode %{ 7388 assert(UseAVX > 0, "required"); 7389 7390 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7391 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7392 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7393 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7394 %} 7395 ins_pipe( pipe_slow ); 7396 %} 7397 7398 instruct vcastStoX_evex(vec dst, vec src) %{ 7399 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7400 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7401 match(Set dst (VectorCastS2X src)); 7402 format %{ "vector_cast_s2x $dst,$src\t!" %} 7403 ins_encode %{ 7404 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7405 int src_vlen_enc = vector_length_encoding(this, $src); 7406 int vlen_enc = vector_length_encoding(this); 7407 switch (to_elem_bt) { 7408 case T_BYTE: 7409 if (!VM_Version::supports_avx512vl()) { 7410 vlen_enc = Assembler::AVX_512bit; 7411 } 7412 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7413 break; 7414 case T_INT: 7415 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7416 break; 7417 case T_FLOAT: 7418 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7419 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7420 break; 7421 case T_LONG: 7422 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7423 break; 7424 case T_DOUBLE: { 7425 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7426 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7427 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7428 break; 7429 } 7430 default: 7431 ShouldNotReachHere(); 7432 } 7433 %} 7434 ins_pipe( pipe_slow ); 7435 %} 7436 7437 instruct castItoX(vec dst, vec src) %{ 7438 predicate(UseAVX <= 2 && 7439 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7440 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7441 match(Set dst (VectorCastI2X src)); 7442 format %{ "vector_cast_i2x $dst,$src" %} 7443 ins_encode %{ 7444 assert(UseAVX > 0, "required"); 7445 7446 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7447 int vlen_enc = vector_length_encoding(this, $src); 7448 7449 if (to_elem_bt == T_BYTE) { 7450 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7451 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7452 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7453 } else { 7454 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7455 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7456 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7457 } 7458 %} 7459 ins_pipe( pipe_slow ); 7460 %} 7461 7462 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7463 predicate(UseAVX <= 2 && 7464 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7465 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7466 match(Set dst (VectorCastI2X src)); 7467 format %{ "vector_cast_i2x $dst,$src\t! 
using $vtmp as TEMP" %} 7468 effect(TEMP dst, TEMP vtmp); 7469 ins_encode %{ 7470 assert(UseAVX > 0, "required"); 7471 7472 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7473 int vlen_enc = vector_length_encoding(this, $src); 7474 7475 if (to_elem_bt == T_BYTE) { 7476 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7477 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7478 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7479 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7480 } else { 7481 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7482 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7483 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7484 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7485 } 7486 %} 7487 ins_pipe( pipe_slow ); 7488 %} 7489 7490 instruct vcastItoX_evex(vec dst, vec src) %{ 7491 predicate(UseAVX > 2 || 7492 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7493 match(Set dst (VectorCastI2X src)); 7494 format %{ "vector_cast_i2x $dst,$src\t!" %} 7495 ins_encode %{ 7496 assert(UseAVX > 0, "required"); 7497 7498 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7499 int src_vlen_enc = vector_length_encoding(this, $src); 7500 int dst_vlen_enc = vector_length_encoding(this); 7501 switch (dst_elem_bt) { 7502 case T_BYTE: 7503 if (!VM_Version::supports_avx512vl()) { 7504 src_vlen_enc = Assembler::AVX_512bit; 7505 } 7506 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7507 break; 7508 case T_SHORT: 7509 if (!VM_Version::supports_avx512vl()) { 7510 src_vlen_enc = Assembler::AVX_512bit; 7511 } 7512 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7513 break; 7514 case T_FLOAT: 7515 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7516 break; 7517 case T_LONG: 7518 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7519 break; 7520 case T_DOUBLE: 7521 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7522 break; 7523 default: 7524 ShouldNotReachHere(); 7525 } 7526 %} 7527 ins_pipe( pipe_slow ); 7528 %} 7529 7530 instruct vcastLtoBS(vec dst, vec src) %{ 7531 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7532 UseAVX <= 2); 7533 match(Set dst (VectorCastL2X src)); 7534 format %{ "vector_cast_l2x $dst,$src" %} 7535 ins_encode %{ 7536 assert(UseAVX > 0, "required"); 7537 7538 int vlen = Matcher::vector_length_in_bytes(this, $src); 7539 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7540 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? 
ExternalAddress(vector_int_to_byte_mask()) 7541 : ExternalAddress(vector_int_to_short_mask()); 7542 if (vlen <= 16) { 7543 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7544 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7545 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7546 } else { 7547 assert(vlen <= 32, "required"); 7548 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7549 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7550 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7551 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7552 } 7553 if (to_elem_bt == T_BYTE) { 7554 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7555 } 7556 %} 7557 ins_pipe( pipe_slow ); 7558 %} 7559 7560 instruct vcastLtoX_evex(vec dst, vec src) %{ 7561 predicate(UseAVX > 2 || 7562 (Matcher::vector_element_basic_type(n) == T_INT || 7563 Matcher::vector_element_basic_type(n) == T_FLOAT || 7564 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7565 match(Set dst (VectorCastL2X src)); 7566 format %{ "vector_cast_l2x $dst,$src\t!" %} 7567 ins_encode %{ 7568 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7569 int vlen = Matcher::vector_length_in_bytes(this, $src); 7570 int vlen_enc = vector_length_encoding(this, $src); 7571 switch (to_elem_bt) { 7572 case T_BYTE: 7573 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7574 vlen_enc = Assembler::AVX_512bit; 7575 } 7576 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7577 break; 7578 case T_SHORT: 7579 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7580 vlen_enc = Assembler::AVX_512bit; 7581 } 7582 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7583 break; 7584 case T_INT: 7585 if (vlen == 8) { 7586 if ($dst$$XMMRegister != $src$$XMMRegister) { 7587 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7588 } 7589 } else if (vlen == 16) { 7590 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7591 } else if (vlen == 32) { 7592 if (UseAVX > 2) { 7593 if (!VM_Version::supports_avx512vl()) { 7594 vlen_enc = Assembler::AVX_512bit; 7595 } 7596 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7597 } else { 7598 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7599 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7600 } 7601 } else { // vlen == 64 7602 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7603 } 7604 break; 7605 case T_FLOAT: 7606 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7607 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7608 break; 7609 case T_DOUBLE: 7610 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7611 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7612 break; 7613 7614 default: assert(false, "%s", type2name(to_elem_bt)); 7615 } 7616 %} 7617 ins_pipe( pipe_slow ); 7618 %} 7619 7620 instruct vcastFtoD_reg(vec dst, vec src) %{ 7621 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7622 match(Set dst (VectorCastF2X src)); 7623 format %{ "vector_cast_f2d $dst,$src\t!" 
%} 7624 ins_encode %{ 7625 int vlen_enc = vector_length_encoding(this); 7626 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7627 %} 7628 ins_pipe( pipe_slow ); 7629 %} 7630 7631 7632 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7633 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7634 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4); 7635 match(Set dst (VectorCastF2X src)); 7636 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7637 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 7638 ins_encode %{ 7639 int vlen_enc = vector_length_encoding(this, $src); 7640 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7641 // JDK-8292878 removed the need for an explicit scratch register when loading addresses wider 7642 // than 32 bits for register-indirect addressing, since stub constants are part of the 7643 // code cache and ReservedCodeCacheSize is currently capped at 2G. 7644 // Targets are free to raise this limit, but a code cache larger than 2G is unrealistic 7645 // in practice. On the flip side, with the given cap we save a temporary register 7646 // allocation, which in the limiting case can prevent spilling in blocks with high 7647 // register pressure. 7648 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7649 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 7650 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7651 %} 7652 ins_pipe( pipe_slow ); 7653 %} 7654 7655 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7656 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7657 is_integral_type(Matcher::vector_element_basic_type(n))); 7658 match(Set dst (VectorCastF2X src)); 7659 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7660 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7661 ins_encode %{ 7662 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7663 if (to_elem_bt == T_LONG) { 7664 int vlen_enc = vector_length_encoding(this); 7665 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7666 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7667 ExternalAddress(vector_double_signflip()), noreg, vlen_enc); 7668 } else { 7669 int vlen_enc = vector_length_encoding(this, $src); 7670 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7671 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7672 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7673 } 7674 %} 7675 ins_pipe( pipe_slow ); 7676 %} 7677 7678 instruct vcastDtoF_reg(vec dst, vec src) %{ 7679 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 7680 match(Set dst (VectorCastD2X src)); 7681 format %{ "vector_cast_d2x $dst,$src\t!"
%} 7682 ins_encode %{ 7683 int vlen_enc = vector_length_encoding(this, $src); 7684 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7685 %} 7686 ins_pipe( pipe_slow ); 7687 %} 7688 7689 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{ 7690 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7691 is_integral_type(Matcher::vector_element_basic_type(n))); 7692 match(Set dst (VectorCastD2X src)); 7693 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr); 7694 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %} 7695 ins_encode %{ 7696 int vlen_enc = vector_length_encoding(this, $src); 7697 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7698 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7699 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister, 7700 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7701 %} 7702 ins_pipe( pipe_slow ); 7703 %} 7704 7705 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7706 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7707 is_integral_type(Matcher::vector_element_basic_type(n))); 7708 match(Set dst (VectorCastD2X src)); 7709 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7710 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7711 ins_encode %{ 7712 int vlen_enc = vector_length_encoding(this, $src); 7713 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7714 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) : 7715 ExternalAddress(vector_float_signflip()); 7716 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7717 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc); 7718 %} 7719 ins_pipe( pipe_slow ); 7720 %} 7721 7722 instruct vucast(vec dst, vec src) %{ 7723 match(Set dst (VectorUCastB2X src)); 7724 match(Set dst (VectorUCastS2X src)); 7725 match(Set dst (VectorUCastI2X src)); 7726 format %{ "vector_ucast $dst,$src\t!" %} 7727 ins_encode %{ 7728 assert(UseAVX > 0, "required"); 7729 7730 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7731 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7732 int vlen_enc = vector_length_encoding(this); 7733 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7734 %} 7735 ins_pipe( pipe_slow ); 7736 %} 7737 7738 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7739 predicate(!VM_Version::supports_avx512vl() && 7740 Matcher::vector_length_in_bytes(n) < 64 && 7741 Matcher::vector_element_basic_type(n) == T_INT); 7742 match(Set dst (RoundVF src)); 7743 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7744 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %} 7745 ins_encode %{ 7746 int vlen_enc = vector_length_encoding(this); 7747 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 
0x3FBF : 0x3F80)); 7748 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, 7749 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7750 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister); 7751 %} 7752 ins_pipe( pipe_slow ); 7753 %} 7754 7755 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7756 predicate((VM_Version::supports_avx512vl() || 7757 Matcher::vector_length_in_bytes(n) == 64) && 7758 Matcher::vector_element_basic_type(n) == T_INT); 7759 match(Set dst (RoundVF src)); 7760 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7761 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7762 ins_encode %{ 7763 int vlen_enc = vector_length_encoding(this); 7764 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7765 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, 7766 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7767 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7768 %} 7769 ins_pipe( pipe_slow ); 7770 %} 7771 7772 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7773 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7774 match(Set dst (RoundVD src)); 7775 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7776 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7777 ins_encode %{ 7778 int vlen_enc = vector_length_encoding(this); 7779 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7780 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, 7781 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc, 7782 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7783 %} 7784 ins_pipe( pipe_slow ); 7785 %} 7786 7787 // --------------------------------- VectorMaskCmp -------------------------------------- 7788 7789 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7790 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7791 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 7792 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7793 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7794 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7795 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 7796 ins_encode %{ 7797 int vlen_enc = vector_length_encoding(this, $src1); 7798 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7799 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7800 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7801 } else { 7802 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7803 } 7804 %} 7805 ins_pipe( pipe_slow ); 7806 %} 7807 7808 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7809 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 7810 n->bottom_type()->isa_vectmask() == nullptr && 7811 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7812 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7813 effect(TEMP ktmp); 7814 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7815 ins_encode %{ 7816 int vlen_enc = Assembler::AVX_512bit; 7817 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7818 KRegister mask = k0; // The comparison itself is not being masked. 7819 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7820 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7821 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7822 } else { 7823 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7824 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7825 } 7826 %} 7827 ins_pipe( pipe_slow ); 7828 %} 7829 7830 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 7831 predicate(n->bottom_type()->isa_vectmask() && 7832 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7833 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7834 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 7835 ins_encode %{ 7836 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 7837 int vlen_enc = vector_length_encoding(this, $src1); 7838 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7839 KRegister mask = k0; // The comparison itself is not being masked. 7840 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7841 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7842 } else { 7843 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7844 } 7845 %} 7846 ins_pipe( pipe_slow ); 7847 %} 7848 7849 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7850 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7851 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7852 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7853 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7854 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7855 (n->in(2)->get_int() == BoolTest::eq || 7856 n->in(2)->get_int() == BoolTest::lt || 7857 n->in(2)->get_int() == BoolTest::gt)); // cond 7858 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7859 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 7860 ins_encode %{ 7861 int vlen_enc = vector_length_encoding(this, $src1); 7862 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7863 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7864 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc); 7865 %} 7866 ins_pipe( pipe_slow ); 7867 %} 7868 7869 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 7870 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7871 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7872 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7873 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7874 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7875 (n->in(2)->get_int() == BoolTest::ne || 7876 n->in(2)->get_int() == BoolTest::le || 7877 n->in(2)->get_int() == BoolTest::ge)); // cond 7878 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7879 effect(TEMP dst, TEMP xtmp); 7880 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %} 7881 ins_encode %{ 7882 int vlen_enc = vector_length_encoding(this, $src1); 7883 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7884 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7885 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7886 %} 7887 ins_pipe( pipe_slow ); 7888 %} 7889 7890 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 7891 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7892 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7893 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7894 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7895 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7896 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7897 effect(TEMP dst, TEMP xtmp); 7898 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! 
using $xtmp as TEMP" %} 7899 ins_encode %{ 7900 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1))); 7901 int vlen_enc = vector_length_encoding(this, $src1); 7902 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7903 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7904 7905 if (vlen_enc == Assembler::AVX_128bit) { 7906 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7907 } else { 7908 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7909 } 7910 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 7911 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7912 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7913 %} 7914 ins_pipe( pipe_slow ); 7915 %} 7916 7917 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7918 predicate((n->bottom_type()->isa_vectmask() == nullptr && 7919 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 7920 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7921 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7922 effect(TEMP ktmp); 7923 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7924 ins_encode %{ 7925 assert(UseAVX > 2, "required"); 7926 7927 int vlen_enc = vector_length_encoding(this, $src1); 7928 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7929 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 7930 KRegister mask = k0; // The comparison itself is not being masked. 7931 bool merge = false; 7932 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 7933 7934 switch (src1_elem_bt) { 7935 case T_INT: { 7936 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7937 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 7938 break; 7939 } 7940 case T_LONG: { 7941 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7942 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 7943 break; 7944 } 7945 default: assert(false, "%s", type2name(src1_elem_bt)); 7946 } 7947 %} 7948 ins_pipe( pipe_slow ); 7949 %} 7950 7951 7952 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{ 7953 predicate(n->bottom_type()->isa_vectmask() && 7954 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7955 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7956 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" 
%} 7957 ins_encode %{ 7958 assert(UseAVX > 2, "required"); 7959 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 7960 7961 int vlen_enc = vector_length_encoding(this, $src1); 7962 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7963 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 7964 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 7965 7966 // Dispatch the comparison on the element type of src1. 7967 switch (src1_elem_bt) { 7968 case T_BYTE: { 7969 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7970 break; 7971 } 7972 case T_SHORT: { 7973 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7974 break; 7975 } 7976 case T_INT: { 7977 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7978 break; 7979 } 7980 case T_LONG: { 7981 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7982 break; 7983 } 7984 default: assert(false, "%s", type2name(src1_elem_bt)); 7985 } 7986 %} 7987 ins_pipe( pipe_slow ); 7988 %} 7989 7990 // Extract 7991 7992 instruct extractI(rRegI dst, legVec src, immU8 idx) %{ 7993 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src 7994 match(Set dst (ExtractI src idx)); 7995 match(Set dst (ExtractS src idx)); 7996 match(Set dst (ExtractB src idx)); 7997 format %{ "extractI $dst,$src,$idx\t!" %} 7998 ins_encode %{ 7999 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8000 8001 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8002 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8003 %} 8004 ins_pipe( pipe_slow ); 8005 %} 8006 8007 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ 8008 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src 8009 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src 8010 match(Set dst (ExtractI src idx)); 8011 match(Set dst (ExtractS src idx)); 8012 match(Set dst (ExtractB src idx)); 8013 effect(TEMP vtmp); 8014 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %} 8015 ins_encode %{ 8016 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8017 8018 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8019 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8020 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); 8021 %} 8022 ins_pipe( pipe_slow ); 8023 %} 8024 8025 instruct extractL(rRegL dst, legVec src, immU8 idx) %{ 8026 predicate(Matcher::vector_length(n->in(1)) <= 2); // src 8027 match(Set dst (ExtractL src idx)); 8028 format %{ "extractL $dst,$src,$idx\t!" %} 8029 ins_encode %{ 8030 assert(UseSSE >= 4, "required"); 8031 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8032 8033 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8034 %} 8035 ins_pipe( pipe_slow ); 8036 %} 8037 8038 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ 8039 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8040 Matcher::vector_length(n->in(1)) == 8); // src 8041 match(Set dst (ExtractL src idx)); 8042 effect(TEMP vtmp); 8043 format %{ "vextractL $dst,$src,$idx\t!
using $vtmp as TEMP" %} 8044 ins_encode %{ 8045 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8046 8047 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8048 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 8049 %} 8050 ins_pipe( pipe_slow ); 8051 %} 8052 8053 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8054 predicate(Matcher::vector_length(n->in(1)) <= 4); 8055 match(Set dst (ExtractF src idx)); 8056 effect(TEMP dst, TEMP vtmp); 8057 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8058 ins_encode %{ 8059 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8060 8061 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 8062 %} 8063 ins_pipe( pipe_slow ); 8064 %} 8065 8066 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8067 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 8068 Matcher::vector_length(n->in(1)/*src*/) == 16); 8069 match(Set dst (ExtractF src idx)); 8070 effect(TEMP vtmp); 8071 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8072 ins_encode %{ 8073 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8074 8075 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8076 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 8077 %} 8078 ins_pipe( pipe_slow ); 8079 %} 8080 8081 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 8082 predicate(Matcher::vector_length(n->in(1)) == 2); // src 8083 match(Set dst (ExtractD src idx)); 8084 format %{ "extractD $dst,$src,$idx\t!" %} 8085 ins_encode %{ 8086 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8087 8088 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8089 %} 8090 ins_pipe( pipe_slow ); 8091 %} 8092 8093 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 8094 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8095 Matcher::vector_length(n->in(1)) == 8); // src 8096 match(Set dst (ExtractD src idx)); 8097 effect(TEMP vtmp); 8098 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 8099 ins_encode %{ 8100 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8101 8102 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8103 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 8104 %} 8105 ins_pipe( pipe_slow ); 8106 %} 8107 8108 // --------------------------------- Vector Blend -------------------------------------- 8109 8110 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 8111 predicate(UseAVX == 0); 8112 match(Set dst (VectorBlend (Binary dst src) mask)); 8113 format %{ "vector_blend $dst,$src,$mask\t! 
using $tmp as TEMP" %} 8114 effect(TEMP tmp); 8115 ins_encode %{ 8116 assert(UseSSE >= 4, "required"); 8117 8118 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 8119 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 8120 } 8121 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 8122 %} 8123 ins_pipe( pipe_slow ); 8124 %} 8125 8126 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8127 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8128 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8129 Matcher::vector_length_in_bytes(n) <= 32 && 8130 is_integral_type(Matcher::vector_element_basic_type(n))); 8131 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8132 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8133 ins_encode %{ 8134 int vlen_enc = vector_length_encoding(this); 8135 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8136 %} 8137 ins_pipe( pipe_slow ); 8138 %} 8139 8140 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8141 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8142 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8143 Matcher::vector_length_in_bytes(n) <= 32 && 8144 !is_integral_type(Matcher::vector_element_basic_type(n))); 8145 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8146 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8147 ins_encode %{ 8148 int vlen_enc = vector_length_encoding(this); 8149 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8150 %} 8151 ins_pipe( pipe_slow ); 8152 %} 8153 8154 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{ 8155 predicate(UseAVX > 0 && EnableX86ECoreOpts && 8156 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8157 Matcher::vector_length_in_bytes(n) <= 32); 8158 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8159 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %} 8160 effect(TEMP vtmp, TEMP dst); 8161 ins_encode %{ 8162 int vlen_enc = vector_length_encoding(this); 8163 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8164 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8165 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8166 %} 8167 ins_pipe( pipe_slow ); 8168 %} 8169 8170 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 8171 predicate(Matcher::vector_length_in_bytes(n) == 64 && 8172 n->in(2)->bottom_type()->isa_vectmask() == nullptr); 8173 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8174 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using $ktmp as TEMP" %} 8175 effect(TEMP ktmp); 8176 ins_encode %{ 8177 int vlen_enc = Assembler::AVX_512bit; 8178 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8179 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 8180 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8181 %} 8182 ins_pipe( pipe_slow ); 8183 %} 8184 8185 8186 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 8187 predicate(n->in(2)->bottom_type()->isa_vectmask() && 8188 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 8189 VM_Version::supports_avx512bw())); 8190 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8191 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8192 ins_encode %{ 8193 int vlen_enc = vector_length_encoding(this); 8194 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8195 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8196 %} 8197 ins_pipe( pipe_slow ); 8198 %} 8199 8200 // --------------------------------- ABS -------------------------------------- 8201 // a = |a| 8202 instruct vabsB_reg(vec dst, vec src) %{ 8203 match(Set dst (AbsVB src)); 8204 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 8205 ins_encode %{ 8206 uint vlen = Matcher::vector_length(this); 8207 if (vlen <= 16) { 8208 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8209 } else { 8210 int vlen_enc = vector_length_encoding(this); 8211 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8212 } 8213 %} 8214 ins_pipe( pipe_slow ); 8215 %} 8216 8217 instruct vabsS_reg(vec dst, vec src) %{ 8218 match(Set dst (AbsVS src)); 8219 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 8220 ins_encode %{ 8221 uint vlen = Matcher::vector_length(this); 8222 if (vlen <= 8) { 8223 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8224 } else { 8225 int vlen_enc = vector_length_encoding(this); 8226 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8227 } 8228 %} 8229 ins_pipe( pipe_slow ); 8230 %} 8231 8232 instruct vabsI_reg(vec dst, vec src) %{ 8233 match(Set dst (AbsVI src)); 8234 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 8235 ins_encode %{ 8236 uint vlen = Matcher::vector_length(this); 8237 if (vlen <= 4) { 8238 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8239 } else { 8240 int vlen_enc = vector_length_encoding(this); 8241 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8242 } 8243 %} 8244 ins_pipe( pipe_slow ); 8245 %} 8246 8247 instruct vabsL_reg(vec dst, vec src) %{ 8248 match(Set dst (AbsVL src)); 8249 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 8250 ins_encode %{ 8251 assert(UseAVX > 2, "required"); 8252 int vlen_enc = vector_length_encoding(this); 8253 if (!VM_Version::supports_avx512vl()) { 8254 vlen_enc = Assembler::AVX_512bit; 8255 } 8256 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8257 %} 8258 ins_pipe( pipe_slow ); 8259 %} 8260 8261 // --------------------------------- ABSNEG -------------------------------------- 8262 8263 instruct vabsnegF(vec dst, vec src) %{ 8264 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 8265 match(Set dst (AbsVF src)); 8266 match(Set dst (NegVF src)); 8267 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 8268 ins_cost(150); 8269 ins_encode %{ 8270 int opcode = 
this->ideal_Opcode(); 8271 int vlen = Matcher::vector_length(this); 8272 if (vlen == 2) { 8273 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8274 } else { 8275 assert(vlen == 8 || vlen == 16, "required"); 8276 int vlen_enc = vector_length_encoding(this); 8277 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8278 } 8279 %} 8280 ins_pipe( pipe_slow ); 8281 %} 8282 8283 instruct vabsneg4F(vec dst) %{ 8284 predicate(Matcher::vector_length(n) == 4); 8285 match(Set dst (AbsVF dst)); 8286 match(Set dst (NegVF dst)); 8287 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 8288 ins_cost(150); 8289 ins_encode %{ 8290 int opcode = this->ideal_Opcode(); 8291 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 8292 %} 8293 ins_pipe( pipe_slow ); 8294 %} 8295 8296 instruct vabsnegD(vec dst, vec src) %{ 8297 match(Set dst (AbsVD src)); 8298 match(Set dst (NegVD src)); 8299 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 8300 ins_encode %{ 8301 int opcode = this->ideal_Opcode(); 8302 uint vlen = Matcher::vector_length(this); 8303 if (vlen == 2) { 8304 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8305 } else { 8306 int vlen_enc = vector_length_encoding(this); 8307 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8308 } 8309 %} 8310 ins_pipe( pipe_slow ); 8311 %} 8312 8313 //------------------------------------- VectorTest -------------------------------------------- 8314 8315 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ 8316 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); 8317 match(Set cr (VectorTest src1 src2)); 8318 effect(TEMP vtmp); 8319 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %} 8320 ins_encode %{ 8321 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8322 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8323 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen); 8324 %} 8325 ins_pipe( pipe_slow ); 8326 %} 8327 8328 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{ 8329 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16); 8330 match(Set cr (VectorTest src1 src2)); 8331 format %{ "vptest_ge16 $src1, $src2\n\t" %} 8332 ins_encode %{ 8333 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8334 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8335 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen); 8336 %} 8337 ins_pipe( pipe_slow ); 8338 %} 8339 8340 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8341 predicate((Matcher::vector_length(n->in(1)) < 8 || 8342 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8343 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 8344 match(Set cr (VectorTest src1 src2)); 8345 effect(TEMP tmp); 8346 format %{ "ktest_alltrue_le8 $src1, $src2\t! 
using $tmp as TEMP" %} 8347 ins_encode %{ 8348 uint masklen = Matcher::vector_length(this, $src1); 8349 __ kmovwl($tmp$$Register, $src1$$KRegister); 8350 __ andl($tmp$$Register, (1 << masklen) - 1); 8351 __ cmpl($tmp$$Register, (1 << masklen) - 1); 8352 %} 8353 ins_pipe( pipe_slow ); 8354 %} 8355 8356 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8357 predicate((Matcher::vector_length(n->in(1)) < 8 || 8358 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8359 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 8360 match(Set cr (VectorTest src1 src2)); 8361 effect(TEMP tmp); 8362 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %} 8363 ins_encode %{ 8364 uint masklen = Matcher::vector_length(this, $src1); 8365 __ kmovwl($tmp$$Register, $src1$$KRegister); 8366 __ andl($tmp$$Register, (1 << masklen) - 1); 8367 %} 8368 ins_pipe( pipe_slow ); 8369 %} 8370 8371 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ 8372 predicate(Matcher::vector_length(n->in(1)) >= 16 || 8373 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq())); 8374 match(Set cr (VectorTest src1 src2)); 8375 format %{ "ktest_ge8 $src1, $src2\n\t" %} 8376 ins_encode %{ 8377 uint masklen = Matcher::vector_length(this, $src1); 8378 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister); 8379 %} 8380 ins_pipe( pipe_slow ); 8381 %} 8382 8383 //------------------------------------- LoadMask -------------------------------------------- 8384 8385 instruct loadMask(legVec dst, legVec src) %{ 8386 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw()); 8387 match(Set dst (VectorLoadMask src)); 8388 effect(TEMP dst); 8389 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8390 ins_encode %{ 8391 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8392 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8393 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8394 %} 8395 ins_pipe( pipe_slow ); 8396 %} 8397 8398 instruct loadMask64(kReg dst, vec src, vec xtmp) %{ 8399 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8400 match(Set dst (VectorLoadMask src)); 8401 effect(TEMP xtmp); 8402 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} 8403 ins_encode %{ 8404 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8405 true, Assembler::AVX_512bit); 8406 %} 8407 ins_pipe( pipe_slow ); 8408 %} 8409 8410 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8411 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8412 match(Set dst (VectorLoadMask src)); 8413 effect(TEMP xtmp); 8414 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8415 ins_encode %{ 8416 int vlen_enc = vector_length_encoding(in(1)); 8417 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8418 false, vlen_enc); 8419 %} 8420 ins_pipe( pipe_slow ); 8421 %} 8422 8423 //------------------------------------- StoreMask -------------------------------------------- 8424 8425 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8426 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8427 match(Set dst (VectorStoreMask src size)); 8428 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8429 ins_encode %{ 8430 int vlen = Matcher::vector_length(this); 8431 if (vlen <= 16 && UseAVX <= 2) { 8432 assert(UseSSE >= 3, "required"); 8433 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8434 } else { 8435 assert(UseAVX > 0, "required"); 8436 int src_vlen_enc = vector_length_encoding(this, $src); 8437 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8438 } 8439 %} 8440 ins_pipe( pipe_slow ); 8441 %} 8442 8443 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8444 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8445 match(Set dst (VectorStoreMask src size)); 8446 effect(TEMP_DEF dst, TEMP xtmp); 8447 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8448 ins_encode %{ 8449 int vlen_enc = Assembler::AVX_128bit; 8450 int vlen = Matcher::vector_length(this); 8451 if (vlen <= 8) { 8452 assert(UseSSE >= 3, "required"); 8453 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8454 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8455 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8456 } else { 8457 assert(UseAVX > 0, "required"); 8458 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8459 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8460 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8461 } 8462 %} 8463 ins_pipe( pipe_slow ); 8464 %} 8465 8466 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8467 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8468 match(Set dst (VectorStoreMask src size)); 8469 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8470 effect(TEMP_DEF dst, TEMP xtmp); 8471 ins_encode %{ 8472 int vlen_enc = Assembler::AVX_128bit; 8473 int vlen = Matcher::vector_length(this); 8474 if (vlen <= 4) { 8475 assert(UseSSE >= 3, "required"); 8476 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8477 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8478 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8479 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8480 } else { 8481 assert(UseAVX > 0, "required"); 8482 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8483 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8484 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8485 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8486 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8487 } 8488 %} 8489 ins_pipe( pipe_slow ); 8490 %} 8491 8492 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8493 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8494 match(Set dst (VectorStoreMask src size)); 8495 effect(TEMP_DEF dst, TEMP xtmp); 8496 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8497 ins_encode %{ 8498 assert(UseSSE >= 3, "required"); 8499 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8500 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8501 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8502 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8503 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8504 %} 8505 ins_pipe( pipe_slow ); 8506 %} 8507 8508 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8509 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8510 match(Set dst (VectorStoreMask src size)); 8511 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %} 8512 effect(TEMP_DEF dst, TEMP vtmp); 8513 ins_encode %{ 8514 int vlen_enc = Assembler::AVX_128bit; 8515 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8516 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8517 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8518 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8519 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8520 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8521 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8522 %} 8523 ins_pipe( pipe_slow ); 8524 %} 8525 8526 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8527 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8528 match(Set dst (VectorStoreMask src size)); 8529 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8530 ins_encode %{ 8531 int src_vlen_enc = vector_length_encoding(this, $src); 8532 int dst_vlen_enc = vector_length_encoding(this); 8533 if (!VM_Version::supports_avx512vl()) { 8534 src_vlen_enc = Assembler::AVX_512bit; 8535 } 8536 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8537 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8538 %} 8539 ins_pipe( pipe_slow ); 8540 %} 8541 8542 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8543 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8544 match(Set dst (VectorStoreMask src size)); 8545 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8546 ins_encode %{ 8547 int src_vlen_enc = vector_length_encoding(this, $src); 8548 int dst_vlen_enc = vector_length_encoding(this); 8549 if (!VM_Version::supports_avx512vl()) { 8550 src_vlen_enc = Assembler::AVX_512bit; 8551 } 8552 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8553 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8554 %} 8555 ins_pipe( pipe_slow ); 8556 %} 8557 8558 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{ 8559 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8560 match(Set dst (VectorStoreMask mask size)); 8561 effect(TEMP_DEF dst); 8562 format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %} 8563 ins_encode %{ 8564 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8565 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8566 false, Assembler::AVX_512bit, noreg); 8567 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8568 %} 8569 ins_pipe( pipe_slow ); 8570 %} 8571 8572 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8573 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8574 match(Set dst (VectorStoreMask mask size)); 8575 effect(TEMP_DEF dst); 8576 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8577 ins_encode %{ 8578 int dst_vlen_enc = vector_length_encoding(this); 8579 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8580 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8581 %} 8582 ins_pipe( pipe_slow ); 8583 %} 8584 8585 instruct vmaskcast_evex(kReg dst) %{ 8586 match(Set dst (VectorMaskCast dst)); 8587 ins_cost(0); 8588 format %{ "vector_mask_cast $dst" %} 8589 ins_encode %{ 8590 // empty 8591 %} 8592 ins_pipe(empty); 8593 %} 8594 8595 instruct vmaskcast(vec dst) %{ 8596 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); 8597 match(Set dst (VectorMaskCast dst)); 8598 ins_cost(0); 8599 format %{ "vector_mask_cast $dst" %} 8600 ins_encode %{ 8601 // empty 8602 %} 8603 ins_pipe(empty); 8604 %} 8605 8606 instruct vmaskcast_avx(vec dst, vec src) %{ 8607 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1))); 8608 match(Set dst (VectorMaskCast src)); 8609 format %{ "vector_mask_cast $dst, $src" %} 8610 ins_encode %{ 8611 int vlen = Matcher::vector_length(this); 8612 BasicType src_bt = Matcher::vector_element_basic_type(this, $src); 8613 BasicType dst_bt = Matcher::vector_element_basic_type(this); 8614 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen); 8615 %} 8616 ins_pipe(pipe_slow); 8617 %} 8618 8619 //-------------------------------- Load Iota Indices ---------------------------------- 8620 8621 instruct loadIotaIndices(vec dst, immI_0 src) %{ 8622 match(Set dst (VectorLoadConst src)); 8623 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8624 ins_encode %{ 8625 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8626 BasicType bt = Matcher::vector_element_basic_type(this); 8627 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt); 8628 %} 8629 ins_pipe( pipe_slow ); 8630 %} 8631 8632 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ 8633 match(Set dst (PopulateIndex src1 src2)); 8634 effect(TEMP dst, TEMP vtmp); 8635 format %{ "vector_populate_index $dst $src1 $src2\t! 
using $vtmp as TEMP" %} 8636 ins_encode %{ 8637 assert($src2$$constant == 1, "required"); 8638 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8639 int vlen_enc = vector_length_encoding(this); 8640 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8641 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8642 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8643 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8644 %} 8645 ins_pipe( pipe_slow ); 8646 %} 8647 8648 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ 8649 match(Set dst (PopulateIndex src1 src2)); 8650 effect(TEMP dst, TEMP vtmp); 8651 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} 8652 ins_encode %{ 8653 assert($src2$$constant == 1, "required"); 8654 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8655 int vlen_enc = vector_length_encoding(this); 8656 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8657 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8658 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8659 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8660 %} 8661 ins_pipe( pipe_slow ); 8662 %} 8663 8664 //-------------------------------- Rearrange ---------------------------------- 8665 8666 // LoadShuffle/Rearrange for Byte 8667 instruct rearrangeB(vec dst, vec shuffle) %{ 8668 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8669 Matcher::vector_length(n) < 32); 8670 match(Set dst (VectorRearrange dst shuffle)); 8671 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8672 ins_encode %{ 8673 assert(UseSSE >= 4, "required"); 8674 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8675 %} 8676 ins_pipe( pipe_slow ); 8677 %} 8678 8679 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8680 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8681 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8682 match(Set dst (VectorRearrange src shuffle)); 8683 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8684 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8685 ins_encode %{ 8686 assert(UseAVX >= 2, "required"); 8687 // Swap src into vtmp1 8688 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8689 // Shuffle swapped src to get entries from other 128 bit lane 8690 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8691 // Shuffle original src to get entries from self 128 bit lane 8692 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8693 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8694 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8695 // Perform the blend 8696 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8697 %} 8698 ins_pipe( pipe_slow ); 8699 %} 8700 8701 8702 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{ 8703 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8704 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi()); 8705 match(Set dst (VectorRearrange src shuffle)); 8706 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 8707 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %} 8708 ins_encode %{ 8709 int vlen_enc = vector_length_encoding(this); 8710 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, 8711 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, 8712 $rtmp$$Register, $ktmp$$KRegister, vlen_enc); 8713 %} 8714 ins_pipe( pipe_slow ); 8715 %} 8716 8717 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{ 8718 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8719 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); 8720 match(Set dst (VectorRearrange src shuffle)); 8721 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8722 ins_encode %{ 8723 int vlen_enc = vector_length_encoding(this); 8724 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8725 %} 8726 ins_pipe( pipe_slow ); 8727 %} 8728 8729 // LoadShuffle/Rearrange for Short 8730 8731 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{ 8732 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8733 !VM_Version::supports_avx512bw()); 8734 match(Set dst (VectorLoadShuffle src)); 8735 effect(TEMP dst, TEMP vtmp); 8736 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8737 ins_encode %{ 8738 // Create a byte shuffle mask from short shuffle mask 8739 // only byte shuffle instruction available on these platforms 8740 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8741 if (UseAVX == 0) { 8742 assert(vlen_in_bytes <= 16, "required"); 8743 // Multiply each shuffle by two to get byte index 8744 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister); 8745 __ psllw($vtmp$$XMMRegister, 1); 8746 8747 // Duplicate to create 2 copies of byte index 8748 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8749 __ psllw($dst$$XMMRegister, 8); 8750 __ por($dst$$XMMRegister, $vtmp$$XMMRegister); 8751 8752 // Add one to get alternate byte index 8753 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg); 8754 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8755 } else { 8756 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); 8757 int vlen_enc = vector_length_encoding(this); 8758 // Multiply each shuffle by two to get byte index 8759 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc); 8760 8761 // Duplicate to create 2 copies of byte index 8762 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); 8763 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8764 8765 // Add one to get alternate byte index 8766 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg); 8767 } 8768 %} 8769 ins_pipe( pipe_slow ); 8770 %} 8771 8772 instruct rearrangeS(vec dst, vec shuffle) %{ 8773 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8774 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); 8775 match(Set dst (VectorRearrange dst shuffle)); 8776 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8777 ins_encode %{ 8778 assert(UseSSE >= 4, "required"); 8779 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8780 %} 8781 ins_pipe( pipe_slow ); 8782 %} 8783 8784 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8785 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8786 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 8787 match(Set dst (VectorRearrange src shuffle)); 8788 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8789 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8790 ins_encode %{ 8791 assert(UseAVX >= 2, "required"); 8792 // Swap src into vtmp1 8793 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8794 // Shuffle swapped src to get entries from other 128 bit lane 8795 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8796 // Shuffle original src to get entries from self 128 bit lane 8797 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8798 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8799 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8800 // Perform the blend 8801 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8802 %} 8803 ins_pipe( pipe_slow ); 8804 %} 8805 8806 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 8807 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8808 VM_Version::supports_avx512bw()); 8809 match(Set dst (VectorRearrange src shuffle)); 8810 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8811 ins_encode %{ 8812 int vlen_enc = vector_length_encoding(this); 8813 if (!VM_Version::supports_avx512vl()) { 8814 vlen_enc = Assembler::AVX_512bit; 8815 } 8816 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8817 %} 8818 ins_pipe( pipe_slow ); 8819 %} 8820 8821 // LoadShuffle/Rearrange for Integer and Float 8822 8823 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{ 8824 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8825 Matcher::vector_length(n) == 4 && UseAVX == 0); 8826 match(Set dst (VectorLoadShuffle src)); 8827 effect(TEMP dst, TEMP vtmp); 8828 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8829 ins_encode %{ 8830 assert(UseSSE >= 4, "required"); 8831 8832 // Create a byte shuffle mask from int shuffle mask 8833 // only byte shuffle instruction available on these platforms 8834 8835 // Duplicate and multiply each shuffle by 4 8836 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister); 8837 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8838 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8839 __ psllw($vtmp$$XMMRegister, 2); 8840 8841 // Duplicate again to create 4 copies of byte index 8842 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8843 __ psllw($dst$$XMMRegister, 8); 8844 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 8845 8846 // Add 3,2,1,0 to get alternate byte index 8847 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg); 8848 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8849 %} 8850 ins_pipe( pipe_slow ); 8851 %} 8852 8853 instruct rearrangeI(vec dst, vec shuffle) %{ 8854 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8855 UseAVX == 0); 8856 match(Set dst (VectorRearrange dst shuffle)); 8857 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8858 ins_encode %{ 8859 assert(UseSSE >= 4, "required"); 8860 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8861 %} 8862 ins_pipe( pipe_slow ); 8863 %} 8864 8865 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 8866 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8867 UseAVX > 0); 8868 match(Set dst (VectorRearrange src shuffle)); 8869 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8870 ins_encode %{ 8871 int vlen_enc = vector_length_encoding(this); 8872 BasicType bt = Matcher::vector_element_basic_type(this); 8873 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8874 %} 8875 ins_pipe( pipe_slow ); 8876 %} 8877 8878 // LoadShuffle/Rearrange for Long and Double 8879 8880 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{ 8881 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8882 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8883 match(Set dst (VectorLoadShuffle src)); 8884 effect(TEMP dst, TEMP vtmp); 8885 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8886 ins_encode %{ 8887 assert(UseAVX >= 2, "required"); 8888 8889 int vlen_enc = vector_length_encoding(this); 8890 // Create a double word shuffle mask from long shuffle mask 8891 // only double word shuffle instruction available on these platforms 8892 8893 // Multiply each shuffle by two to get double word index 8894 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc); 8895 8896 // Duplicate each double word shuffle 8897 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 8898 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8899 8900 // Add one to get alternate double word index 8901 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg); 8902 %} 8903 ins_pipe( pipe_slow ); 8904 %} 8905 8906 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 8907 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8908 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8909 match(Set dst (VectorRearrange src shuffle)); 8910 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8911 ins_encode %{ 8912 assert(UseAVX >= 2, "required"); 8913 8914 int vlen_enc = vector_length_encoding(this); 8915 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8916 %} 8917 ins_pipe( pipe_slow ); 8918 %} 8919 8920 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 8921 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8922 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 8923 match(Set dst (VectorRearrange src shuffle)); 8924 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8925 ins_encode %{ 8926 assert(UseAVX > 2, "required"); 8927 8928 int vlen_enc = vector_length_encoding(this); 8929 if (vlen_enc == Assembler::AVX_128bit) { 8930 vlen_enc = Assembler::AVX_256bit; 8931 } 8932 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8933 %} 8934 ins_pipe( pipe_slow ); 8935 %} 8936 8937 // --------------------------------- FMA -------------------------------------- 8938 // a * b + c 8939 8940 instruct vfmaF_reg(vec a, vec b, vec c) %{ 8941 match(Set c (FmaVF c (Binary a b))); 8942 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8943 ins_cost(150); 8944 ins_encode %{ 8945 assert(UseFMA, "not enabled"); 8946 int vlen_enc = vector_length_encoding(this); 8947 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8948 %} 8949 ins_pipe( pipe_slow ); 8950 %} 8951 8952 instruct vfmaF_mem(vec a, memory b, vec c) %{ 8953 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8954 match(Set c (FmaVF c (Binary a (LoadVector b)))); 8955 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8956 ins_cost(150); 8957 ins_encode %{ 8958 assert(UseFMA, "not enabled"); 8959 int vlen_enc = vector_length_encoding(this); 8960 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8961 %} 8962 ins_pipe( pipe_slow ); 8963 %} 8964 8965 instruct vfmaD_reg(vec a, vec b, vec c) %{ 8966 match(Set c (FmaVD c (Binary a b))); 8967 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8968 ins_cost(150); 8969 ins_encode %{ 8970 assert(UseFMA, "not enabled"); 8971 int vlen_enc = vector_length_encoding(this); 8972 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8973 %} 8974 ins_pipe( pipe_slow ); 8975 %} 
8976 8977 instruct vfmaD_mem(vec a, memory b, vec c) %{ 8978 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8979 match(Set c (FmaVD c (Binary a (LoadVector b)))); 8980 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8981 ins_cost(150); 8982 ins_encode %{ 8983 assert(UseFMA, "not enabled"); 8984 int vlen_enc = vector_length_encoding(this); 8985 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8986 %} 8987 ins_pipe( pipe_slow ); 8988 %} 8989 8990 // --------------------------------- Vector Multiply Add -------------------------------------- 8991 8992 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 8993 predicate(UseAVX == 0); 8994 match(Set dst (MulAddVS2VI dst src1)); 8995 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 8996 ins_encode %{ 8997 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 8998 %} 8999 ins_pipe( pipe_slow ); 9000 %} 9001 9002 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 9003 predicate(UseAVX > 0); 9004 match(Set dst (MulAddVS2VI src1 src2)); 9005 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 9006 ins_encode %{ 9007 int vlen_enc = vector_length_encoding(this); 9008 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9009 %} 9010 ins_pipe( pipe_slow ); 9011 %} 9012 9013 // --------------------------------- Vector Multiply Add Add ---------------------------------- 9014 9015 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 9016 predicate(VM_Version::supports_avx512_vnni()); 9017 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 9018 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %} 9019 ins_encode %{ 9020 assert(UseAVX > 2, "required"); 9021 int vlen_enc = vector_length_encoding(this); 9022 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9023 %} 9024 ins_pipe( pipe_slow ); 9025 ins_cost(10); 9026 %} 9027 9028 // --------------------------------- PopCount -------------------------------------- 9029 9030 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 9031 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9032 match(Set dst (PopCountVI src)); 9033 match(Set dst (PopCountVL src)); 9034 format %{ "vector_popcount_integral $dst, $src" %} 9035 ins_encode %{ 9036 int opcode = this->ideal_Opcode(); 9037 int vlen_enc = vector_length_encoding(this, $src); 9038 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9039 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 9040 %} 9041 ins_pipe( pipe_slow ); 9042 %} 9043 9044 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9045 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9046 match(Set dst (PopCountVI src mask)); 9047 match(Set dst (PopCountVL src mask)); 9048 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 9049 ins_encode %{ 9050 int vlen_enc = vector_length_encoding(this, $src); 9051 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9052 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9053 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 9054 %} 9055 ins_pipe( pipe_slow ); 9056 %} 9057 9058 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 9059 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9060 
match(Set dst (PopCountVI src)); 9061 match(Set dst (PopCountVL src)); 9062 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9063 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 9064 ins_encode %{ 9065 int opcode = this->ideal_Opcode(); 9066 int vlen_enc = vector_length_encoding(this, $src); 9067 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9068 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9069 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 9070 %} 9071 ins_pipe( pipe_slow ); 9072 %} 9073 9074 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 9075 9076 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 9077 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9078 Matcher::vector_length_in_bytes(n->in(1)))); 9079 match(Set dst (CountTrailingZerosV src)); 9080 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 9081 ins_cost(400); 9082 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 9083 ins_encode %{ 9084 int vlen_enc = vector_length_encoding(this, $src); 9085 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9086 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9087 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9088 %} 9089 ins_pipe( pipe_slow ); 9090 %} 9091 9092 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9093 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9094 VM_Version::supports_avx512cd() && 9095 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9096 match(Set dst (CountTrailingZerosV src)); 9097 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9098 ins_cost(400); 9099 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 9100 ins_encode %{ 9101 int vlen_enc = vector_length_encoding(this, $src); 9102 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9103 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9104 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9105 %} 9106 ins_pipe( pipe_slow ); 9107 %} 9108 9109 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{ 9110 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9111 match(Set dst (CountTrailingZerosV src)); 9112 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp); 9113 ins_cost(400); 9114 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %} 9115 ins_encode %{ 9116 int vlen_enc = vector_length_encoding(this, $src); 9117 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9118 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9119 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 9120 $ktmp$$KRegister, $rtmp$$Register, vlen_enc); 9121 %} 9122 ins_pipe( pipe_slow ); 9123 %} 9124 9125 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9126 
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9127 match(Set dst (CountTrailingZerosV src)); 9128 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9129 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9130 ins_encode %{ 9131 int vlen_enc = vector_length_encoding(this, $src); 9132 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9133 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9134 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9135 %} 9136 ins_pipe( pipe_slow ); 9137 %} 9138 9139 9140 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 9141 9142 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 9143 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 9144 effect(TEMP dst); 9145 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9146 ins_encode %{ 9147 int vector_len = vector_length_encoding(this); 9148 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 9149 %} 9150 ins_pipe( pipe_slow ); 9151 %} 9152 9153 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 9154 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 9155 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 9156 effect(TEMP dst); 9157 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9158 ins_encode %{ 9159 int vector_len = vector_length_encoding(this); 9160 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 9161 %} 9162 ins_pipe( pipe_slow ); 9163 %} 9164 9165 // --------------------------------- Rotation Operations ---------------------------------- 9166 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 9167 match(Set dst (RotateLeftV src shift)); 9168 match(Set dst (RotateRightV src shift)); 9169 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 9170 ins_encode %{ 9171 int opcode = this->ideal_Opcode(); 9172 int vector_len = vector_length_encoding(this); 9173 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9174 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 9175 %} 9176 ins_pipe( pipe_slow ); 9177 %} 9178 9179 instruct vprorate(vec dst, vec src, vec shift) %{ 9180 match(Set dst (RotateLeftV src shift)); 9181 match(Set dst (RotateRightV src shift)); 9182 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 9183 ins_encode %{ 9184 int opcode = this->ideal_Opcode(); 9185 int vector_len = vector_length_encoding(this); 9186 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9187 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9188 %} 9189 ins_pipe( pipe_slow ); 9190 %} 9191 9192 // ---------------------------------- Masked Operations ------------------------------------ 9193 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{ 9194 predicate(!n->in(3)->bottom_type()->isa_vectmask()); 9195 match(Set dst (LoadVectorMasked mem mask)); 9196 format %{ "vector_masked_load $dst, $mem, $mask \t! 
vector masked copy" %} 9197 ins_encode %{ 9198 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9199 int vlen_enc = vector_length_encoding(this); 9200 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc); 9201 %} 9202 ins_pipe( pipe_slow ); 9203 %} 9204 9205 9206 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{ 9207 predicate(n->in(3)->bottom_type()->isa_vectmask()); 9208 match(Set dst (LoadVectorMasked mem mask)); 9209 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 9210 ins_encode %{ 9211 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9212 int vector_len = vector_length_encoding(this); 9213 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len); 9214 %} 9215 ins_pipe( pipe_slow ); 9216 %} 9217 9218 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{ 9219 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9220 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9221 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9222 ins_encode %{ 9223 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9224 int vlen_enc = vector_length_encoding(src_node); 9225 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9226 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc); 9227 %} 9228 ins_pipe( pipe_slow ); 9229 %} 9230 9231 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{ 9232 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9233 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9234 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9235 ins_encode %{ 9236 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9237 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9238 int vlen_enc = vector_length_encoding(src_node); 9239 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc); 9240 %} 9241 ins_pipe( pipe_slow ); 9242 %} 9243 9244 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{ 9245 match(Set addr (VerifyVectorAlignment addr mask)); 9246 effect(KILL cr); 9247 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %} 9248 ins_encode %{ 9249 Label Lskip; 9250 // check if masked bits of addr are zero 9251 __ testq($addr$$Register, $mask$$constant); 9252 __ jccb(Assembler::equal, Lskip); 9253 __ stop("verify_vector_alignment found a misaligned vector memory access"); 9254 __ bind(Lskip); 9255 %} 9256 ins_pipe(pipe_slow); 9257 %} 9258 9259 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 9260 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 9261 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 9262 format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
vector mask comparison" %} 9263 ins_encode %{ 9264 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 9265 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 9266 9267 Label DONE; 9268 int vlen_enc = vector_length_encoding(this, $src1); 9269 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 9270 9271 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 9272 __ mov64($dst$$Register, -1L); 9273 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 9274 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 9275 __ jccb(Assembler::carrySet, DONE); 9276 __ kmovql($dst$$Register, $ktmp1$$KRegister); 9277 __ notq($dst$$Register); 9278 __ tzcntq($dst$$Register, $dst$$Register); 9279 __ bind(DONE); 9280 %} 9281 ins_pipe( pipe_slow ); 9282 %} 9283 9284 9285 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{ 9286 match(Set dst (VectorMaskGen len)); 9287 effect(TEMP temp, KILL cr); 9288 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9289 ins_encode %{ 9290 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9291 %} 9292 ins_pipe( pipe_slow ); 9293 %} 9294 9295 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9296 match(Set dst (VectorMaskGen len)); 9297 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9298 effect(TEMP temp); 9299 ins_encode %{ 9300 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9301 __ kmovql($dst$$KRegister, $temp$$Register); 9302 %} 9303 ins_pipe( pipe_slow ); 9304 %} 9305 9306 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9307 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9308 match(Set dst (VectorMaskToLong mask)); 9309 effect(TEMP dst, KILL cr); 9310 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9311 ins_encode %{ 9312 int opcode = this->ideal_Opcode(); 9313 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9314 int mask_len = Matcher::vector_length(this, $mask); 9315 int mask_size = mask_len * type2aelembytes(mbt); 9316 int vlen_enc = vector_length_encoding(this, $mask); 9317 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9318 $dst$$Register, mask_len, mask_size, vlen_enc); 9319 %} 9320 ins_pipe( pipe_slow ); 9321 %} 9322 9323 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9324 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9325 match(Set dst (VectorMaskToLong mask)); 9326 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9327 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9328 ins_encode %{ 9329 int opcode = this->ideal_Opcode(); 9330 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9331 int mask_len = Matcher::vector_length(this, $mask); 9332 int vlen_enc = vector_length_encoding(this, $mask); 9333 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9334 $dst$$Register, mask_len, mbt, vlen_enc); 9335 %} 9336 ins_pipe( pipe_slow ); 9337 %} 9338 9339 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9340 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9341 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9342 format %{ "vector_tolong_avx $dst, $mask \t! 
using $xtmp as TEMP" %} 9343 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9344 ins_encode %{ 9345 int opcode = this->ideal_Opcode(); 9346 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9347 int mask_len = Matcher::vector_length(this, $mask); 9348 int vlen_enc = vector_length_encoding(this, $mask); 9349 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9350 $dst$$Register, mask_len, mbt, vlen_enc); 9351 %} 9352 ins_pipe( pipe_slow ); 9353 %} 9354 9355 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9356 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9357 match(Set dst (VectorMaskTrueCount mask)); 9358 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9359 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9360 ins_encode %{ 9361 int opcode = this->ideal_Opcode(); 9362 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9363 int mask_len = Matcher::vector_length(this, $mask); 9364 int mask_size = mask_len * type2aelembytes(mbt); 9365 int vlen_enc = vector_length_encoding(this, $mask); 9366 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9367 $tmp$$Register, mask_len, mask_size, vlen_enc); 9368 %} 9369 ins_pipe( pipe_slow ); 9370 %} 9371 9372 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9373 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9374 match(Set dst (VectorMaskTrueCount mask)); 9375 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9376 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9377 ins_encode %{ 9378 int opcode = this->ideal_Opcode(); 9379 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9380 int mask_len = Matcher::vector_length(this, $mask); 9381 int vlen_enc = vector_length_encoding(this, $mask); 9382 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9383 $tmp$$Register, mask_len, mbt, vlen_enc); 9384 %} 9385 ins_pipe( pipe_slow ); 9386 %} 9387 9388 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9389 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9390 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9391 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9392 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9393 ins_encode %{ 9394 int opcode = this->ideal_Opcode(); 9395 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9396 int mask_len = Matcher::vector_length(this, $mask); 9397 int vlen_enc = vector_length_encoding(this, $mask); 9398 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9399 $tmp$$Register, mask_len, mbt, vlen_enc); 9400 %} 9401 ins_pipe( pipe_slow ); 9402 %} 9403 9404 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9405 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9406 match(Set dst (VectorMaskFirstTrue mask)); 9407 match(Set dst (VectorMaskLastTrue mask)); 9408 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9409 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! 
using $tmp as TEMP" %} 9410 ins_encode %{ 9411 int opcode = this->ideal_Opcode(); 9412 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9413 int mask_len = Matcher::vector_length(this, $mask); 9414 int mask_size = mask_len * type2aelembytes(mbt); 9415 int vlen_enc = vector_length_encoding(this, $mask); 9416 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9417 $tmp$$Register, mask_len, mask_size, vlen_enc); 9418 %} 9419 ins_pipe( pipe_slow ); 9420 %} 9421 9422 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9423 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9424 match(Set dst (VectorMaskFirstTrue mask)); 9425 match(Set dst (VectorMaskLastTrue mask)); 9426 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9427 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9428 ins_encode %{ 9429 int opcode = this->ideal_Opcode(); 9430 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9431 int mask_len = Matcher::vector_length(this, $mask); 9432 int vlen_enc = vector_length_encoding(this, $mask); 9433 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9434 $tmp$$Register, mask_len, mbt, vlen_enc); 9435 %} 9436 ins_pipe( pipe_slow ); 9437 %} 9438 9439 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9440 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9441 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9442 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9443 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9444 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! 
using $tmp, $xtmp as TEMP" %} 9445 ins_encode %{ 9446 int opcode = this->ideal_Opcode(); 9447 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9448 int mask_len = Matcher::vector_length(this, $mask); 9449 int vlen_enc = vector_length_encoding(this, $mask); 9450 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9451 $tmp$$Register, mask_len, mbt, vlen_enc); 9452 %} 9453 ins_pipe( pipe_slow ); 9454 %} 9455 9456 // --------------------------------- Compress/Expand Operations --------------------------- 9457 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{ 9458 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 9459 match(Set dst (CompressV src mask)); 9460 match(Set dst (ExpandV src mask)); 9461 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr); 9462 format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %} 9463 ins_encode %{ 9464 int opcode = this->ideal_Opcode(); 9465 int vlen_enc = vector_length_encoding(this); 9466 BasicType bt = Matcher::vector_element_basic_type(this); 9467 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register, 9468 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc); 9469 %} 9470 ins_pipe( pipe_slow ); 9471 %} 9472 9473 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9474 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 9475 match(Set dst (CompressV src mask)); 9476 match(Set dst (ExpandV src mask)); 9477 format %{ "vector_compress_expand $dst, $src, $mask" %} 9478 ins_encode %{ 9479 int opcode = this->ideal_Opcode(); 9480 int vector_len = vector_length_encoding(this); 9481 BasicType bt = Matcher::vector_element_basic_type(this); 9482 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9483 %} 9484 ins_pipe( pipe_slow ); 9485 %} 9486 9487 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9488 match(Set dst (CompressM mask)); 9489 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9490 format %{ "mask_compress_evex $dst, $mask\t! 
using $rtmp1 and $rtmp2 as TEMP" %} 9491 ins_encode %{ 9492 assert(this->in(1)->bottom_type()->isa_vectmask(), ""); 9493 int mask_len = Matcher::vector_length(this); 9494 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len); 9495 %} 9496 ins_pipe( pipe_slow ); 9497 %} 9498 9499 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ 9500 9501 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9502 predicate(!VM_Version::supports_gfni()); 9503 match(Set dst (ReverseV src)); 9504 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9505 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9506 ins_encode %{ 9507 int vec_enc = vector_length_encoding(this); 9508 BasicType bt = Matcher::vector_element_basic_type(this); 9509 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9510 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9511 %} 9512 ins_pipe( pipe_slow ); 9513 %} 9514 9515 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{ 9516 predicate(VM_Version::supports_gfni()); 9517 match(Set dst (ReverseV src)); 9518 effect(TEMP dst, TEMP xtmp); 9519 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %} 9520 ins_encode %{ 9521 int vec_enc = vector_length_encoding(this); 9522 BasicType bt = Matcher::vector_element_basic_type(this); 9523 InternalAddress addr = $constantaddress(jlong(0x8040201008040201)); 9524 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc, 9525 $xtmp$$XMMRegister); 9526 %} 9527 ins_pipe( pipe_slow ); 9528 %} 9529 9530 instruct vreverse_byte_reg(vec dst, vec src) %{ 9531 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); 9532 match(Set dst (ReverseBytesV src)); 9533 effect(TEMP dst); 9534 format %{ "vector_reverse_byte $dst, $src" %} 9535 ins_encode %{ 9536 int vec_enc = vector_length_encoding(this); 9537 BasicType bt = Matcher::vector_element_basic_type(this); 9538 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc); 9539 %} 9540 ins_pipe( pipe_slow ); 9541 %} 9542 9543 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9544 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64); 9545 match(Set dst (ReverseBytesV src)); 9546 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9547 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9548 ins_encode %{ 9549 int vec_enc = vector_length_encoding(this); 9550 BasicType bt = Matcher::vector_element_basic_type(this); 9551 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9552 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9553 %} 9554 ins_pipe( pipe_slow ); 9555 %} 9556 9557 // ---------------------------------- Vector Count Leading Zeros ----------------------------------- 9558 9559 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ 9560 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9561 Matcher::vector_length_in_bytes(n->in(1)))); 9562 match(Set dst (CountLeadingZerosV src)); 9563 format %{ "vector_count_leading_zeros $dst, $src" %} 9564 ins_encode %{ 9565 int vlen_enc = vector_length_encoding(this, $src); 9566 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9567 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, 
$src$$XMMRegister, xnoreg, 9568 xnoreg, xnoreg, k0, noreg, true, vlen_enc); 9569 %} 9570 ins_pipe( pipe_slow ); 9571 %} 9572 9573 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9574 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9575 Matcher::vector_length_in_bytes(n->in(1)))); 9576 match(Set dst (CountLeadingZerosV src mask)); 9577 format %{ "vector_count_leading_zeros $dst, $src, $mask" %} 9578 ins_encode %{ 9579 int vlen_enc = vector_length_encoding(this, $src); 9580 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9581 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9582 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, 9583 xnoreg, $mask$$KRegister, noreg, true, vlen_enc); 9584 %} 9585 ins_pipe( pipe_slow ); 9586 %} 9587 9588 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{ 9589 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9590 VM_Version::supports_avx512cd() && 9591 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9592 match(Set dst (CountLeadingZerosV src)); 9593 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 9594 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %} 9595 ins_encode %{ 9596 int vlen_enc = vector_length_encoding(this, $src); 9597 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9598 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9599 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc); 9600 %} 9601 ins_pipe( pipe_slow ); 9602 %} 9603 9604 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{ 9605 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9606 match(Set dst (CountLeadingZerosV src)); 9607 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 9608 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %} 9609 ins_encode %{ 9610 int vlen_enc = vector_length_encoding(this, $src); 9611 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9612 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9613 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister, 9614 $rtmp$$Register, true, vlen_enc); 9615 %} 9616 ins_pipe( pipe_slow ); 9617 %} 9618 9619 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{ 9620 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT && 9621 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9622 match(Set dst (CountLeadingZerosV src)); 9623 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 9624 format %{ "vector_count_leading_zeros $dst, $src\t! 
using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 9625 ins_encode %{ 9626 int vlen_enc = vector_length_encoding(this, $src); 9627 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9628 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9629 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc); 9630 %} 9631 ins_pipe( pipe_slow ); 9632 %} 9633 9634 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9635 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT && 9636 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9637 match(Set dst (CountLeadingZerosV src)); 9638 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9639 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9640 ins_encode %{ 9641 int vlen_enc = vector_length_encoding(this, $src); 9642 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9643 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9644 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9645 %} 9646 ins_pipe( pipe_slow ); 9647 %} 9648 9649 // ---------------------------------- Vector Masked Operations ------------------------------------ 9650 9651 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ 9652 match(Set dst (AddVB (Binary dst src2) mask)); 9653 match(Set dst (AddVS (Binary dst src2) mask)); 9654 match(Set dst (AddVI (Binary dst src2) mask)); 9655 match(Set dst (AddVL (Binary dst src2) mask)); 9656 match(Set dst (AddVF (Binary dst src2) mask)); 9657 match(Set dst (AddVD (Binary dst src2) mask)); 9658 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9659 ins_encode %{ 9660 int vlen_enc = vector_length_encoding(this); 9661 BasicType bt = Matcher::vector_element_basic_type(this); 9662 int opc = this->ideal_Opcode(); 9663 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9664 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9665 %} 9666 ins_pipe( pipe_slow ); 9667 %} 9668 9669 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ 9670 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); 9671 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); 9672 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); 9673 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); 9674 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); 9675 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); 9676 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9677 ins_encode %{ 9678 int vlen_enc = vector_length_encoding(this); 9679 BasicType bt = Matcher::vector_element_basic_type(this); 9680 int opc = this->ideal_Opcode(); 9681 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9682 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9683 %} 9684 ins_pipe( pipe_slow ); 9685 %} 9686 9687 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ 9688 match(Set dst (XorV (Binary dst src2) mask)); 9689 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! 
xor masked operation" %} 9690 ins_encode %{ 9691 int vlen_enc = vector_length_encoding(this); 9692 BasicType bt = Matcher::vector_element_basic_type(this); 9693 int opc = this->ideal_Opcode(); 9694 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9695 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9696 %} 9697 ins_pipe( pipe_slow ); 9698 %} 9699 9700 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9701 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9702 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9703 ins_encode %{ 9704 int vlen_enc = vector_length_encoding(this); 9705 BasicType bt = Matcher::vector_element_basic_type(this); 9706 int opc = this->ideal_Opcode(); 9707 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9708 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9709 %} 9710 ins_pipe( pipe_slow ); 9711 %} 9712 9713 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9714 match(Set dst (OrV (Binary dst src2) mask)); 9715 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9716 ins_encode %{ 9717 int vlen_enc = vector_length_encoding(this); 9718 BasicType bt = Matcher::vector_element_basic_type(this); 9719 int opc = this->ideal_Opcode(); 9720 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9721 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9722 %} 9723 ins_pipe( pipe_slow ); 9724 %} 9725 9726 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9727 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9728 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9729 ins_encode %{ 9730 int vlen_enc = vector_length_encoding(this); 9731 BasicType bt = Matcher::vector_element_basic_type(this); 9732 int opc = this->ideal_Opcode(); 9733 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9734 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9735 %} 9736 ins_pipe( pipe_slow ); 9737 %} 9738 9739 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9740 match(Set dst (AndV (Binary dst src2) mask)); 9741 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9742 ins_encode %{ 9743 int vlen_enc = vector_length_encoding(this); 9744 BasicType bt = Matcher::vector_element_basic_type(this); 9745 int opc = this->ideal_Opcode(); 9746 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9747 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9748 %} 9749 ins_pipe( pipe_slow ); 9750 %} 9751 9752 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 9753 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 9754 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9755 ins_encode %{ 9756 int vlen_enc = vector_length_encoding(this); 9757 BasicType bt = Matcher::vector_element_basic_type(this); 9758 int opc = this->ideal_Opcode(); 9759 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9760 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9761 %} 9762 ins_pipe( pipe_slow ); 9763 %} 9764 9765 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 9766 match(Set dst (SubVB (Binary dst src2) mask)); 9767 match(Set dst (SubVS (Binary dst src2) mask)); 9768 match(Set dst (SubVI (Binary dst src2) mask)); 9769 match(Set dst (SubVL (Binary dst src2) mask)); 9770 match(Set dst (SubVF (Binary dst src2) mask)); 9771 match(Set dst (SubVD (Binary dst src2) mask)); 9772 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! 
sub masked operation" %} 9773 ins_encode %{ 9774 int vlen_enc = vector_length_encoding(this); 9775 BasicType bt = Matcher::vector_element_basic_type(this); 9776 int opc = this->ideal_Opcode(); 9777 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9778 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9779 %} 9780 ins_pipe( pipe_slow ); 9781 %} 9782 9783 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{ 9784 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask)); 9785 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask)); 9786 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask)); 9787 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask)); 9788 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask)); 9789 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask)); 9790 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 9791 ins_encode %{ 9792 int vlen_enc = vector_length_encoding(this); 9793 BasicType bt = Matcher::vector_element_basic_type(this); 9794 int opc = this->ideal_Opcode(); 9795 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9796 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9797 %} 9798 ins_pipe( pipe_slow ); 9799 %} 9800 9801 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{ 9802 match(Set dst (MulVS (Binary dst src2) mask)); 9803 match(Set dst (MulVI (Binary dst src2) mask)); 9804 match(Set dst (MulVL (Binary dst src2) mask)); 9805 match(Set dst (MulVF (Binary dst src2) mask)); 9806 match(Set dst (MulVD (Binary dst src2) mask)); 9807 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9808 ins_encode %{ 9809 int vlen_enc = vector_length_encoding(this); 9810 BasicType bt = Matcher::vector_element_basic_type(this); 9811 int opc = this->ideal_Opcode(); 9812 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9813 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9814 %} 9815 ins_pipe( pipe_slow ); 9816 %} 9817 9818 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{ 9819 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask)); 9820 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask)); 9821 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask)); 9822 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask)); 9823 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask)); 9824 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9825 ins_encode %{ 9826 int vlen_enc = vector_length_encoding(this); 9827 BasicType bt = Matcher::vector_element_basic_type(this); 9828 int opc = this->ideal_Opcode(); 9829 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9830 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9831 %} 9832 ins_pipe( pipe_slow ); 9833 %} 9834 9835 instruct vsqrt_reg_masked(vec dst, kReg mask) %{ 9836 match(Set dst (SqrtVF dst mask)); 9837 match(Set dst (SqrtVD dst mask)); 9838 format %{ "vpsqrt_masked $dst, $mask\t! 
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
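// Masked vector shifts. Separate patterns handle an immediate shift count,
// a uniform shift count held in a vector register, and per-lane variable
// shift counts (is_var_shift); the trailing boolean passed to evmasked_op
// distinguishes the variable-shift forms.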
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
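// Masked vector min/max, rearrange and abs operations.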
instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
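// Masked fused multiply-add (FMA) for float and double vectors; requires UseFMA.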
instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Comparison is dispatched on the element type of the first source.
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    assert(mask_len <= 32, "invalid mask length");
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
    uint masklen = Matcher::vector_length(this);
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                                        $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                       $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                              $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
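// SelectFromTwoVector: each destination lane is taken from one of the two
// source vectors, chosen by the corresponding index in $index (handled by
// select_from_two_vectors_evex below).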
instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
%{
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
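// Packed half-precision (Float16) vector operations. These map to the
// AVX-512 FP16 style encodings used by evsqrtph, evfp16ph and
// evfmadd132ph below.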
instruct vector_sqrt_HF_reg(vec dst, vec src)
%{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sqrt_HF_mem(vec dst, memory src)
%{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
%{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
%{
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}