//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name (register save type, C convention save type,
//                  ideal register type, encoding);
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// The Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers of 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
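// Example (an illustrative reading of the entries below, not an additional
// definition): the first two slices of XMM0 are described as
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());          // word (a)
//   reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); // word (b)
// i.e. caller-saved (SOC) for both the allocator and the C calling convention,
// spilled and filled as a Float (Op_RegF), hardware encoding 0, with the VMReg
// chain enumerating the sixteen 32-bit lanes touched by wider vector spills.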
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM15 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, 
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 214 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 215 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 216 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 217 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 218 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 219 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 220 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 221 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 222 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 223 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 224 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 225 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 226 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 227 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 228 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 229 230 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 231 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 232 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 233 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 234 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 235 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 236 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 237 reg_def XMM9h( SOC, SOC, Op_RegF, 9, 
xmm9->as_VMReg()->next(7)); 238 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 239 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 240 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 241 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 242 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 243 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 244 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 245 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 246 247 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 248 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 249 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 250 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 251 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 252 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 253 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 254 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 255 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 256 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 257 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 258 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 259 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 260 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 261 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 262 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 263 264 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 265 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 266 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 267 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 268 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 269 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 270 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 271 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 272 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 273 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 274 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 275 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 276 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 277 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 278 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 279 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 280 281 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 282 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 283 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 284 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 285 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 286 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 287 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 288 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 289 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 290 reg_def XMM12j( 
SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 291 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 292 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 293 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 294 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 295 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 296 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 297 298 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 299 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 300 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 301 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 302 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 303 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 304 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 305 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 306 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 307 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 308 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 309 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 310 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 311 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 312 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 313 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 314 315 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 316 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 317 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 318 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 319 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 320 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 321 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 322 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 323 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 324 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 325 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 326 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 327 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 328 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 329 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 330 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 331 332 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 333 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 334 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 335 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 336 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 337 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 338 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 339 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 340 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 341 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 342 reg_def XMM15k( SOC, SOC, Op_RegF, 15, 
xmm15->as_VMReg()->next(10)); 343 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 344 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 345 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 346 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 347 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 348 349 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 350 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 351 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 352 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 353 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 354 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 355 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 356 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 357 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 358 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 359 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 360 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 361 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 362 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 363 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 364 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 365 366 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 367 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 368 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 369 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 370 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 371 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 372 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 373 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 374 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 375 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 376 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 377 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 378 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 379 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 380 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 381 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 382 383 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 384 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 385 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 386 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 387 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 388 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 389 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 390 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 391 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 392 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 393 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 394 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 395 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 396 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 397 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 398 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 399 400 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 401 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 402 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 403 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 404 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 405 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 406 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 407 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 408 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 409 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 410 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 411 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 412 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 413 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 414 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 415 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 416 417 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 418 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 419 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 420 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 421 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 422 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 423 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 424 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 425 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 426 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 427 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 428 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 429 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 430 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 431 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 432 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 433 434 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 435 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 436 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 437 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 438 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 439 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 440 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 441 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 442 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 443 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 444 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 445 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 446 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 447 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 448 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 449 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 450 451 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 452 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 453 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 454 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 455 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 456 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 457 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 458 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 459 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 460 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 461 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 462 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 463 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 464 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 465 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 466 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 467 468 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 469 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 470 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 471 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 472 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 473 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 474 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 475 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 476 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 477 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 478 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 479 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 480 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 481 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 482 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 483 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 484 485 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 486 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 487 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 488 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 489 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 490 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 491 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 492 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 493 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 494 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 495 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 496 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 497 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 498 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 499 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 500 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 501 502 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 503 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 504 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 505 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 506 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 507 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 508 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 509 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 510 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 511 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 512 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 513 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 514 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 515 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 516 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 517 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 518 519 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 520 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 521 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 522 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 523 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 524 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 525 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 526 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 527 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 528 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 529 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 530 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 531 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 532 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 533 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 534 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 535 536 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 537 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 538 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 539 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 540 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 541 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 542 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 543 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 544 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 545 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 546 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 547 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 548 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 549 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 550 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 551 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 552 553 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 554 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 555 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 556 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 557 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 558 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 559 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 560 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 561 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 562 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 563 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 564 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 565 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 566 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 567 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 568 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 569 570 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 571 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 572 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 573 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 574 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 575 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 576 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 577 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 578 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 579 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 580 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 581 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 582 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 583 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 584 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 585 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 586 587 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 588 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 589 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 590 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 591 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 592 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 593 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 594 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 595 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 596 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 597 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 598 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 599 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 600 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 601 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 602 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(15)); 603 604 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 605 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 606 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 607 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 608 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 609 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 610 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 611 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 612 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 613 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 614 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 615 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 616 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 617 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 618 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 619 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 620 621 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 622 623 // AVX3 Mask Registers. 624 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg()); 625 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next()); 626 627 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg()); 628 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next()); 629 630 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg()); 631 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next()); 632 633 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg()); 634 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next()); 635 636 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg()); 637 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next()); 638 639 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg()); 640 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next()); 641 642 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg()); 643 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next()); 644 645 646 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 647 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 648 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 649 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 650 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 651 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 652 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 653 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 654 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 655 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 656 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 657 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, 
XMM11n, XMM11o, XMM11p, 658 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 659 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 660 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 661 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p, 662 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 663 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 664 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 665 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 666 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 667 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 668 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 669 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 670 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 671 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 672 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 673 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 674 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 675 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 676 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 677 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); 678 679 alloc_class chunk2(K7, K7_H, 680 K6, K6_H, 681 K5, K5_H, 682 K4, K4_H, 683 K3, K3_H, 684 K2, K2_H, 685 K1, K1_H); 686 687 reg_class vectmask_reg(K1, K1_H, 688 K2, K2_H, 689 K3, K3_H, 690 K4, K4_H, 691 K5, K5_H, 692 K6, K6_H, 693 K7, K7_H); 694 695 reg_class vectmask_reg_K1(K1, K1_H); 696 reg_class vectmask_reg_K2(K2, K2_H); 697 reg_class vectmask_reg_K3(K3, K3_H); 698 reg_class vectmask_reg_K4(K4, K4_H); 699 reg_class vectmask_reg_K5(K5, K5_H); 700 reg_class vectmask_reg_K6(K6, K6_H); 701 reg_class vectmask_reg_K7(K7, K7_H); 702 703 // flags allocation class should be last. 
alloc_class chunk3(RFLAGS);


// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7,
                           XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15);

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7,
                         XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31);

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0,  XMM0b,
                            XMM1,  XMM1b,
                            XMM2,  XMM2b,
                            XMM3,  XMM3b,
                            XMM4,  XMM4b,
                            XMM5,  XMM5b,
                            XMM6,  XMM6b,
                            XMM7,  XMM7b,
                            XMM8,  XMM8b,
                            XMM9,  XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b);

// Class for evex double registers
reg_class double_reg_evex(XMM0,  XMM0b,
                          XMM1,  XMM1b,
                          XMM2,  XMM2b,
                          XMM3,  XMM3b,
                          XMM4,  XMM4b,
                          XMM5,  XMM5b,
                          XMM6,  XMM6b,
                          XMM7,  XMM7b,
                          XMM8,  XMM8b,
                          XMM9,  XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b);

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7,
                             XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15);

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7,
                           XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31);

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0,  XMM0b,
                             XMM1,  XMM1b,
                             XMM2,  XMM2b,
                             XMM3,  XMM3b,
                             XMM4,  XMM4b,
                             XMM5,  XMM5b,
                             XMM6,  XMM6b,
                             XMM7,  XMM7b,
                             XMM8,  XMM8b,
                             XMM9,  XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b);
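// Each *_legacy / *_evex pair is folded into a single allocator-visible class
// by a reg_class_dynamic entry, such as the float_reg definition above:
//   reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
// The selection predicates only consult VM_Version CPU-feature checks, which
// do not change after startup, so on hardware without EVEX support these
// classes never expose XMM16-XMM31 to the register allocator.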
892 893 // Class for all 64bit vector registers 894 reg_class vectord_reg_evex(XMM0, XMM0b, 895 XMM1, XMM1b, 896 XMM2, XMM2b, 897 XMM3, XMM3b, 898 XMM4, XMM4b, 899 XMM5, XMM5b, 900 XMM6, XMM6b, 901 XMM7, XMM7b, 902 XMM8, XMM8b, 903 XMM9, XMM9b, 904 XMM10, XMM10b, 905 XMM11, XMM11b, 906 XMM12, XMM12b, 907 XMM13, XMM13b, 908 XMM14, XMM14b, 909 XMM15, XMM15b, 910 XMM16, XMM16b, 911 XMM17, XMM17b, 912 XMM18, XMM18b, 913 XMM19, XMM19b, 914 XMM20, XMM20b, 915 XMM21, XMM21b, 916 XMM22, XMM22b, 917 XMM23, XMM23b, 918 XMM24, XMM24b, 919 XMM25, XMM25b, 920 XMM26, XMM26b, 921 XMM27, XMM27b, 922 XMM28, XMM28b, 923 XMM29, XMM29b, 924 XMM30, XMM30b, 925 XMM31, XMM31b); 926 927 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 928 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 929 930 // Class for all 128bit vector registers 931 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 932 XMM1, XMM1b, XMM1c, XMM1d, 933 XMM2, XMM2b, XMM2c, XMM2d, 934 XMM3, XMM3b, XMM3c, XMM3d, 935 XMM4, XMM4b, XMM4c, XMM4d, 936 XMM5, XMM5b, XMM5c, XMM5d, 937 XMM6, XMM6b, XMM6c, XMM6d, 938 XMM7, XMM7b, XMM7c, XMM7d, 939 XMM8, XMM8b, XMM8c, XMM8d, 940 XMM9, XMM9b, XMM9c, XMM9d, 941 XMM10, XMM10b, XMM10c, XMM10d, 942 XMM11, XMM11b, XMM11c, XMM11d, 943 XMM12, XMM12b, XMM12c, XMM12d, 944 XMM13, XMM13b, XMM13c, XMM13d, 945 XMM14, XMM14b, XMM14c, XMM14d, 946 XMM15, XMM15b, XMM15c, XMM15d); 947 948 // Class for all 128bit vector registers 949 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 950 XMM1, XMM1b, XMM1c, XMM1d, 951 XMM2, XMM2b, XMM2c, XMM2d, 952 XMM3, XMM3b, XMM3c, XMM3d, 953 XMM4, XMM4b, XMM4c, XMM4d, 954 XMM5, XMM5b, XMM5c, XMM5d, 955 XMM6, XMM6b, XMM6c, XMM6d, 956 XMM7, XMM7b, XMM7c, XMM7d, 957 XMM8, XMM8b, XMM8c, XMM8d, 958 XMM9, XMM9b, XMM9c, XMM9d, 959 XMM10, XMM10b, XMM10c, XMM10d, 960 XMM11, XMM11b, XMM11c, XMM11d, 961 XMM12, XMM12b, XMM12c, XMM12d, 962 XMM13, XMM13b, XMM13c, XMM13d, 963 XMM14, XMM14b, XMM14c, XMM14d, 964 XMM15, XMM15b, XMM15c, XMM15d, 965 XMM16, XMM16b, XMM16c, XMM16d, 966 XMM17, XMM17b, XMM17c, XMM17d, 967 XMM18, XMM18b, XMM18c, XMM18d, 968 XMM19, XMM19b, XMM19c, XMM19d, 969 XMM20, XMM20b, XMM20c, XMM20d, 970 XMM21, XMM21b, XMM21c, XMM21d, 971 XMM22, XMM22b, XMM22c, XMM22d, 972 XMM23, XMM23b, XMM23c, XMM23d, 973 XMM24, XMM24b, XMM24c, XMM24d, 974 XMM25, XMM25b, XMM25c, XMM25d, 975 XMM26, XMM26b, XMM26c, XMM26d, 976 XMM27, XMM27b, XMM27c, XMM27d, 977 XMM28, XMM28b, XMM28c, XMM28d, 978 XMM29, XMM29b, XMM29c, XMM29d, 979 XMM30, XMM30b, XMM30c, XMM30d, 980 XMM31, XMM31b, XMM31c, XMM31d); 981 982 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 983 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 984 985 // Class for all 256bit vector registers 986 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 987 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 988 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 989 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 990 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 991 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 992 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 993 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, 994 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 995 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, 
XMM9h, 996 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 997 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 998 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 999 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1000 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1001 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); 1002 1003 // Class for all 256bit vector registers 1004 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1005 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1006 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1007 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1008 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1009 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1010 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1011 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, 1012 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1013 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1014 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1015 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1016 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1017 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1018 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1019 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1020 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1021 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1022 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1023 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1024 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1025 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1026 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1027 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1028 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1029 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1030 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1031 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1032 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1033 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1034 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1035 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h); 1036 1037 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1038 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1039 1040 // Class for all 512bit vector registers 1041 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1042 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1043 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1044 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1045 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1046 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, 
XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1047 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1048 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 1049 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1050 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1051 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1052 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1053 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1054 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1055 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1056 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p, 1057 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1058 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1059 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1060 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1061 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1062 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1063 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1064 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1065 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1066 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1067 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1068 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1069 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1070 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); 1073 1074 // Class for restricted 512bit vector registers 1075 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, 
XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1076 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1077 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1078 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1079 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1080 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1081 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1082 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 1083 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1084 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1085 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1086 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1087 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1088 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1089 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1090 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1091 1092 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1093 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1094 1095 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1096 %} 1097 1098 1099 //----------SOURCE BLOCK------------------------------------------------------- 1100 // This is a block of C++ code which provides values, functions, and 1101 // definitions necessary in the rest of the architecture description 1102 1103 source_hpp %{ 1104 // Header information of the source block. 1105 // Method declarations/definitions which are used outside 1106 // the ad-scope can conveniently be defined here. 1107 // 1108 // To keep related declarations/definitions/uses close together, 1109 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1110 1111 #include "runtime/vm_version.hpp" 1112 1113 class NativeJump; 1114 1115 class CallStubImpl { 1116 1117 //-------------------------------------------------------------- 1118 //---< Used for optimization in Compile::shorten_branches >--- 1119 //-------------------------------------------------------------- 1120 1121 public: 1122 // Size of call trampoline stub. 
1123 static uint size_call_trampoline() { 1124 return 0; // no call trampolines on this platform 1125 } 1126 1127 // number of relocations needed by a call trampoline stub 1128 static uint reloc_call_trampoline() { 1129 return 0; // no call trampolines on this platform 1130 } 1131 }; 1132 1133 class HandlerImpl { 1134 1135 public: 1136 1137 static int emit_exception_handler(C2_MacroAssembler *masm); 1138 static int emit_deopt_handler(C2_MacroAssembler* masm); 1139 1140 static uint size_exception_handler() { 1141 // NativeCall instruction size is the same as NativeJump. 1142 // exception handler starts out as jump and can be patched to 1143 // a call be deoptimization. (4932387) 1144 // Note that this value is also credited (in output.cpp) to 1145 // the size of the code section. 1146 return NativeJump::instruction_size; 1147 } 1148 1149 static uint size_deopt_handler() { 1150 // three 5 byte instructions plus one move for unreachable address. 1151 return 15+3; 1152 } 1153 }; 1154 1155 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { 1156 switch(bytes) { 1157 case 4: // fall-through 1158 case 8: // fall-through 1159 case 16: return Assembler::AVX_128bit; 1160 case 32: return Assembler::AVX_256bit; 1161 case 64: return Assembler::AVX_512bit; 1162 1163 default: { 1164 ShouldNotReachHere(); 1165 return Assembler::AVX_NoVec; 1166 } 1167 } 1168 } 1169 1170 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) { 1171 return vector_length_encoding(Matcher::vector_length_in_bytes(n)); 1172 } 1173 1174 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) { 1175 uint def_idx = use->operand_index(opnd); 1176 Node* def = use->in(def_idx); 1177 return vector_length_encoding(def); 1178 } 1179 1180 static inline bool is_vector_popcount_predicate(BasicType bt) { 1181 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1182 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1183 } 1184 1185 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) { 1186 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() && 1187 (VM_Version::supports_avx512vl() || vlen_bytes == 64); 1188 } 1189 1190 class Node::PD { 1191 public: 1192 enum NodeFlags { 1193 Flag_intel_jcc_erratum = Node::_last_flag << 1, 1194 Flag_sets_carry_flag = Node::_last_flag << 2, 1195 Flag_sets_parity_flag = Node::_last_flag << 3, 1196 Flag_sets_zero_flag = Node::_last_flag << 4, 1197 Flag_sets_overflow_flag = Node::_last_flag << 5, 1198 Flag_sets_sign_flag = Node::_last_flag << 6, 1199 Flag_clears_carry_flag = Node::_last_flag << 7, 1200 Flag_clears_parity_flag = Node::_last_flag << 8, 1201 Flag_clears_zero_flag = Node::_last_flag << 9, 1202 Flag_clears_overflow_flag = Node::_last_flag << 10, 1203 Flag_clears_sign_flag = Node::_last_flag << 11, 1204 _last_flag = Flag_clears_sign_flag 1205 }; 1206 }; 1207 1208 %} // end source_hpp 1209 1210 source %{ 1211 1212 #include "opto/addnode.hpp" 1213 #include "c2_intelJccErratum_x86.hpp" 1214 1215 void PhaseOutput::pd_perform_mach_node_analysis() { 1216 if (VM_Version::has_intel_jcc_erratum()) { 1217 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1218 _buf_sizes._code += extra_padding; 1219 } 1220 } 1221 1222 int MachNode::pd_alignment_required() const { 1223 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1224 // Conservatively add worst case 
padding. We assume that relocInfo::addr_unit() is 1 on x86. 1225 return IntelJccErratum::largest_jcc_size() + 1; 1226 } else { 1227 return 1; 1228 } 1229 } 1230 1231 int MachNode::compute_padding(int current_offset) const { 1232 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1233 Compile* C = Compile::current(); 1234 PhaseOutput* output = C->output(); 1235 Block* block = output->block(); 1236 int index = output->index(); 1237 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1238 } else { 1239 return 0; 1240 } 1241 } 1242 1243 // Emit exception handler code. 1244 // Stuff framesize into a register and call a VM stub routine. 1245 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) { 1246 1247 // Note that the code buffer's insts_mark is always relative to insts. 1248 // That's why we must use the macroassembler to generate a handler. 1249 address base = __ start_a_stub(size_exception_handler()); 1250 if (base == nullptr) { 1251 ciEnv::current()->record_failure("CodeCache is full"); 1252 return 0; // CodeBuffer::expand failed 1253 } 1254 int offset = __ offset(); 1255 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1256 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1257 __ end_a_stub(); 1258 return offset; 1259 } 1260 1261 // Emit deopt handler code. 1262 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { 1263 1264 // Note that the code buffer's insts_mark is always relative to insts. 1265 // That's why we must use the macroassembler to generate a handler. 1266 address base = __ start_a_stub(size_deopt_handler()); 1267 if (base == nullptr) { 1268 ciEnv::current()->record_failure("CodeCache is full"); 1269 return 0; // CodeBuffer::expand failed 1270 } 1271 int offset = __ offset(); 1272 1273 address the_pc = (address) __ pc(); 1274 Label next; 1275 // push a "the_pc" on the stack without destroying any registers 1276 // as they all may be live. 1277 1278 // push address of "next" 1279 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1280 __ bind(next); 1281 // adjust it so it matches "the_pc" 1282 __ subptr(Address(rsp, 0), __ offset() - offset); 1283 1284 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1285 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1286 __ end_a_stub(); 1287 return offset; 1288 } 1289 1290 static Assembler::Width widthForType(BasicType bt) { 1291 if (bt == T_BYTE) { 1292 return Assembler::B; 1293 } else if (bt == T_SHORT) { 1294 return Assembler::W; 1295 } else if (bt == T_INT) { 1296 return Assembler::D; 1297 } else { 1298 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1299 return Assembler::Q; 1300 } 1301 } 1302 1303 //============================================================================= 1304 1305 // Float masks come from different places depending on platform. 
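// Illustrative sketch only (not part of the build): the accessors below hand out the
// addresses of constant bit patterns kept in StubRoutines. For example, the scalar
// float abs/neg patterns later in this file use them roughly as
//
//   __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));  // 0x7FFFFFFF per lane: clear the sign bit (abs)
//   __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));  // 0x80000000 per lane: flip the sign bit (neg)
//
// The double variants rely on the analogous 64-bit masks; the exact instruction forms
// chosen depend on the UseAVX level selected by the matching rules.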
1306 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1307 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1308 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1309 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1310 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1311 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1312 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1313 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1314 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1315 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1316 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1317 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1318 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1319 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1320 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1321 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1322 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1323 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1324 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1325 1326 //============================================================================= 1327 bool Matcher::match_rule_supported(int opcode) { 1328 if (!has_match_rule(opcode)) { 1329 return false; // no match rule present 1330 } 1331 switch (opcode) { 1332 case Op_AbsVL: 1333 case Op_StoreVectorScatter: 1334 if (UseAVX < 3) { 1335 return false; 1336 } 1337 break; 1338 case Op_PopCountI: 1339 case Op_PopCountL: 1340 if (!UsePopCountInstruction) { 1341 return false; 1342 } 1343 break; 1344 case Op_PopCountVI: 1345 if (UseAVX < 2) { 1346 return false; 1347 } 1348 break; 1349 case Op_CompressV: 1350 case Op_ExpandV: 1351 case Op_PopCountVL: 1352 if (UseAVX < 2) { 1353 return false; 1354 } 1355 break; 1356 case Op_MulVI: 1357 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1358 return false; 1359 } 1360 break; 1361 case Op_MulVL: 1362 if (UseSSE < 4) { // only with SSE4_1 or AVX 1363 return false; 1364 } 1365 break; 1366 case Op_MulReductionVL: 1367 if (VM_Version::supports_avx512dq() == false) { 1368 return false; 1369 } 1370 break; 1371 case Op_AbsVB: 1372 case Op_AbsVS: 1373 case Op_AbsVI: 1374 case Op_AddReductionVI: 1375 case Op_AndReductionV: 1376 case Op_OrReductionV: 1377 case Op_XorReductionV: 1378 if (UseSSE < 3) { // requires at least SSSE3 1379 return false; 1380 } 1381 break; 1382 case Op_MaxHF: 1383 case Op_MinHF: 1384 if (!VM_Version::supports_avx512vlbw()) { 1385 return false; 1386 } // fallthrough 1387 case Op_AddHF: 1388 case Op_DivHF: 1389 case Op_FmaHF: 1390 case Op_MulHF: 1391 case Op_ReinterpretS2HF: 1392 case Op_ReinterpretHF2S: 1393 case Op_SubHF: 1394 case Op_SqrtHF: 1395 if (!VM_Version::supports_avx512_fp16()) { 1396 return 
false; 1397 } 1398 break; 1399 case Op_VectorLoadShuffle: 1400 case Op_VectorRearrange: 1401 case Op_MulReductionVI: 1402 if (UseSSE < 4) { // requires at least SSE4 1403 return false; 1404 } 1405 break; 1406 case Op_IsInfiniteF: 1407 case Op_IsInfiniteD: 1408 if (!VM_Version::supports_avx512dq()) { 1409 return false; 1410 } 1411 break; 1412 case Op_SqrtVD: 1413 case Op_SqrtVF: 1414 case Op_VectorMaskCmp: 1415 case Op_VectorCastB2X: 1416 case Op_VectorCastS2X: 1417 case Op_VectorCastI2X: 1418 case Op_VectorCastL2X: 1419 case Op_VectorCastF2X: 1420 case Op_VectorCastD2X: 1421 case Op_VectorUCastB2X: 1422 case Op_VectorUCastS2X: 1423 case Op_VectorUCastI2X: 1424 case Op_VectorMaskCast: 1425 if (UseAVX < 1) { // enabled for AVX only 1426 return false; 1427 } 1428 break; 1429 case Op_PopulateIndex: 1430 if (UseAVX < 2) { 1431 return false; 1432 } 1433 break; 1434 case Op_RoundVF: 1435 if (UseAVX < 2) { // enabled for AVX2 only 1436 return false; 1437 } 1438 break; 1439 case Op_RoundVD: 1440 if (UseAVX < 3) { 1441 return false; // enabled for AVX3 only 1442 } 1443 break; 1444 case Op_CompareAndSwapL: 1445 case Op_CompareAndSwapP: 1446 break; 1447 case Op_StrIndexOf: 1448 if (!UseSSE42Intrinsics) { 1449 return false; 1450 } 1451 break; 1452 case Op_StrIndexOfChar: 1453 if (!UseSSE42Intrinsics) { 1454 return false; 1455 } 1456 break; 1457 case Op_OnSpinWait: 1458 if (VM_Version::supports_on_spin_wait() == false) { 1459 return false; 1460 } 1461 break; 1462 case Op_MulVB: 1463 case Op_LShiftVB: 1464 case Op_RShiftVB: 1465 case Op_URShiftVB: 1466 case Op_VectorInsert: 1467 case Op_VectorLoadMask: 1468 case Op_VectorStoreMask: 1469 case Op_VectorBlend: 1470 if (UseSSE < 4) { 1471 return false; 1472 } 1473 break; 1474 case Op_MaxD: 1475 case Op_MaxF: 1476 case Op_MinD: 1477 case Op_MinF: 1478 if (UseAVX < 1) { // enabled for AVX only 1479 return false; 1480 } 1481 break; 1482 case Op_CacheWB: 1483 case Op_CacheWBPreSync: 1484 case Op_CacheWBPostSync: 1485 if (!VM_Version::supports_data_cache_line_flush()) { 1486 return false; 1487 } 1488 break; 1489 case Op_ExtractB: 1490 case Op_ExtractL: 1491 case Op_ExtractI: 1492 case Op_RoundDoubleMode: 1493 if (UseSSE < 4) { 1494 return false; 1495 } 1496 break; 1497 case Op_RoundDoubleModeV: 1498 if (VM_Version::supports_avx() == false) { 1499 return false; // 128bit vroundpd is not available 1500 } 1501 break; 1502 case Op_LoadVectorGather: 1503 case Op_LoadVectorGatherMasked: 1504 if (UseAVX < 2) { 1505 return false; 1506 } 1507 break; 1508 case Op_FmaF: 1509 case Op_FmaD: 1510 case Op_FmaVD: 1511 case Op_FmaVF: 1512 if (!UseFMA) { 1513 return false; 1514 } 1515 break; 1516 case Op_MacroLogicV: 1517 if (UseAVX < 3 || !UseVectorMacroLogic) { 1518 return false; 1519 } 1520 break; 1521 1522 case Op_VectorCmpMasked: 1523 case Op_VectorMaskGen: 1524 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1525 return false; 1526 } 1527 break; 1528 case Op_VectorMaskFirstTrue: 1529 case Op_VectorMaskLastTrue: 1530 case Op_VectorMaskTrueCount: 1531 case Op_VectorMaskToLong: 1532 if (UseAVX < 1) { 1533 return false; 1534 } 1535 break; 1536 case Op_RoundF: 1537 case Op_RoundD: 1538 break; 1539 case Op_CopySignD: 1540 case Op_CopySignF: 1541 if (UseAVX < 3) { 1542 return false; 1543 } 1544 if (!VM_Version::supports_avx512vl()) { 1545 return false; 1546 } 1547 break; 1548 case Op_CompressBits: 1549 case Op_ExpandBits: 1550 if (!VM_Version::supports_bmi2()) { 1551 return false; 1552 } 1553 break; 1554 case Op_CompressM: 1555 if (!VM_Version::supports_avx512vl() || 
!VM_Version::supports_bmi2()) { 1556 return false; 1557 } 1558 break; 1559 case Op_ConvF2HF: 1560 case Op_ConvHF2F: 1561 if (!VM_Version::supports_float16()) { 1562 return false; 1563 } 1564 break; 1565 case Op_VectorCastF2HF: 1566 case Op_VectorCastHF2F: 1567 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1568 return false; 1569 } 1570 break; 1571 } 1572 return true; // Match rules are supported by default. 1573 } 1574 1575 //------------------------------------------------------------------------ 1576 1577 static inline bool is_pop_count_instr_target(BasicType bt) { 1578 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1579 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1580 } 1581 1582 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) { 1583 return match_rule_supported_vector(opcode, vlen, bt); 1584 } 1585 1586 // Identify extra cases that we might want to provide match rules for vector nodes and 1587 // other intrinsics guarded with vector length (vlen) and element type (bt). 1588 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1589 if (!match_rule_supported(opcode)) { 1590 return false; 1591 } 1592 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1593 // * SSE2 supports 128bit vectors for all types; 1594 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1595 // * AVX2 supports 256bit vectors for all types; 1596 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1597 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1598 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1599 // And MaxVectorSize is taken into account as well. 
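// Worked example (illustrative, assuming UseAVX == 2 and MaxVectorSize == 32):
//   vector_width_in_bytes(T_INT) == 32, so max_vector_size(T_INT) == 8 lanes;
//   match_rule_supported_vector(Op_AddVI, 8,  T_INT) passes this size check (a 256-bit vector), while
//   match_rule_supported_vector(Op_AddVI, 16, T_INT) is rejected right here (it would need a 512-bit register).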
1600 if (!vector_size_supported(bt, vlen)) { 1601 return false; 1602 } 1603 // Special cases which require vector length follow: 1604 // * implementation limitations 1605 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1606 // * 128bit vroundpd instruction is present only in AVX1 1607 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1608 switch (opcode) { 1609 case Op_MaxVHF: 1610 case Op_MinVHF: 1611 if (!VM_Version::supports_avx512bw()) { 1612 return false; 1613 } 1614 case Op_AddVHF: 1615 case Op_DivVHF: 1616 case Op_FmaVHF: 1617 case Op_MulVHF: 1618 case Op_SubVHF: 1619 case Op_SqrtVHF: 1620 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1621 return false; 1622 } 1623 if (!VM_Version::supports_avx512_fp16()) { 1624 return false; 1625 } 1626 break; 1627 case Op_AbsVF: 1628 case Op_NegVF: 1629 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1630 return false; // 512bit vandps and vxorps are not available 1631 } 1632 break; 1633 case Op_AbsVD: 1634 case Op_NegVD: 1635 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1636 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1637 } 1638 break; 1639 case Op_RotateRightV: 1640 case Op_RotateLeftV: 1641 if (bt != T_INT && bt != T_LONG) { 1642 return false; 1643 } // fallthrough 1644 case Op_MacroLogicV: 1645 if (!VM_Version::supports_evex() || 1646 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1647 return false; 1648 } 1649 break; 1650 case Op_ClearArray: 1651 case Op_VectorMaskGen: 1652 case Op_VectorCmpMasked: 1653 if (!VM_Version::supports_avx512bw()) { 1654 return false; 1655 } 1656 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1657 return false; 1658 } 1659 break; 1660 case Op_LoadVectorMasked: 1661 case Op_StoreVectorMasked: 1662 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1663 return false; 1664 } 1665 break; 1666 case Op_UMinV: 1667 case Op_UMaxV: 1668 if (UseAVX == 0) { 1669 return false; 1670 } 1671 break; 1672 case Op_MaxV: 1673 case Op_MinV: 1674 if (UseSSE < 4 && is_integral_type(bt)) { 1675 return false; 1676 } 1677 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1678 // Float/Double intrinsics are enabled for AVX family currently. 1679 if (UseAVX == 0) { 1680 return false; 1681 } 1682 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1683 return false; 1684 } 1685 } 1686 break; 1687 case Op_CallLeafVector: 1688 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1689 return false; 1690 } 1691 break; 1692 case Op_AddReductionVI: 1693 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1694 return false; 1695 } 1696 // fallthrough 1697 case Op_AndReductionV: 1698 case Op_OrReductionV: 1699 case Op_XorReductionV: 1700 if (is_subword_type(bt) && (UseSSE < 4)) { 1701 return false; 1702 } 1703 break; 1704 case Op_MinReductionV: 1705 case Op_MaxReductionV: 1706 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1707 return false; 1708 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1709 return false; 1710 } 1711 // Float/Double intrinsics enabled for AVX family. 
1712 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1713 return false; 1714 } 1715 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1716 return false; 1717 } 1718 break; 1719 case Op_VectorTest: 1720 if (UseSSE < 4) { 1721 return false; // Implementation limitation 1722 } else if (size_in_bits < 32) { 1723 return false; // Implementation limitation 1724 } 1725 break; 1726 case Op_VectorLoadShuffle: 1727 case Op_VectorRearrange: 1728 if(vlen == 2) { 1729 return false; // Implementation limitation due to how shuffle is loaded 1730 } else if (size_in_bits == 256 && UseAVX < 2) { 1731 return false; // Implementation limitation 1732 } 1733 break; 1734 case Op_VectorLoadMask: 1735 case Op_VectorMaskCast: 1736 if (size_in_bits == 256 && UseAVX < 2) { 1737 return false; // Implementation limitation 1738 } 1739 // fallthrough 1740 case Op_VectorStoreMask: 1741 if (vlen == 2) { 1742 return false; // Implementation limitation 1743 } 1744 break; 1745 case Op_PopulateIndex: 1746 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1747 return false; 1748 } 1749 break; 1750 case Op_VectorCastB2X: 1751 case Op_VectorCastS2X: 1752 case Op_VectorCastI2X: 1753 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1754 return false; 1755 } 1756 break; 1757 case Op_VectorCastL2X: 1758 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1759 return false; 1760 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1761 return false; 1762 } 1763 break; 1764 case Op_VectorCastF2X: { 1765 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1766 // happen after intermediate conversion to integer and special handling 1767 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 
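// For instance (illustrative): casting 8 floats to 8 bytes reads a 256-bit source
// vector even though the byte result occupies only 64 bits, so the check below is
// made against the source width and requires AVX2 in that case.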
1768 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1769 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1770 return false; 1771 } 1772 } 1773 // fallthrough 1774 case Op_VectorCastD2X: 1775 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1776 return false; 1777 } 1778 break; 1779 case Op_VectorCastF2HF: 1780 case Op_VectorCastHF2F: 1781 if (!VM_Version::supports_f16c() && 1782 ((!VM_Version::supports_evex() || 1783 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1784 return false; 1785 } 1786 break; 1787 case Op_RoundVD: 1788 if (!VM_Version::supports_avx512dq()) { 1789 return false; 1790 } 1791 break; 1792 case Op_MulReductionVI: 1793 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1794 return false; 1795 } 1796 break; 1797 case Op_LoadVectorGatherMasked: 1798 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1799 return false; 1800 } 1801 if (is_subword_type(bt) && 1802 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) || 1803 (size_in_bits < 64) || 1804 (bt == T_SHORT && !VM_Version::supports_bmi2()))) { 1805 return false; 1806 } 1807 break; 1808 case Op_StoreVectorScatterMasked: 1809 case Op_StoreVectorScatter: 1810 if (is_subword_type(bt)) { 1811 return false; 1812 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1813 return false; 1814 } 1815 // fallthrough 1816 case Op_LoadVectorGather: 1817 if (!is_subword_type(bt) && size_in_bits == 64) { 1818 return false; 1819 } 1820 if (is_subword_type(bt) && size_in_bits < 64) { 1821 return false; 1822 } 1823 break; 1824 case Op_SaturatingAddV: 1825 case Op_SaturatingSubV: 1826 if (UseAVX < 1) { 1827 return false; // Implementation limitation 1828 } 1829 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1830 return false; 1831 } 1832 break; 1833 case Op_SelectFromTwoVector: 1834 if (size_in_bits < 128 || (size_in_bits < 512 && !VM_Version::supports_avx512vl())) { 1835 return false; 1836 } 1837 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 1838 return false; 1839 } 1840 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 1841 return false; 1842 } 1843 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) { 1844 return false; 1845 } 1846 break; 1847 case Op_MaskAll: 1848 if (!VM_Version::supports_evex()) { 1849 return false; 1850 } 1851 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1852 return false; 1853 } 1854 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1855 return false; 1856 } 1857 break; 1858 case Op_VectorMaskCmp: 1859 if (vlen < 2 || size_in_bits < 32) { 1860 return false; 1861 } 1862 break; 1863 case Op_CompressM: 1864 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1865 return false; 1866 } 1867 break; 1868 case Op_CompressV: 1869 case Op_ExpandV: 1870 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1871 return false; 1872 } 1873 if (size_in_bits < 128 ) { 1874 return false; 1875 } 1876 case Op_VectorLongToMask: 1877 if (UseAVX < 1) { 1878 return false; 1879 } 1880 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1881 return false; 1882 } 1883 break; 1884 case Op_SignumVD: 1885 case Op_SignumVF: 1886 if (UseAVX < 1) { 1887 return false; 1888 } 1889 break; 1890 case Op_PopCountVI: 1891 case Op_PopCountVL: { 1892 if (!is_pop_count_instr_target(bt) && 1893 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 1894 
return false; 1895 } 1896 } 1897 break; 1898 case Op_ReverseV: 1899 case Op_ReverseBytesV: 1900 if (UseAVX < 2) { 1901 return false; 1902 } 1903 break; 1904 case Op_CountTrailingZerosV: 1905 case Op_CountLeadingZerosV: 1906 if (UseAVX < 2) { 1907 return false; 1908 } 1909 break; 1910 } 1911 return true; // Per default match rules are supported. 1912 } 1913 1914 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 1915 // ADLC based match_rule_supported routine checks for the existence of pattern based 1916 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 1917 // of their non-masked counterpart with mask edge being the differentiator. 1918 // This routine does a strict check on the existence of masked operation patterns 1919 // by returning a default false value for all the other opcodes apart from the 1920 // ones whose masked instruction patterns are defined in this file. 1921 if (!match_rule_supported_vector(opcode, vlen, bt)) { 1922 return false; 1923 } 1924 1925 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1926 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 1927 return false; 1928 } 1929 switch(opcode) { 1930 // Unary masked operations 1931 case Op_AbsVB: 1932 case Op_AbsVS: 1933 if(!VM_Version::supports_avx512bw()) { 1934 return false; // Implementation limitation 1935 } 1936 case Op_AbsVI: 1937 case Op_AbsVL: 1938 return true; 1939 1940 // Ternary masked operations 1941 case Op_FmaVF: 1942 case Op_FmaVD: 1943 return true; 1944 1945 case Op_MacroLogicV: 1946 if(bt != T_INT && bt != T_LONG) { 1947 return false; 1948 } 1949 return true; 1950 1951 // Binary masked operations 1952 case Op_AddVB: 1953 case Op_AddVS: 1954 case Op_SubVB: 1955 case Op_SubVS: 1956 case Op_MulVS: 1957 case Op_LShiftVS: 1958 case Op_RShiftVS: 1959 case Op_URShiftVS: 1960 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1961 if (!VM_Version::supports_avx512bw()) { 1962 return false; // Implementation limitation 1963 } 1964 return true; 1965 1966 case Op_MulVL: 1967 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1968 if (!VM_Version::supports_avx512dq()) { 1969 return false; // Implementation limitation 1970 } 1971 return true; 1972 1973 case Op_AndV: 1974 case Op_OrV: 1975 case Op_XorV: 1976 case Op_RotateRightV: 1977 case Op_RotateLeftV: 1978 if (bt != T_INT && bt != T_LONG) { 1979 return false; // Implementation limitation 1980 } 1981 return true; 1982 1983 case Op_VectorLoadMask: 1984 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1985 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 1986 return false; 1987 } 1988 return true; 1989 1990 case Op_AddVI: 1991 case Op_AddVL: 1992 case Op_AddVF: 1993 case Op_AddVD: 1994 case Op_SubVI: 1995 case Op_SubVL: 1996 case Op_SubVF: 1997 case Op_SubVD: 1998 case Op_MulVI: 1999 case Op_MulVF: 2000 case Op_MulVD: 2001 case Op_DivVF: 2002 case Op_DivVD: 2003 case Op_SqrtVF: 2004 case Op_SqrtVD: 2005 case Op_LShiftVI: 2006 case Op_LShiftVL: 2007 case Op_RShiftVI: 2008 case Op_RShiftVL: 2009 case Op_URShiftVI: 2010 case Op_URShiftVL: 2011 case Op_LoadVectorMasked: 2012 case Op_StoreVectorMasked: 2013 case Op_LoadVectorGatherMasked: 2014 case Op_StoreVectorScatterMasked: 2015 return true; 2016 2017 case Op_UMinV: 2018 case Op_UMaxV: 2019 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 2020 return false; 2021 } // fallthrough 2022 case Op_MaxV: 2023 case Op_MinV: 2024 if (is_subword_type(bt) 
&& !VM_Version::supports_avx512bw()) { 2025 return false; // Implementation limitation 2026 } 2027 if (is_floating_point_type(bt)) { 2028 return false; // Implementation limitation 2029 } 2030 return true; 2031 case Op_SaturatingAddV: 2032 case Op_SaturatingSubV: 2033 if (!is_subword_type(bt)) { 2034 return false; 2035 } 2036 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) { 2037 return false; // Implementation limitation 2038 } 2039 return true; 2040 2041 case Op_VectorMaskCmp: 2042 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2043 return false; // Implementation limitation 2044 } 2045 return true; 2046 2047 case Op_VectorRearrange: 2048 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2049 return false; // Implementation limitation 2050 } 2051 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2052 return false; // Implementation limitation 2053 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2054 return false; // Implementation limitation 2055 } 2056 return true; 2057 2058 // Binary Logical operations 2059 case Op_AndVMask: 2060 case Op_OrVMask: 2061 case Op_XorVMask: 2062 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2063 return false; // Implementation limitation 2064 } 2065 return true; 2066 2067 case Op_PopCountVI: 2068 case Op_PopCountVL: 2069 if (!is_pop_count_instr_target(bt)) { 2070 return false; 2071 } 2072 return true; 2073 2074 case Op_MaskAll: 2075 return true; 2076 2077 case Op_CountLeadingZerosV: 2078 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2079 return true; 2080 } 2081 default: 2082 return false; 2083 } 2084 } 2085 2086 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2087 return false; 2088 } 2089 2090 // Return true if Vector::rearrange needs preparation of the shuffle argument 2091 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) { 2092 switch (elem_bt) { 2093 case T_BYTE: return false; 2094 case T_SHORT: return !VM_Version::supports_avx512bw(); 2095 case T_INT: return !VM_Version::supports_avx(); 2096 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl(); 2097 default: 2098 ShouldNotReachHere(); 2099 return false; 2100 } 2101 } 2102 2103 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2104 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2105 bool legacy = (generic_opnd->opcode() == LEGVEC); 2106 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2107 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2108 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 
2109 return new legVecZOper(); 2110 } 2111 if (legacy) { 2112 switch (ideal_reg) { 2113 case Op_VecS: return new legVecSOper(); 2114 case Op_VecD: return new legVecDOper(); 2115 case Op_VecX: return new legVecXOper(); 2116 case Op_VecY: return new legVecYOper(); 2117 case Op_VecZ: return new legVecZOper(); 2118 } 2119 } else { 2120 switch (ideal_reg) { 2121 case Op_VecS: return new vecSOper(); 2122 case Op_VecD: return new vecDOper(); 2123 case Op_VecX: return new vecXOper(); 2124 case Op_VecY: return new vecYOper(); 2125 case Op_VecZ: return new vecZOper(); 2126 } 2127 } 2128 ShouldNotReachHere(); 2129 return nullptr; 2130 } 2131 2132 bool Matcher::is_reg2reg_move(MachNode* m) { 2133 switch (m->rule()) { 2134 case MoveVec2Leg_rule: 2135 case MoveLeg2Vec_rule: 2136 case MoveF2VL_rule: 2137 case MoveF2LEG_rule: 2138 case MoveVL2F_rule: 2139 case MoveLEG2F_rule: 2140 case MoveD2VL_rule: 2141 case MoveD2LEG_rule: 2142 case MoveVL2D_rule: 2143 case MoveLEG2D_rule: 2144 return true; 2145 default: 2146 return false; 2147 } 2148 } 2149 2150 bool Matcher::is_generic_vector(MachOper* opnd) { 2151 switch (opnd->opcode()) { 2152 case VEC: 2153 case LEGVEC: 2154 return true; 2155 default: 2156 return false; 2157 } 2158 } 2159 2160 //------------------------------------------------------------------------ 2161 2162 const RegMask* Matcher::predicate_reg_mask(void) { 2163 return &_VECTMASK_REG_mask; 2164 } 2165 2166 // Max vector size in bytes. 0 if not supported. 2167 int Matcher::vector_width_in_bytes(BasicType bt) { 2168 assert(is_java_primitive(bt), "only primitive type vectors"); 2169 // SSE2 supports 128bit vectors for all types. 2170 // AVX2 supports 256bit vectors for all types. 2171 // AVX2/EVEX supports 512bit vectors for all types. 2172 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2173 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2174 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2175 size = (UseAVX > 2) ? 64 : 32; 2176 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2177 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2178 // Use flag to limit vector size. 2179 size = MIN2(size,(int)MaxVectorSize); 2180 // Minimum 2 values in vector (or 4 for bytes). 2181 switch (bt) { 2182 case T_DOUBLE: 2183 case T_LONG: 2184 if (size < 16) return 0; 2185 break; 2186 case T_FLOAT: 2187 case T_INT: 2188 if (size < 8) return 0; 2189 break; 2190 case T_BOOLEAN: 2191 if (size < 4) return 0; 2192 break; 2193 case T_CHAR: 2194 if (size < 4) return 0; 2195 break; 2196 case T_BYTE: 2197 if (size < 4) return 0; 2198 break; 2199 case T_SHORT: 2200 if (size < 4) return 0; 2201 break; 2202 default: 2203 ShouldNotReachHere(); 2204 } 2205 return size; 2206 } 2207 2208 // Limits on vector size (number of elements) loaded into vector. 2209 int Matcher::max_vector_size(const BasicType bt) { 2210 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2211 } 2212 int Matcher::min_vector_size(const BasicType bt) { 2213 int max_size = max_vector_size(bt); 2214 // Min size which can be loaded into vector is 4 bytes. 2215 int size = (type2aelembytes(bt) == 1) ? 
4 : 2; 2216 // Support for calling svml double64 vectors 2217 if (bt == T_DOUBLE) { 2218 size = 1; 2219 } 2220 return MIN2(size,max_size); 2221 } 2222 2223 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) { 2224 // Limit the max vector size for auto vectorization to 256 bits (32 bytes) 2225 // by default on Cascade Lake 2226 if (VM_Version::is_default_intel_cascade_lake()) { 2227 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt)); 2228 } 2229 return Matcher::max_vector_size(bt); 2230 } 2231 2232 int Matcher::scalable_vector_reg_size(const BasicType bt) { 2233 return -1; 2234 } 2235 2236 // Vector ideal reg corresponding to specified size in bytes 2237 uint Matcher::vector_ideal_reg(int size) { 2238 assert(MaxVectorSize >= size, ""); 2239 switch(size) { 2240 case 4: return Op_VecS; 2241 case 8: return Op_VecD; 2242 case 16: return Op_VecX; 2243 case 32: return Op_VecY; 2244 case 64: return Op_VecZ; 2245 } 2246 ShouldNotReachHere(); 2247 return 0; 2248 } 2249 2250 // Check for shift by small constant as well 2251 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2252 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2253 shift->in(2)->get_int() <= 3 && 2254 // Are there other uses besides address expressions? 2255 !matcher->is_visited(shift)) { 2256 address_visited.set(shift->_idx); // Flag as address_visited 2257 mstack.push(shift->in(2), Matcher::Visit); 2258 Node *conv = shift->in(1); 2259 // Allow Matcher to match the rule which bypass 2260 // ConvI2L operation for an array index on LP64 2261 // if the index value is positive. 2262 if (conv->Opcode() == Op_ConvI2L && 2263 conv->as_Type()->type()->is_long()->_lo >= 0 && 2264 // Are there other uses besides address expressions? 2265 !matcher->is_visited(conv)) { 2266 address_visited.set(conv->_idx); // Flag as address_visited 2267 mstack.push(conv->in(1), Matcher::Pre_Visit); 2268 } else { 2269 mstack.push(conv, Matcher::Pre_Visit); 2270 } 2271 return true; 2272 } 2273 return false; 2274 } 2275 2276 // This function identifies sub-graphs in which a 'load' node is 2277 // input to two different nodes, and such that it can be matched 2278 // with BMI instructions like blsi, blsr, etc. 2279 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2280 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2281 // refers to the same node. 2282 // 2283 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2284 // This is a temporary solution until we make DAGs expressible in ADL. 
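// Illustrative use (mirrors is_bmi_pattern() further down): to recognize the blsi
// idiom b = (0 - a[i]) & a[i], i.e. (AndI (SubI 0 LoadI*) LoadI*) with a shared load,
// the matcher is instantiated with the AndI node, the shared LoadI node and Op_ConI,
// and then queried as
//
//   FusedPatternMatcher<TypeInt> bmii(and_node, load_node, Op_ConI);
//   bmii.match(Op_AndI, -1,   // AndI is commutative, so try both inputs
//              Op_SubI, 1,    // the SubI must have the constant as its input 1
//              0);            // and that constant must be zero
//
// 'and_node' and 'load_node' are placeholder names for the nodes passed in by the caller.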
2285 template<typename ConType> 2286 class FusedPatternMatcher { 2287 Node* _op1_node; 2288 Node* _mop_node; 2289 int _con_op; 2290 2291 static int match_next(Node* n, int next_op, int next_op_idx) { 2292 if (n->in(1) == nullptr || n->in(2) == nullptr) { 2293 return -1; 2294 } 2295 2296 if (next_op_idx == -1) { // n is commutative, try rotations 2297 if (n->in(1)->Opcode() == next_op) { 2298 return 1; 2299 } else if (n->in(2)->Opcode() == next_op) { 2300 return 2; 2301 } 2302 } else { 2303 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2304 if (n->in(next_op_idx)->Opcode() == next_op) { 2305 return next_op_idx; 2306 } 2307 } 2308 return -1; 2309 } 2310 2311 public: 2312 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2313 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2314 2315 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2316 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2317 typename ConType::NativeType con_value) { 2318 if (_op1_node->Opcode() != op1) { 2319 return false; 2320 } 2321 if (_mop_node->outcnt() > 2) { 2322 return false; 2323 } 2324 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2325 if (op1_op2_idx == -1) { 2326 return false; 2327 } 2328 // Memory operation must be the other edge 2329 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2330 2331 // Check that the mop node is really what we want 2332 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2333 Node* op2_node = _op1_node->in(op1_op2_idx); 2334 if (op2_node->outcnt() > 1) { 2335 return false; 2336 } 2337 assert(op2_node->Opcode() == op2, "Should be"); 2338 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2339 if (op2_con_idx == -1) { 2340 return false; 2341 } 2342 // Memory operation must be the other edge 2343 int op2_mop_idx = (op2_con_idx & 1) + 1; 2344 // Check that the memory operation is the same node 2345 if (op2_node->in(op2_mop_idx) == _mop_node) { 2346 // Now check the constant 2347 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2348 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2349 return true; 2350 } 2351 } 2352 } 2353 return false; 2354 } 2355 }; 2356 2357 static bool is_bmi_pattern(Node* n, Node* m) { 2358 assert(UseBMI1Instructions, "sanity"); 2359 if (n != nullptr && m != nullptr) { 2360 if (m->Opcode() == Op_LoadI) { 2361 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2362 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2363 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2364 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2365 } else if (m->Opcode() == Op_LoadL) { 2366 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2367 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2368 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2369 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2370 } 2371 } 2372 return false; 2373 } 2374 2375 // Should the matcher clone input 'm' of node 'n'? 2376 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2377 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
2378 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2379 mstack.push(m, Visit); 2380 return true; 2381 } 2382 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2383 mstack.push(m, Visit); // m = ShiftCntV 2384 return true; 2385 } 2386 if (is_encode_and_store_pattern(n, m)) { 2387 mstack.push(m, Visit); 2388 return true; 2389 } 2390 return false; 2391 } 2392 2393 // Should the Matcher clone shifts on addressing modes, expecting them 2394 // to be subsumed into complex addressing expressions or compute them 2395 // into registers? 2396 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2397 Node *off = m->in(AddPNode::Offset); 2398 if (off->is_Con()) { 2399 address_visited.test_set(m->_idx); // Flag as address_visited 2400 Node *adr = m->in(AddPNode::Address); 2401 2402 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset. 2403 // AtomicAdd is not an addressing expression. 2404 // Cheap to find it by looking for screwy base. 2405 if (adr->is_AddP() && 2406 !adr->in(AddPNode::Base)->is_top() && 2407 !adr->in(AddPNode::Offset)->is_Con() && 2408 off->get_long() == (int) (off->get_long()) && // immL32 2409 // Are there other uses besides address expressions? 2410 !is_visited(adr)) { 2411 address_visited.set(adr->_idx); // Flag as address_visited 2412 Node *shift = adr->in(AddPNode::Offset); 2413 if (!clone_shift(shift, this, mstack, address_visited)) { 2414 mstack.push(shift, Pre_Visit); 2415 } 2416 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2417 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2418 } else { 2419 mstack.push(adr, Pre_Visit); 2420 } 2421 2422 // Clone X+offset as it also folds into most addressing expressions 2423 mstack.push(off, Visit); 2424 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2425 return true; 2426 } else if (clone_shift(off, this, mstack, address_visited)) { 2427 address_visited.test_set(m->_idx); // Flag as address_visited 2428 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2429 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2430 return true; 2431 } 2432 return false; 2433 } 2434 2435 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2436 switch (bt) { 2437 case BoolTest::eq: 2438 return Assembler::eq; 2439 case BoolTest::ne: 2440 return Assembler::neq; 2441 case BoolTest::le: 2442 case BoolTest::ule: 2443 return Assembler::le; 2444 case BoolTest::ge: 2445 case BoolTest::uge: 2446 return Assembler::nlt; 2447 case BoolTest::lt: 2448 case BoolTest::ult: 2449 return Assembler::lt; 2450 case BoolTest::gt: 2451 case BoolTest::ugt: 2452 return Assembler::nle; 2453 default : ShouldNotReachHere(); return Assembler::_false; 2454 } 2455 } 2456 2457 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2458 switch (bt) { 2459 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2460 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
2461 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2462 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2463 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2464 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2465 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2466 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2467 } 2468 } 2469 2470 // Helper methods for MachSpillCopyNode::implementation(). 2471 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 2472 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2473 assert(ireg == Op_VecS || // 32bit vector 2474 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2475 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi), 2476 "no non-adjacent vector moves" ); 2477 if (masm) { 2478 switch (ireg) { 2479 case Op_VecS: // copy whole register 2480 case Op_VecD: 2481 case Op_VecX: 2482 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2483 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2484 } else { 2485 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2486 } 2487 break; 2488 case Op_VecY: 2489 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2490 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2491 } else { 2492 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2493 } 2494 break; 2495 case Op_VecZ: 2496 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2497 break; 2498 default: 2499 ShouldNotReachHere(); 2500 } 2501 #ifndef PRODUCT 2502 } else { 2503 switch (ireg) { 2504 case Op_VecS: 2505 case Op_VecD: 2506 case Op_VecX: 2507 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2508 break; 2509 case Op_VecY: 2510 case Op_VecZ: 2511 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2512 break; 2513 default: 2514 ShouldNotReachHere(); 2515 } 2516 #endif 2517 } 2518 } 2519 2520 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 2521 int stack_offset, int reg, uint ireg, outputStream* st) { 2522 if (masm) { 2523 if (is_load) { 2524 switch (ireg) { 2525 case Op_VecS: 2526 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2527 break; 2528 case Op_VecD: 2529 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2530 break; 2531 case Op_VecX: 2532 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2533 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2534 } else { 2535 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2536 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2537 } 2538 break; 2539 case Op_VecY: 2540 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2541 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2542 } else { 2543 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2544 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), 
as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2545 } 2546 break; 2547 case Op_VecZ: 2548 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2549 break; 2550 default: 2551 ShouldNotReachHere(); 2552 } 2553 } else { // store 2554 switch (ireg) { 2555 case Op_VecS: 2556 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2557 break; 2558 case Op_VecD: 2559 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2560 break; 2561 case Op_VecX: 2562 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2563 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2564 } 2565 else { 2566 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2567 } 2568 break; 2569 case Op_VecY: 2570 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2571 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2572 } 2573 else { 2574 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2575 } 2576 break; 2577 case Op_VecZ: 2578 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2579 break; 2580 default: 2581 ShouldNotReachHere(); 2582 } 2583 } 2584 #ifndef PRODUCT 2585 } else { 2586 if (is_load) { 2587 switch (ireg) { 2588 case Op_VecS: 2589 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2590 break; 2591 case Op_VecD: 2592 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2593 break; 2594 case Op_VecX: 2595 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2596 break; 2597 case Op_VecY: 2598 case Op_VecZ: 2599 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2600 break; 2601 default: 2602 ShouldNotReachHere(); 2603 } 2604 } else { // store 2605 switch (ireg) { 2606 case Op_VecS: 2607 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2608 break; 2609 case Op_VecD: 2610 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2611 break; 2612 case Op_VecX: 2613 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2614 break; 2615 case Op_VecY: 2616 case Op_VecZ: 2617 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2618 break; 2619 default: 2620 ShouldNotReachHere(); 2621 } 2622 } 2623 #endif 2624 } 2625 } 2626 2627 template <class T> 2628 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) { 2629 int size = type2aelembytes(bt) * len; 2630 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0); 2631 for (int i = 0; i < len; i++) { 2632 int offset = i * type2aelembytes(bt); 2633 switch (bt) { 2634 case T_BYTE: val->at(i) = con; break; 2635 case T_SHORT: { 2636 jshort c = con; 2637 memcpy(val->adr_at(offset), &c, sizeof(jshort)); 2638 break; 2639 } 2640 case T_INT: { 2641 jint c = con; 2642 memcpy(val->adr_at(offset), &c, sizeof(jint)); 2643 break; 2644 } 2645 case T_LONG: { 2646 jlong c = con; 2647 memcpy(val->adr_at(offset), &c, sizeof(jlong)); 2648 break; 2649 } 2650 case T_FLOAT: { 2651 jfloat c = con; 2652 memcpy(val->adr_at(offset), &c, sizeof(jfloat)); 2653 break; 2654 } 2655 case T_DOUBLE: { 2656 jdouble c = con; 2657 memcpy(val->adr_at(offset), &c, sizeof(jdouble)); 2658 break; 2659 } 2660 default: assert(false, "%s", type2name(bt)); 2661 } 2662 } 2663 
return val; 2664 } 2665 2666 static inline jlong high_bit_set(BasicType bt) { 2667 switch (bt) { 2668 case T_BYTE: return 0x8080808080808080; 2669 case T_SHORT: return 0x8000800080008000; 2670 case T_INT: return 0x8000000080000000; 2671 case T_LONG: return 0x8000000000000000; 2672 default: 2673 ShouldNotReachHere(); 2674 return 0; 2675 } 2676 } 2677 2678 #ifndef PRODUCT 2679 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2680 st->print("nop \t# %d bytes pad for loops and calls", _count); 2681 } 2682 #endif 2683 2684 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const { 2685 __ nop(_count); 2686 } 2687 2688 uint MachNopNode::size(PhaseRegAlloc*) const { 2689 return _count; 2690 } 2691 2692 #ifndef PRODUCT 2693 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2694 st->print("# breakpoint"); 2695 } 2696 #endif 2697 2698 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const { 2699 __ int3(); 2700 } 2701 2702 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2703 return MachNode::size(ra_); 2704 } 2705 2706 %} 2707 2708 encode %{ 2709 2710 enc_class call_epilog %{ 2711 if (VerifyStackAtCalls) { 2712 // Check that stack depth is unchanged: find majik cookie on stack 2713 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2714 Label L; 2715 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2716 __ jccb(Assembler::equal, L); 2717 // Die if stack mismatch 2718 __ int3(); 2719 __ bind(L); 2720 } 2721 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) { 2722 // The last return value is not set by the callee but used to pass IsInit information to compiled code. 2723 // Search for the corresponding projection, get the register and emit code that initialized it. 2724 uint con = (tf()->range_cc()->cnt() - 1); 2725 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { 2726 ProjNode* proj = fast_out(i)->as_Proj(); 2727 if (proj->_con == con) { 2728 // Set IsInit if rax is non-null (a non-null value is returned buffered or scalarized) 2729 OptoReg::Name optoReg = ra_->get_reg_first(proj); 2730 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP)); 2731 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1; 2732 __ testq(rax, rax); 2733 __ setb(Assembler::notZero, toReg); 2734 __ movzbl(toReg, toReg); 2735 if (reg->is_stack()) { 2736 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size; 2737 __ movq(Address(rsp, st_off), toReg); 2738 } 2739 break; 2740 } 2741 } 2742 if (return_value_is_used()) { 2743 // An inline type is returned as fields in multiple registers. 2744 // Rax either contains an oop if the inline type is buffered or a pointer 2745 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax 2746 // if the lowest bit is set to allow C2 to use the oop after null checking. 
2747 // rax &= (rax & 1) - 1 2748 __ movptr(rscratch1, rax); 2749 __ andptr(rscratch1, 0x1); 2750 __ subptr(rscratch1, 0x1); 2751 __ andptr(rax, rscratch1); 2752 } 2753 } 2754 %} 2755 2756 %} 2757 2758 // Operands for bound floating pointer register arguments 2759 operand rxmm0() %{ 2760 constraint(ALLOC_IN_RC(xmm0_reg)); 2761 match(VecX); 2762 format%{%} 2763 interface(REG_INTER); 2764 %} 2765 2766 //----------OPERANDS----------------------------------------------------------- 2767 // Operand definitions must precede instruction definitions for correct parsing 2768 // in the ADLC because operands constitute user defined types which are used in 2769 // instruction definitions. 2770 2771 // Vectors 2772 2773 // Dummy generic vector class. Should be used for all vector operands. 2774 // Replaced with vec[SDXYZ] during post-selection pass. 2775 operand vec() %{ 2776 constraint(ALLOC_IN_RC(dynamic)); 2777 match(VecX); 2778 match(VecY); 2779 match(VecZ); 2780 match(VecS); 2781 match(VecD); 2782 2783 format %{ %} 2784 interface(REG_INTER); 2785 %} 2786 2787 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2788 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2789 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2790 // runtime code generation via reg_class_dynamic. 2791 operand legVec() %{ 2792 constraint(ALLOC_IN_RC(dynamic)); 2793 match(VecX); 2794 match(VecY); 2795 match(VecZ); 2796 match(VecS); 2797 match(VecD); 2798 2799 format %{ %} 2800 interface(REG_INTER); 2801 %} 2802 2803 // Replaces vec during post-selection cleanup. See above. 2804 operand vecS() %{ 2805 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2806 match(VecS); 2807 2808 format %{ %} 2809 interface(REG_INTER); 2810 %} 2811 2812 // Replaces legVec during post-selection cleanup. See above. 2813 operand legVecS() %{ 2814 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2815 match(VecS); 2816 2817 format %{ %} 2818 interface(REG_INTER); 2819 %} 2820 2821 // Replaces vec during post-selection cleanup. See above. 2822 operand vecD() %{ 2823 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2824 match(VecD); 2825 2826 format %{ %} 2827 interface(REG_INTER); 2828 %} 2829 2830 // Replaces legVec during post-selection cleanup. See above. 2831 operand legVecD() %{ 2832 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2833 match(VecD); 2834 2835 format %{ %} 2836 interface(REG_INTER); 2837 %} 2838 2839 // Replaces vec during post-selection cleanup. See above. 2840 operand vecX() %{ 2841 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2842 match(VecX); 2843 2844 format %{ %} 2845 interface(REG_INTER); 2846 %} 2847 2848 // Replaces legVec during post-selection cleanup. See above. 2849 operand legVecX() %{ 2850 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2851 match(VecX); 2852 2853 format %{ %} 2854 interface(REG_INTER); 2855 %} 2856 2857 // Replaces vec during post-selection cleanup. See above. 2858 operand vecY() %{ 2859 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2860 match(VecY); 2861 2862 format %{ %} 2863 interface(REG_INTER); 2864 %} 2865 2866 // Replaces legVec during post-selection cleanup. See above. 2867 operand legVecY() %{ 2868 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2869 match(VecY); 2870 2871 format %{ %} 2872 interface(REG_INTER); 2873 %} 2874 2875 // Replaces vec during post-selection cleanup. See above. 
2876 operand vecZ() %{ 2877 constraint(ALLOC_IN_RC(vectorz_reg)); 2878 match(VecZ); 2879 2880 format %{ %} 2881 interface(REG_INTER); 2882 %} 2883 2884 // Replaces legVec during post-selection cleanup. See above. 2885 operand legVecZ() %{ 2886 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2887 match(VecZ); 2888 2889 format %{ %} 2890 interface(REG_INTER); 2891 %} 2892 2893 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2894 2895 // ============================================================================ 2896 2897 instruct ShouldNotReachHere() %{ 2898 match(Halt); 2899 format %{ "stop\t# ShouldNotReachHere" %} 2900 ins_encode %{ 2901 if (is_reachable()) { 2902 const char* str = __ code_string(_halt_reason); 2903 __ stop(str); 2904 } 2905 %} 2906 ins_pipe(pipe_slow); 2907 %} 2908 2909 // ============================================================================ 2910 2911 instruct addF_reg(regF dst, regF src) %{ 2912 predicate(UseAVX == 0); 2913 match(Set dst (AddF dst src)); 2914 2915 format %{ "addss $dst, $src" %} 2916 ins_cost(150); 2917 ins_encode %{ 2918 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2919 %} 2920 ins_pipe(pipe_slow); 2921 %} 2922 2923 instruct addF_mem(regF dst, memory src) %{ 2924 predicate(UseAVX == 0); 2925 match(Set dst (AddF dst (LoadF src))); 2926 2927 format %{ "addss $dst, $src" %} 2928 ins_cost(150); 2929 ins_encode %{ 2930 __ addss($dst$$XMMRegister, $src$$Address); 2931 %} 2932 ins_pipe(pipe_slow); 2933 %} 2934 2935 instruct addF_imm(regF dst, immF con) %{ 2936 predicate(UseAVX == 0); 2937 match(Set dst (AddF dst con)); 2938 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2939 ins_cost(150); 2940 ins_encode %{ 2941 __ addss($dst$$XMMRegister, $constantaddress($con)); 2942 %} 2943 ins_pipe(pipe_slow); 2944 %} 2945 2946 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2947 predicate(UseAVX > 0); 2948 match(Set dst (AddF src1 src2)); 2949 2950 format %{ "vaddss $dst, $src1, $src2" %} 2951 ins_cost(150); 2952 ins_encode %{ 2953 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2954 %} 2955 ins_pipe(pipe_slow); 2956 %} 2957 2958 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2959 predicate(UseAVX > 0); 2960 match(Set dst (AddF src1 (LoadF src2))); 2961 2962 format %{ "vaddss $dst, $src1, $src2" %} 2963 ins_cost(150); 2964 ins_encode %{ 2965 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2966 %} 2967 ins_pipe(pipe_slow); 2968 %} 2969 2970 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2971 predicate(UseAVX > 0); 2972 match(Set dst (AddF src con)); 2973 2974 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2975 ins_cost(150); 2976 ins_encode %{ 2977 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2978 %} 2979 ins_pipe(pipe_slow); 2980 %} 2981 2982 instruct addD_reg(regD dst, regD src) %{ 2983 predicate(UseAVX == 0); 2984 match(Set dst (AddD dst src)); 2985 2986 format %{ "addsd $dst, $src" %} 2987 ins_cost(150); 2988 ins_encode %{ 2989 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2990 %} 2991 ins_pipe(pipe_slow); 2992 %} 2993 2994 instruct addD_mem(regD dst, memory src) %{ 2995 predicate(UseAVX == 0); 2996 match(Set dst (AddD dst (LoadD src))); 2997 2998 format %{ "addsd $dst, $src" %} 2999 ins_cost(150); 3000 ins_encode %{ 3001 __ addsd($dst$$XMMRegister, $src$$Address); 3002 %} 3003 ins_pipe(pipe_slow); 3004 %} 3005 3006 instruct addD_imm(regD dst, immD 
con) %{ 3007 predicate(UseAVX == 0); 3008 match(Set dst (AddD dst con)); 3009 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3010 ins_cost(150); 3011 ins_encode %{ 3012 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3013 %} 3014 ins_pipe(pipe_slow); 3015 %} 3016 3017 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3018 predicate(UseAVX > 0); 3019 match(Set dst (AddD src1 src2)); 3020 3021 format %{ "vaddsd $dst, $src1, $src2" %} 3022 ins_cost(150); 3023 ins_encode %{ 3024 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3025 %} 3026 ins_pipe(pipe_slow); 3027 %} 3028 3029 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3030 predicate(UseAVX > 0); 3031 match(Set dst (AddD src1 (LoadD src2))); 3032 3033 format %{ "vaddsd $dst, $src1, $src2" %} 3034 ins_cost(150); 3035 ins_encode %{ 3036 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3037 %} 3038 ins_pipe(pipe_slow); 3039 %} 3040 3041 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3042 predicate(UseAVX > 0); 3043 match(Set dst (AddD src con)); 3044 3045 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3046 ins_cost(150); 3047 ins_encode %{ 3048 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3049 %} 3050 ins_pipe(pipe_slow); 3051 %} 3052 3053 instruct subF_reg(regF dst, regF src) %{ 3054 predicate(UseAVX == 0); 3055 match(Set dst (SubF dst src)); 3056 3057 format %{ "subss $dst, $src" %} 3058 ins_cost(150); 3059 ins_encode %{ 3060 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3061 %} 3062 ins_pipe(pipe_slow); 3063 %} 3064 3065 instruct subF_mem(regF dst, memory src) %{ 3066 predicate(UseAVX == 0); 3067 match(Set dst (SubF dst (LoadF src))); 3068 3069 format %{ "subss $dst, $src" %} 3070 ins_cost(150); 3071 ins_encode %{ 3072 __ subss($dst$$XMMRegister, $src$$Address); 3073 %} 3074 ins_pipe(pipe_slow); 3075 %} 3076 3077 instruct subF_imm(regF dst, immF con) %{ 3078 predicate(UseAVX == 0); 3079 match(Set dst (SubF dst con)); 3080 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3081 ins_cost(150); 3082 ins_encode %{ 3083 __ subss($dst$$XMMRegister, $constantaddress($con)); 3084 %} 3085 ins_pipe(pipe_slow); 3086 %} 3087 3088 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3089 predicate(UseAVX > 0); 3090 match(Set dst (SubF src1 src2)); 3091 3092 format %{ "vsubss $dst, $src1, $src2" %} 3093 ins_cost(150); 3094 ins_encode %{ 3095 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3096 %} 3097 ins_pipe(pipe_slow); 3098 %} 3099 3100 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3101 predicate(UseAVX > 0); 3102 match(Set dst (SubF src1 (LoadF src2))); 3103 3104 format %{ "vsubss $dst, $src1, $src2" %} 3105 ins_cost(150); 3106 ins_encode %{ 3107 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3108 %} 3109 ins_pipe(pipe_slow); 3110 %} 3111 3112 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3113 predicate(UseAVX > 0); 3114 match(Set dst (SubF src con)); 3115 3116 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3117 ins_cost(150); 3118 ins_encode %{ 3119 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3120 %} 3121 ins_pipe(pipe_slow); 3122 %} 3123 3124 instruct subD_reg(regD dst, regD src) %{ 3125 predicate(UseAVX == 0); 3126 match(Set dst (SubD dst src)); 3127 3128 format %{ "subsd $dst, $src" %} 
3129 ins_cost(150); 3130 ins_encode %{ 3131 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3132 %} 3133 ins_pipe(pipe_slow); 3134 %} 3135 3136 instruct subD_mem(regD dst, memory src) %{ 3137 predicate(UseAVX == 0); 3138 match(Set dst (SubD dst (LoadD src))); 3139 3140 format %{ "subsd $dst, $src" %} 3141 ins_cost(150); 3142 ins_encode %{ 3143 __ subsd($dst$$XMMRegister, $src$$Address); 3144 %} 3145 ins_pipe(pipe_slow); 3146 %} 3147 3148 instruct subD_imm(regD dst, immD con) %{ 3149 predicate(UseAVX == 0); 3150 match(Set dst (SubD dst con)); 3151 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3152 ins_cost(150); 3153 ins_encode %{ 3154 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3155 %} 3156 ins_pipe(pipe_slow); 3157 %} 3158 3159 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3160 predicate(UseAVX > 0); 3161 match(Set dst (SubD src1 src2)); 3162 3163 format %{ "vsubsd $dst, $src1, $src2" %} 3164 ins_cost(150); 3165 ins_encode %{ 3166 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3167 %} 3168 ins_pipe(pipe_slow); 3169 %} 3170 3171 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3172 predicate(UseAVX > 0); 3173 match(Set dst (SubD src1 (LoadD src2))); 3174 3175 format %{ "vsubsd $dst, $src1, $src2" %} 3176 ins_cost(150); 3177 ins_encode %{ 3178 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3179 %} 3180 ins_pipe(pipe_slow); 3181 %} 3182 3183 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3184 predicate(UseAVX > 0); 3185 match(Set dst (SubD src con)); 3186 3187 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3188 ins_cost(150); 3189 ins_encode %{ 3190 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3191 %} 3192 ins_pipe(pipe_slow); 3193 %} 3194 3195 instruct mulF_reg(regF dst, regF src) %{ 3196 predicate(UseAVX == 0); 3197 match(Set dst (MulF dst src)); 3198 3199 format %{ "mulss $dst, $src" %} 3200 ins_cost(150); 3201 ins_encode %{ 3202 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3203 %} 3204 ins_pipe(pipe_slow); 3205 %} 3206 3207 instruct mulF_mem(regF dst, memory src) %{ 3208 predicate(UseAVX == 0); 3209 match(Set dst (MulF dst (LoadF src))); 3210 3211 format %{ "mulss $dst, $src" %} 3212 ins_cost(150); 3213 ins_encode %{ 3214 __ mulss($dst$$XMMRegister, $src$$Address); 3215 %} 3216 ins_pipe(pipe_slow); 3217 %} 3218 3219 instruct mulF_imm(regF dst, immF con) %{ 3220 predicate(UseAVX == 0); 3221 match(Set dst (MulF dst con)); 3222 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3223 ins_cost(150); 3224 ins_encode %{ 3225 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3226 %} 3227 ins_pipe(pipe_slow); 3228 %} 3229 3230 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3231 predicate(UseAVX > 0); 3232 match(Set dst (MulF src1 src2)); 3233 3234 format %{ "vmulss $dst, $src1, $src2" %} 3235 ins_cost(150); 3236 ins_encode %{ 3237 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3238 %} 3239 ins_pipe(pipe_slow); 3240 %} 3241 3242 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3243 predicate(UseAVX > 0); 3244 match(Set dst (MulF src1 (LoadF src2))); 3245 3246 format %{ "vmulss $dst, $src1, $src2" %} 3247 ins_cost(150); 3248 ins_encode %{ 3249 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3250 %} 3251 ins_pipe(pipe_slow); 3252 %} 3253 3254 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3255 
predicate(UseAVX > 0); 3256 match(Set dst (MulF src con)); 3257 3258 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3259 ins_cost(150); 3260 ins_encode %{ 3261 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3262 %} 3263 ins_pipe(pipe_slow); 3264 %} 3265 3266 instruct mulD_reg(regD dst, regD src) %{ 3267 predicate(UseAVX == 0); 3268 match(Set dst (MulD dst src)); 3269 3270 format %{ "mulsd $dst, $src" %} 3271 ins_cost(150); 3272 ins_encode %{ 3273 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3274 %} 3275 ins_pipe(pipe_slow); 3276 %} 3277 3278 instruct mulD_mem(regD dst, memory src) %{ 3279 predicate(UseAVX == 0); 3280 match(Set dst (MulD dst (LoadD src))); 3281 3282 format %{ "mulsd $dst, $src" %} 3283 ins_cost(150); 3284 ins_encode %{ 3285 __ mulsd($dst$$XMMRegister, $src$$Address); 3286 %} 3287 ins_pipe(pipe_slow); 3288 %} 3289 3290 instruct mulD_imm(regD dst, immD con) %{ 3291 predicate(UseAVX == 0); 3292 match(Set dst (MulD dst con)); 3293 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3294 ins_cost(150); 3295 ins_encode %{ 3296 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3297 %} 3298 ins_pipe(pipe_slow); 3299 %} 3300 3301 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3302 predicate(UseAVX > 0); 3303 match(Set dst (MulD src1 src2)); 3304 3305 format %{ "vmulsd $dst, $src1, $src2" %} 3306 ins_cost(150); 3307 ins_encode %{ 3308 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3309 %} 3310 ins_pipe(pipe_slow); 3311 %} 3312 3313 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3314 predicate(UseAVX > 0); 3315 match(Set dst (MulD src1 (LoadD src2))); 3316 3317 format %{ "vmulsd $dst, $src1, $src2" %} 3318 ins_cost(150); 3319 ins_encode %{ 3320 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3321 %} 3322 ins_pipe(pipe_slow); 3323 %} 3324 3325 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3326 predicate(UseAVX > 0); 3327 match(Set dst (MulD src con)); 3328 3329 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3330 ins_cost(150); 3331 ins_encode %{ 3332 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3333 %} 3334 ins_pipe(pipe_slow); 3335 %} 3336 3337 instruct divF_reg(regF dst, regF src) %{ 3338 predicate(UseAVX == 0); 3339 match(Set dst (DivF dst src)); 3340 3341 format %{ "divss $dst, $src" %} 3342 ins_cost(150); 3343 ins_encode %{ 3344 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3345 %} 3346 ins_pipe(pipe_slow); 3347 %} 3348 3349 instruct divF_mem(regF dst, memory src) %{ 3350 predicate(UseAVX == 0); 3351 match(Set dst (DivF dst (LoadF src))); 3352 3353 format %{ "divss $dst, $src" %} 3354 ins_cost(150); 3355 ins_encode %{ 3356 __ divss($dst$$XMMRegister, $src$$Address); 3357 %} 3358 ins_pipe(pipe_slow); 3359 %} 3360 3361 instruct divF_imm(regF dst, immF con) %{ 3362 predicate(UseAVX == 0); 3363 match(Set dst (DivF dst con)); 3364 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3365 ins_cost(150); 3366 ins_encode %{ 3367 __ divss($dst$$XMMRegister, $constantaddress($con)); 3368 %} 3369 ins_pipe(pipe_slow); 3370 %} 3371 3372 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3373 predicate(UseAVX > 0); 3374 match(Set dst (DivF src1 src2)); 3375 3376 format %{ "vdivss $dst, $src1, $src2" %} 3377 ins_cost(150); 3378 ins_encode %{ 3379 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, 
$src2$$XMMRegister); 3380 %} 3381 ins_pipe(pipe_slow); 3382 %} 3383 3384 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3385 predicate(UseAVX > 0); 3386 match(Set dst (DivF src1 (LoadF src2))); 3387 3388 format %{ "vdivss $dst, $src1, $src2" %} 3389 ins_cost(150); 3390 ins_encode %{ 3391 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3392 %} 3393 ins_pipe(pipe_slow); 3394 %} 3395 3396 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3397 predicate(UseAVX > 0); 3398 match(Set dst (DivF src con)); 3399 3400 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3401 ins_cost(150); 3402 ins_encode %{ 3403 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3404 %} 3405 ins_pipe(pipe_slow); 3406 %} 3407 3408 instruct divD_reg(regD dst, regD src) %{ 3409 predicate(UseAVX == 0); 3410 match(Set dst (DivD dst src)); 3411 3412 format %{ "divsd $dst, $src" %} 3413 ins_cost(150); 3414 ins_encode %{ 3415 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3416 %} 3417 ins_pipe(pipe_slow); 3418 %} 3419 3420 instruct divD_mem(regD dst, memory src) %{ 3421 predicate(UseAVX == 0); 3422 match(Set dst (DivD dst (LoadD src))); 3423 3424 format %{ "divsd $dst, $src" %} 3425 ins_cost(150); 3426 ins_encode %{ 3427 __ divsd($dst$$XMMRegister, $src$$Address); 3428 %} 3429 ins_pipe(pipe_slow); 3430 %} 3431 3432 instruct divD_imm(regD dst, immD con) %{ 3433 predicate(UseAVX == 0); 3434 match(Set dst (DivD dst con)); 3435 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3436 ins_cost(150); 3437 ins_encode %{ 3438 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3439 %} 3440 ins_pipe(pipe_slow); 3441 %} 3442 3443 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3444 predicate(UseAVX > 0); 3445 match(Set dst (DivD src1 src2)); 3446 3447 format %{ "vdivsd $dst, $src1, $src2" %} 3448 ins_cost(150); 3449 ins_encode %{ 3450 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3451 %} 3452 ins_pipe(pipe_slow); 3453 %} 3454 3455 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3456 predicate(UseAVX > 0); 3457 match(Set dst (DivD src1 (LoadD src2))); 3458 3459 format %{ "vdivsd $dst, $src1, $src2" %} 3460 ins_cost(150); 3461 ins_encode %{ 3462 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3463 %} 3464 ins_pipe(pipe_slow); 3465 %} 3466 3467 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3468 predicate(UseAVX > 0); 3469 match(Set dst (DivD src con)); 3470 3471 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3472 ins_cost(150); 3473 ins_encode %{ 3474 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3475 %} 3476 ins_pipe(pipe_slow); 3477 %} 3478 3479 instruct absF_reg(regF dst) %{ 3480 predicate(UseAVX == 0); 3481 match(Set dst (AbsF dst)); 3482 ins_cost(150); 3483 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3484 ins_encode %{ 3485 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3486 %} 3487 ins_pipe(pipe_slow); 3488 %} 3489 3490 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3491 predicate(UseAVX > 0); 3492 match(Set dst (AbsF src)); 3493 ins_cost(150); 3494 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3495 ins_encode %{ 3496 int vlen_enc = Assembler::AVX_128bit; 3497 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3498 ExternalAddress(float_signmask()), vlen_enc); 3499 %} 3500 
ins_pipe(pipe_slow); 3501 %} 3502 3503 instruct absD_reg(regD dst) %{ 3504 predicate(UseAVX == 0); 3505 match(Set dst (AbsD dst)); 3506 ins_cost(150); 3507 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3508 "# abs double by sign masking" %} 3509 ins_encode %{ 3510 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3511 %} 3512 ins_pipe(pipe_slow); 3513 %} 3514 3515 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3516 predicate(UseAVX > 0); 3517 match(Set dst (AbsD src)); 3518 ins_cost(150); 3519 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3520 "# abs double by sign masking" %} 3521 ins_encode %{ 3522 int vlen_enc = Assembler::AVX_128bit; 3523 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3524 ExternalAddress(double_signmask()), vlen_enc); 3525 %} 3526 ins_pipe(pipe_slow); 3527 %} 3528 3529 instruct negF_reg(regF dst) %{ 3530 predicate(UseAVX == 0); 3531 match(Set dst (NegF dst)); 3532 ins_cost(150); 3533 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3534 ins_encode %{ 3535 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3536 %} 3537 ins_pipe(pipe_slow); 3538 %} 3539 3540 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3541 predicate(UseAVX > 0); 3542 match(Set dst (NegF src)); 3543 ins_cost(150); 3544 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3545 ins_encode %{ 3546 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3547 ExternalAddress(float_signflip())); 3548 %} 3549 ins_pipe(pipe_slow); 3550 %} 3551 3552 instruct negD_reg(regD dst) %{ 3553 predicate(UseAVX == 0); 3554 match(Set dst (NegD dst)); 3555 ins_cost(150); 3556 format %{ "xorpd $dst, [0x8000000000000000]\t" 3557 "# neg double by sign flipping" %} 3558 ins_encode %{ 3559 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3560 %} 3561 ins_pipe(pipe_slow); 3562 %} 3563 3564 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3565 predicate(UseAVX > 0); 3566 match(Set dst (NegD src)); 3567 ins_cost(150); 3568 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3569 "# neg double by sign flipping" %} 3570 ins_encode %{ 3571 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3572 ExternalAddress(double_signflip())); 3573 %} 3574 ins_pipe(pipe_slow); 3575 %} 3576 3577 // The sqrtss instruction needs the destination register to be pre-initialized for best performance. 3578 // Therefore, only the instruct rule where the input is pre-loaded into the dst register is defined below. 3579 instruct sqrtF_reg(regF dst) %{ 3580 match(Set dst (SqrtF dst)); 3581 format %{ "sqrtss $dst, $dst" %} 3582 ins_encode %{ 3583 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3584 %} 3585 ins_pipe(pipe_slow); 3586 %} 3587 3588 // The sqrtsd instruction needs the destination register to be pre-initialized for best performance. 3589 // Therefore, only the instruct rule where the input is pre-loaded into the dst register is defined below. 3590 instruct sqrtD_reg(regD dst) %{ 3591 match(Set dst (SqrtD dst)); 3592 format %{ "sqrtsd $dst, $dst" %} 3593 ins_encode %{ 3594 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3595 %} 3596 ins_pipe(pipe_slow); 3597 %} 3598 3599 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{ 3600 effect(TEMP tmp); 3601 match(Set dst (ConvF2HF src)); 3602 ins_cost(125); 3603 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP" %} 3604 ins_encode %{ 3605 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister); 3606 %} 3607 ins_pipe( pipe_slow ); 3608 %} 3609 3610 instruct convF2HF_mem_reg(memory mem, regF src, kReg
ktmp, rRegI rtmp) %{ 3611 predicate((UseAVX > 2) && VM_Version::supports_avx512vl()); 3612 effect(TEMP ktmp, TEMP rtmp); 3613 match(Set mem (StoreC mem (ConvF2HF src))); 3614 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %} 3615 ins_encode %{ 3616 __ movl($rtmp$$Register, 0x1); 3617 __ kmovwl($ktmp$$KRegister, $rtmp$$Register); 3618 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 3619 %} 3620 ins_pipe( pipe_slow ); 3621 %} 3622 3623 instruct vconvF2HF(vec dst, vec src) %{ 3624 match(Set dst (VectorCastF2HF src)); 3625 format %{ "vector_conv_F2HF $dst $src" %} 3626 ins_encode %{ 3627 int vlen_enc = vector_length_encoding(this, $src); 3628 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc); 3629 %} 3630 ins_pipe( pipe_slow ); 3631 %} 3632 3633 instruct vconvF2HF_mem_reg(memory mem, vec src) %{ 3634 predicate(n->as_StoreVector()->memory_size() >= 16); 3635 match(Set mem (StoreVector mem (VectorCastF2HF src))); 3636 format %{ "vcvtps2ph $mem,$src" %} 3637 ins_encode %{ 3638 int vlen_enc = vector_length_encoding(this, $src); 3639 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc); 3640 %} 3641 ins_pipe( pipe_slow ); 3642 %} 3643 3644 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{ 3645 match(Set dst (ConvHF2F src)); 3646 format %{ "vcvtph2ps $dst,$src" %} 3647 ins_encode %{ 3648 __ flt16_to_flt($dst$$XMMRegister, $src$$Register); 3649 %} 3650 ins_pipe( pipe_slow ); 3651 %} 3652 3653 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{ 3654 match(Set dst (VectorCastHF2F (LoadVector mem))); 3655 format %{ "vcvtph2ps $dst,$mem" %} 3656 ins_encode %{ 3657 int vlen_enc = vector_length_encoding(this); 3658 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc); 3659 %} 3660 ins_pipe( pipe_slow ); 3661 %} 3662 3663 instruct vconvHF2F(vec dst, vec src) %{ 3664 match(Set dst (VectorCastHF2F src)); 3665 ins_cost(125); 3666 format %{ "vector_conv_HF2F $dst,$src" %} 3667 ins_encode %{ 3668 int vlen_enc = vector_length_encoding(this); 3669 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 3670 %} 3671 ins_pipe( pipe_slow ); 3672 %} 3673 3674 // ---------------------------------------- VectorReinterpret ------------------------------------ 3675 instruct reinterpret_mask(kReg dst) %{ 3676 predicate(n->bottom_type()->isa_vectmask() && 3677 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3678 match(Set dst (VectorReinterpret dst)); 3679 ins_cost(125); 3680 format %{ "vector_reinterpret $dst\t!" %} 3681 ins_encode %{ 3682 // empty 3683 %} 3684 ins_pipe( pipe_slow ); 3685 %} 3686 3687 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3688 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3689 n->bottom_type()->isa_vectmask() && 3690 n->in(1)->bottom_type()->isa_vectmask() && 3691 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3692 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3693 match(Set dst (VectorReinterpret src)); 3694 effect(TEMP xtmp); 3695 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" 
%} 3696 ins_encode %{ 3697 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3698 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3699 assert(src_sz == dst_sz , "src and dst size mismatch"); 3700 int vlen_enc = vector_length_encoding(src_sz); 3701 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3702 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3703 %} 3704 ins_pipe( pipe_slow ); 3705 %} 3706 3707 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3708 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3709 n->bottom_type()->isa_vectmask() && 3710 n->in(1)->bottom_type()->isa_vectmask() && 3711 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3712 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3713 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3714 match(Set dst (VectorReinterpret src)); 3715 effect(TEMP xtmp); 3716 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3717 ins_encode %{ 3718 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3719 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3720 assert(src_sz == dst_sz , "src and dst size mismatch"); 3721 int vlen_enc = vector_length_encoding(src_sz); 3722 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3723 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3724 %} 3725 ins_pipe( pipe_slow ); 3726 %} 3727 3728 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3729 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3730 n->bottom_type()->isa_vectmask() && 3731 n->in(1)->bottom_type()->isa_vectmask() && 3732 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3733 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3734 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3735 match(Set dst (VectorReinterpret src)); 3736 effect(TEMP xtmp); 3737 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %} 3738 ins_encode %{ 3739 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3740 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3741 assert(src_sz == dst_sz , "src and dst size mismatch"); 3742 int vlen_enc = vector_length_encoding(src_sz); 3743 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3744 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3745 %} 3746 ins_pipe( pipe_slow ); 3747 %} 3748 3749 instruct reinterpret(vec dst) %{ 3750 predicate(!n->bottom_type()->isa_vectmask() && 3751 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3752 match(Set dst (VectorReinterpret dst)); 3753 ins_cost(125); 3754 format %{ "vector_reinterpret $dst\t!" 
%} 3755 ins_encode %{ 3756 // empty 3757 %} 3758 ins_pipe( pipe_slow ); 3759 %} 3760 3761 instruct reinterpret_expand(vec dst, vec src) %{ 3762 predicate(UseAVX == 0 && 3763 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3764 match(Set dst (VectorReinterpret src)); 3765 ins_cost(125); 3766 effect(TEMP dst); 3767 format %{ "vector_reinterpret_expand $dst,$src" %} 3768 ins_encode %{ 3769 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3770 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3771 3772 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3773 if (src_vlen_in_bytes == 4) { 3774 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3775 } else { 3776 assert(src_vlen_in_bytes == 8, ""); 3777 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3778 } 3779 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3780 %} 3781 ins_pipe( pipe_slow ); 3782 %} 3783 3784 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3785 predicate(UseAVX > 0 && 3786 !n->bottom_type()->isa_vectmask() && 3787 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3788 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3789 match(Set dst (VectorReinterpret src)); 3790 ins_cost(125); 3791 format %{ "vector_reinterpret_expand $dst,$src" %} 3792 ins_encode %{ 3793 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3794 %} 3795 ins_pipe( pipe_slow ); 3796 %} 3797 3798 3799 instruct vreinterpret_expand(legVec dst, vec src) %{ 3800 predicate(UseAVX > 0 && 3801 !n->bottom_type()->isa_vectmask() && 3802 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3803 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3804 match(Set dst (VectorReinterpret src)); 3805 ins_cost(125); 3806 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3807 ins_encode %{ 3808 switch (Matcher::vector_length_in_bytes(this, $src)) { 3809 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3810 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3811 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3812 default: ShouldNotReachHere(); 3813 } 3814 %} 3815 ins_pipe( pipe_slow ); 3816 %} 3817 3818 instruct reinterpret_shrink(vec dst, legVec src) %{ 3819 predicate(!n->bottom_type()->isa_vectmask() && 3820 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3821 match(Set dst (VectorReinterpret src)); 3822 ins_cost(125); 3823 format %{ "vector_reinterpret_shrink $dst,$src\t!" 
%} 3824 ins_encode %{ 3825 switch (Matcher::vector_length_in_bytes(this)) { 3826 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3827 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3828 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3829 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3830 default: ShouldNotReachHere(); 3831 } 3832 %} 3833 ins_pipe( pipe_slow ); 3834 %} 3835 3836 // ---------------------------------------------------------------------------------------------------- 3837 3838 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3839 match(Set dst (RoundDoubleMode src rmode)); 3840 format %{ "roundsd $dst,$src" %} 3841 ins_cost(150); 3842 ins_encode %{ 3843 assert(UseSSE >= 4, "required"); 3844 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) { 3845 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3846 } 3847 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3848 %} 3849 ins_pipe(pipe_slow); 3850 %} 3851 3852 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3853 match(Set dst (RoundDoubleMode con rmode)); 3854 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3855 ins_cost(150); 3856 ins_encode %{ 3857 assert(UseSSE >= 4, "required"); 3858 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3859 %} 3860 ins_pipe(pipe_slow); 3861 %} 3862 3863 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3864 predicate(Matcher::vector_length(n) < 8); 3865 match(Set dst (RoundDoubleModeV src rmode)); 3866 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3867 ins_encode %{ 3868 assert(UseAVX > 0, "required"); 3869 int vlen_enc = vector_length_encoding(this); 3870 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3871 %} 3872 ins_pipe( pipe_slow ); 3873 %} 3874 3875 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3876 predicate(Matcher::vector_length(n) == 8); 3877 match(Set dst (RoundDoubleModeV src rmode)); 3878 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3879 ins_encode %{ 3880 assert(UseAVX > 2, "required"); 3881 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3882 %} 3883 ins_pipe( pipe_slow ); 3884 %} 3885 3886 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3887 predicate(Matcher::vector_length(n) < 8); 3888 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3889 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3890 ins_encode %{ 3891 assert(UseAVX > 0, "required"); 3892 int vlen_enc = vector_length_encoding(this); 3893 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3894 %} 3895 ins_pipe( pipe_slow ); 3896 %} 3897 3898 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3899 predicate(Matcher::vector_length(n) == 8); 3900 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3901 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3902 ins_encode %{ 3903 assert(UseAVX > 2, "required"); 3904 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3905 %} 3906 ins_pipe( pipe_slow ); 3907 %} 3908 3909 instruct onspinwait() %{ 3910 match(OnSpinWait); 3911 ins_cost(200); 3912 3913 format %{ 3914 $$template 3915 $$emit$$"pause\t! 
membar_onspinwait" 3916 %} 3917 ins_encode %{ 3918 __ pause(); 3919 %} 3920 ins_pipe(pipe_slow); 3921 %} 3922 3923 // a * b + c 3924 instruct fmaD_reg(regD a, regD b, regD c) %{ 3925 match(Set c (FmaD c (Binary a b))); 3926 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3927 ins_cost(150); 3928 ins_encode %{ 3929 assert(UseFMA, "Needs FMA instructions support."); 3930 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3931 %} 3932 ins_pipe( pipe_slow ); 3933 %} 3934 3935 // a * b + c 3936 instruct fmaF_reg(regF a, regF b, regF c) %{ 3937 match(Set c (FmaF c (Binary a b))); 3938 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3939 ins_cost(150); 3940 ins_encode %{ 3941 assert(UseFMA, "Needs FMA instructions support."); 3942 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3943 %} 3944 ins_pipe( pipe_slow ); 3945 %} 3946 3947 // ====================VECTOR INSTRUCTIONS===================================== 3948 3949 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 3950 instruct MoveVec2Leg(legVec dst, vec src) %{ 3951 match(Set dst src); 3952 format %{ "" %} 3953 ins_encode %{ 3954 ShouldNotReachHere(); 3955 %} 3956 ins_pipe( fpu_reg_reg ); 3957 %} 3958 3959 instruct MoveLeg2Vec(vec dst, legVec src) %{ 3960 match(Set dst src); 3961 format %{ "" %} 3962 ins_encode %{ 3963 ShouldNotReachHere(); 3964 %} 3965 ins_pipe( fpu_reg_reg ); 3966 %} 3967 3968 // ============================================================================ 3969 3970 // Load vectors generic operand pattern 3971 instruct loadV(vec dst, memory mem) %{ 3972 match(Set dst (LoadVector mem)); 3973 ins_cost(125); 3974 format %{ "load_vector $dst,$mem" %} 3975 ins_encode %{ 3976 BasicType bt = Matcher::vector_element_basic_type(this); 3977 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 3978 %} 3979 ins_pipe( pipe_slow ); 3980 %} 3981 3982 // Store vectors generic operand pattern. 3983 instruct storeV(memory mem, vec src) %{ 3984 match(Set mem (StoreVector mem src)); 3985 ins_cost(145); 3986 format %{ "store_vector $mem,$src\n\t" %} 3987 ins_encode %{ 3988 switch (Matcher::vector_length_in_bytes(this, $src)) { 3989 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 3990 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 3991 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 3992 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 3993 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 3994 default: ShouldNotReachHere(); 3995 } 3996 %} 3997 ins_pipe( pipe_slow ); 3998 %} 3999 4000 // ---------------------------------------- Gather ------------------------------------ 4001 4002 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE 4003 4004 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 4005 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) && 4006 Matcher::vector_length_in_bytes(n) <= 32); 4007 match(Set dst (LoadVectorGather mem idx)); 4008 effect(TEMP dst, TEMP tmp, TEMP mask); 4009 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %} 4010 ins_encode %{ 4011 int vlen_enc = vector_length_encoding(this); 4012 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4013 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4014 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4015 __ lea($tmp$$Register, $mem$$Address); 4016 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4017 %} 4018 ins_pipe( pipe_slow ); 4019 %} 4020 4021 4022 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ 4023 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4024 !is_subword_type(Matcher::vector_element_basic_type(n))); 4025 match(Set dst (LoadVectorGather mem idx)); 4026 effect(TEMP dst, TEMP tmp, TEMP ktmp); 4027 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %} 4028 ins_encode %{ 4029 int vlen_enc = vector_length_encoding(this); 4030 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4031 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister); 4032 __ lea($tmp$$Register, $mem$$Address); 4033 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4034 %} 4035 ins_pipe( pipe_slow ); 4036 %} 4037 4038 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4039 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4040 !is_subword_type(Matcher::vector_element_basic_type(n))); 4041 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask))); 4042 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp); 4043 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %} 4044 ins_encode %{ 4045 assert(UseAVX > 2, "sanity"); 4046 int vlen_enc = vector_length_encoding(this); 4047 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4048 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4049 // Note: Since the gather instruction partially updates the opmask register used 4050 // for predication, the mask operand is moved to a temporary. 4051 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4052 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4053 __ lea($tmp$$Register, $mem$$Address); 4054 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4055 %} 4056 ins_pipe( pipe_slow ); 4057 %} 4058 4059 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegI rtmp) %{ 4060 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4061 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4062 effect(TEMP tmp, TEMP rtmp); 4063 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t!
using $tmp and $rtmp as TEMP" %} 4064 ins_encode %{ 4065 int vlen_enc = vector_length_encoding(this); 4066 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4067 __ lea($tmp$$Register, $mem$$Address); 4068 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp$$Register, vlen_enc); 4069 %} 4070 ins_pipe( pipe_slow ); 4071 %} 4072 4073 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegP idx_base_temp, 4074 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4075 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4076 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4077 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4078 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4079 ins_encode %{ 4080 int vlen_enc = vector_length_encoding(this); 4081 int vector_len = Matcher::vector_length(this); 4082 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4083 __ lea($tmp$$Register, $mem$$Address); 4084 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4085 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, noreg, $xtmp1$$XMMRegister, 4086 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4087 %} 4088 ins_pipe( pipe_slow ); 4089 %} 4090 4091 instruct vgather_subwordLE8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegI rtmp, rFlagsReg cr) %{ 4092 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4093 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4094 effect(TEMP tmp, TEMP rtmp, KILL cr); 4095 format %{ "vector_gatherLE8_off $dst, $mem, $idx_base, $offset\t! using $tmp and $rtmp as TEMP" %} 4096 ins_encode %{ 4097 int vlen_enc = vector_length_encoding(this); 4098 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4099 __ lea($tmp$$Register, $mem$$Address); 4100 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, $rtmp$$Register, vlen_enc); 4101 %} 4102 ins_pipe( pipe_slow ); 4103 %} 4104 4105 4106 instruct vgather_subwordGT8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegP idx_base_temp, 4107 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4108 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4109 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4110 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4111 format %{ "vector_gatherGT8_off $dst, $mem, $idx_base, $offset\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4112 ins_encode %{ 4113 int vlen_enc = vector_length_encoding(this); 4114 int vector_len = Matcher::vector_length(this); 4115 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4116 __ lea($tmp$$Register, $mem$$Address); 4117 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4118 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, noreg, $xtmp1$$XMMRegister, 4119 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4120 %} 4121 ins_pipe( pipe_slow ); 4122 %} 4123 4124 4125 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4126 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4127 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4128 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4129 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4130 ins_encode %{ 4131 int vlen_enc = vector_length_encoding(this); 4132 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4133 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4134 __ lea($tmp$$Register, $mem$$Address); 4135 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4136 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4137 %} 4138 ins_pipe( pipe_slow ); 4139 %} 4140 4141 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4142 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4143 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4144 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4145 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4146 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4147 ins_encode %{ 4148 int vlen_enc = vector_length_encoding(this); 4149 int vector_len = Matcher::vector_length(this); 4150 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4151 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4152 __ lea($tmp$$Register, $mem$$Address); 4153 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4154 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4155 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4156 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4157 %} 4158 ins_pipe( pipe_slow ); 4159 %} 4160 4161 instruct vgather_masked_subwordLE8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4162 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4163 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4164 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4165 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4166 ins_encode %{ 4167 int vlen_enc = vector_length_encoding(this); 4168 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4169 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4170 __ lea($tmp$$Register, $mem$$Address); 4171 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4172 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4173 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4174 %} 4175 ins_pipe( pipe_slow ); 4176 %} 4177 4178 instruct vgather_masked_subwordGT8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4179 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4180 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4181 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4182 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4183 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4184 ins_encode %{ 4185 int vlen_enc = vector_length_encoding(this); 4186 int vector_len = Matcher::vector_length(this); 4187 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4188 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4189 __ lea($tmp$$Register, $mem$$Address); 4190 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4191 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4192 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4193 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4194 %} 4195 ins_pipe( pipe_slow ); 4196 %} 4197 4198 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4199 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4200 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4201 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4202 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4203 ins_encode %{ 4204 int vlen_enc = vector_length_encoding(this); 4205 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4206 __ lea($tmp$$Register, $mem$$Address); 4207 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4208 if (elem_bt == T_SHORT) { 4209 __ movl($mask_idx$$Register, 0x55555555); 4210 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4211 } 4212 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4213 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4214 %} 4215 ins_pipe( pipe_slow ); 4216 %} 4217 4218 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4219 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4220 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4221 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4222 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4223 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4224 ins_encode %{ 4225 int vlen_enc = vector_length_encoding(this); 4226 int vector_len = Matcher::vector_length(this); 4227 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4228 __ lea($tmp$$Register, $mem$$Address); 4229 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4230 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4231 if (elem_bt == T_SHORT) { 4232 __ movl($mask_idx$$Register, 0x55555555); 4233 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4234 } 4235 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4236 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4237 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4238 %} 4239 ins_pipe( pipe_slow ); 4240 %} 4241 4242 instruct vgather_masked_subwordLE8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4243 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4244 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4245 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4246 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4247 ins_encode %{ 4248 int vlen_enc = vector_length_encoding(this); 4249 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4250 __ lea($tmp$$Register, $mem$$Address); 4251 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4252 if (elem_bt == T_SHORT) { 4253 __ movl($mask_idx$$Register, 0x55555555); 4254 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4255 } 4256 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4257 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4258 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4259 %} 4260 ins_pipe( pipe_slow ); 4261 %} 4262 4263 instruct vgather_masked_subwordGT8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4264 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4265 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4266 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4267 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4268 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4269 ins_encode %{ 4270 int vlen_enc = vector_length_encoding(this); 4271 int vector_len = Matcher::vector_length(this); 4272 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4273 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4274 __ lea($tmp$$Register, $mem$$Address); 4275 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4276 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4277 if (elem_bt == T_SHORT) { 4278 __ movl($mask_idx$$Register, 0x55555555); 4279 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4280 } 4281 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4282 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4283 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4284 %} 4285 ins_pipe( pipe_slow ); 4286 %} 4287 4288 // ====================Scatter======================================= 4289 4290 // Scatter INT, LONG, FLOAT, DOUBLE 4291 4292 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{ 4293 predicate(UseAVX > 2); 4294 match(Set mem (StoreVectorScatter mem (Binary src idx))); 4295 effect(TEMP tmp, TEMP ktmp); 4296 format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %} 4297 ins_encode %{ 4298 int vlen_enc = vector_length_encoding(this, $src); 4299 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4300 4301 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4302 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4303 4304 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg); 4305 __ lea($tmp$$Register, $mem$$Address); 4306 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4307 %} 4308 ins_pipe( pipe_slow ); 4309 %} 4310 4311 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4312 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask)))); 4313 effect(TEMP tmp, TEMP ktmp); 4314 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %} 4315 ins_encode %{ 4316 int vlen_enc = vector_length_encoding(this, $src); 4317 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4318 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4319 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4320 // Note: Since the scatter instruction partially updates the opmask register used 4321 // for predication, the mask operand is moved to a temporary.
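// (The AVX-512 scatter clears each opmask bit as the corresponding element is stored,
//  so the predicate register is effectively consumed by the instruction; operating on
//  the $ktmp copy leaves the caller's $mask operand intact.)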
4322 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4323 __ lea($tmp$$Register, $mem$$Address); 4324 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4325 %} 4326 ins_pipe( pipe_slow ); 4327 %} 4328 4329 // ====================REPLICATE======================================= 4330 4331 // Replicate byte scalar to be vector 4332 instruct vReplB_reg(vec dst, rRegI src) %{ 4333 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 4334 match(Set dst (Replicate src)); 4335 format %{ "replicateB $dst,$src" %} 4336 ins_encode %{ 4337 uint vlen = Matcher::vector_length(this); 4338 if (UseAVX >= 2) { 4339 int vlen_enc = vector_length_encoding(this); 4340 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4341 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4342 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4343 } else { 4344 __ movdl($dst$$XMMRegister, $src$$Register); 4345 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4346 } 4347 } else { 4348 assert(UseAVX < 2, ""); 4349 __ movdl($dst$$XMMRegister, $src$$Register); 4350 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4351 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4352 if (vlen >= 16) { 4353 assert(vlen == 16, ""); 4354 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4355 } 4356 } 4357 %} 4358 ins_pipe( pipe_slow ); 4359 %} 4360 4361 instruct ReplB_mem(vec dst, memory mem) %{ 4362 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE); 4363 match(Set dst (Replicate (LoadB mem))); 4364 format %{ "replicateB $dst,$mem" %} 4365 ins_encode %{ 4366 int vlen_enc = vector_length_encoding(this); 4367 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4368 %} 4369 ins_pipe( pipe_slow ); 4370 %} 4371 4372 // ====================ReplicateS======================================= 4373 4374 instruct vReplS_reg(vec dst, rRegI src) %{ 4375 predicate(Matcher::vector_element_basic_type(n) == T_SHORT); 4376 match(Set dst (Replicate src)); 4377 format %{ "replicateS $dst,$src" %} 4378 ins_encode %{ 4379 uint vlen = Matcher::vector_length(this); 4380 int vlen_enc = vector_length_encoding(this); 4381 if (UseAVX >= 2) { 4382 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4383 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4384 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4385 } else { 4386 __ movdl($dst$$XMMRegister, $src$$Register); 4387 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4388 } 4389 } else { 4390 assert(UseAVX < 2, ""); 4391 __ movdl($dst$$XMMRegister, $src$$Register); 4392 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4393 if (vlen >= 8) { 4394 assert(vlen == 8, ""); 4395 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4396 } 4397 } 4398 %} 4399 ins_pipe( pipe_slow ); 4400 %} 4401 4402 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{ 4403 match(Set dst (Replicate con)); 4404 effect(TEMP rtmp); 4405 format %{ "replicateHF $dst, $con \t! 
using $rtmp as TEMP" %} 4406 ins_encode %{ 4407 int vlen_enc = vector_length_encoding(this); 4408 BasicType bt = Matcher::vector_element_basic_type(this); 4409 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, ""); 4410 __ movl($rtmp$$Register, $con$$constant); 4411 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4412 %} 4413 ins_pipe( pipe_slow ); 4414 %} 4415 4416 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{ 4417 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT); 4418 match(Set dst (Replicate src)); 4419 effect(TEMP rtmp); 4420 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %} 4421 ins_encode %{ 4422 int vlen_enc = vector_length_encoding(this); 4423 __ vmovw($rtmp$$Register, $src$$XMMRegister); 4424 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4425 %} 4426 ins_pipe( pipe_slow ); 4427 %} 4428 4429 instruct ReplS_mem(vec dst, memory mem) %{ 4430 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4431 match(Set dst (Replicate (LoadS mem))); 4432 format %{ "replicateS $dst,$mem" %} 4433 ins_encode %{ 4434 int vlen_enc = vector_length_encoding(this); 4435 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4436 %} 4437 ins_pipe( pipe_slow ); 4438 %} 4439 4440 // ====================ReplicateI======================================= 4441 4442 instruct ReplI_reg(vec dst, rRegI src) %{ 4443 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4444 match(Set dst (Replicate src)); 4445 format %{ "replicateI $dst,$src" %} 4446 ins_encode %{ 4447 uint vlen = Matcher::vector_length(this); 4448 int vlen_enc = vector_length_encoding(this); 4449 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4450 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4451 } else if (VM_Version::supports_avx2()) { 4452 __ movdl($dst$$XMMRegister, $src$$Register); 4453 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4454 } else { 4455 __ movdl($dst$$XMMRegister, $src$$Register); 4456 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4457 } 4458 %} 4459 ins_pipe( pipe_slow ); 4460 %} 4461 4462 instruct ReplI_mem(vec dst, memory mem) %{ 4463 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4464 match(Set dst (Replicate (LoadI mem))); 4465 format %{ "replicateI $dst,$mem" %} 4466 ins_encode %{ 4467 int vlen_enc = vector_length_encoding(this); 4468 if (VM_Version::supports_avx2()) { 4469 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4470 } else if (VM_Version::supports_avx()) { 4471 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4472 } else { 4473 __ movdl($dst$$XMMRegister, $mem$$Address); 4474 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4475 } 4476 %} 4477 ins_pipe( pipe_slow ); 4478 %} 4479 4480 instruct ReplI_imm(vec dst, immI con) %{ 4481 predicate(Matcher::is_non_long_integral_vector(n)); 4482 match(Set dst (Replicate con)); 4483 format %{ "replicateI $dst,$con" %} 4484 ins_encode %{ 4485 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4486 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 16) / 4487 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4488 BasicType bt = Matcher::vector_element_basic_type(this); 4489 int vlen = Matcher::vector_length_in_bytes(this); 4490 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4491 %} 4492 ins_pipe( pipe_slow ); 4493 %} 4494 4495 // Replicate scalar zero to be vector 4496 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4497 predicate(Matcher::is_non_long_integral_vector(n)); 4498 match(Set dst (Replicate zero)); 4499 format %{ "replicateI $dst,$zero" %} 4500 ins_encode %{ 4501 int vlen_enc = vector_length_encoding(this); 4502 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4503 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4504 } else { 4505 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4506 } 4507 %} 4508 ins_pipe( fpu_reg_reg ); 4509 %} 4510 4511 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4512 predicate(Matcher::is_non_long_integral_vector(n)); 4513 match(Set dst (Replicate con)); 4514 format %{ "vallones $dst" %} 4515 ins_encode %{ 4516 int vector_len = vector_length_encoding(this); 4517 __ vallones($dst$$XMMRegister, vector_len); 4518 %} 4519 ins_pipe( pipe_slow ); 4520 %} 4521 4522 // ====================ReplicateL======================================= 4523 4524 // Replicate long (8 byte) scalar to be vector 4525 instruct ReplL_reg(vec dst, rRegL src) %{ 4526 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4527 match(Set dst (Replicate src)); 4528 format %{ "replicateL $dst,$src" %} 4529 ins_encode %{ 4530 int vlen = Matcher::vector_length(this); 4531 int vlen_enc = vector_length_encoding(this); 4532 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4533 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4534 } else if (VM_Version::supports_avx2()) { 4535 __ movdq($dst$$XMMRegister, $src$$Register); 4536 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4537 } else { 4538 __ movdq($dst$$XMMRegister, $src$$Register); 4539 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4540 } 4541 %} 4542 ins_pipe( pipe_slow ); 4543 %} 4544 4545 instruct ReplL_mem(vec dst, memory mem) %{ 4546 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4547 match(Set dst (Replicate (LoadL mem))); 4548 format %{ "replicateL $dst,$mem" %} 4549 ins_encode %{ 4550 int vlen_enc = vector_length_encoding(this); 4551 if (VM_Version::supports_avx2()) { 4552 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4553 } else if (VM_Version::supports_sse3()) { 4554 __ movddup($dst$$XMMRegister, $mem$$Address); 4555 } else { 4556 __ movq($dst$$XMMRegister, $mem$$Address); 4557 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4558 } 4559 %} 4560 ins_pipe( pipe_slow ); 4561 %} 4562 4563 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4564 instruct ReplL_imm(vec dst, immL con) %{ 4565 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4566 match(Set dst (Replicate con)); 4567 format %{ "replicateL $dst,$con" %} 4568 ins_encode %{ 4569 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4570 int vlen = Matcher::vector_length_in_bytes(this); 4571 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4572 %} 4573 ins_pipe( pipe_slow ); 4574 %} 4575 4576 instruct ReplL_zero(vec dst, immL0 zero) %{ 4577 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4578 match(Set dst (Replicate zero)); 4579 format %{ "replicateL $dst,$zero" %} 4580 ins_encode %{ 4581 int vlen_enc = vector_length_encoding(this); 4582 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4583 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4584 } else { 4585 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4586 } 4587 %} 4588 ins_pipe( fpu_reg_reg ); 4589 %} 4590 4591 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4592 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4593 match(Set dst (Replicate con)); 4594 format %{ "vallones $dst" %} 4595 ins_encode %{ 4596 int vector_len = vector_length_encoding(this); 4597 __ vallones($dst$$XMMRegister, vector_len); 4598 %} 4599 ins_pipe( pipe_slow ); 4600 %} 4601 4602 // ====================ReplicateF======================================= 4603 4604 instruct vReplF_reg(vec dst, vlRegF src) %{ 4605 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4606 match(Set dst (Replicate src)); 4607 format %{ "replicateF $dst,$src" %} 4608 ins_encode %{ 4609 uint vlen = Matcher::vector_length(this); 4610 int vlen_enc = vector_length_encoding(this); 4611 if (vlen <= 4) { 4612 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4613 } else if (VM_Version::supports_avx2()) { 4614 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4615 } else { 4616 assert(vlen == 8, "sanity"); 4617 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4618 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4619 } 4620 %} 4621 ins_pipe( pipe_slow ); 4622 %} 4623 4624 instruct ReplF_reg(vec dst, vlRegF src) %{ 4625 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4626 match(Set dst (Replicate src)); 4627 format %{ "replicateF $dst,$src" %} 4628 ins_encode %{ 4629 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4630 %} 4631 ins_pipe( pipe_slow ); 4632 %} 4633 4634 instruct ReplF_mem(vec dst, memory mem) %{ 4635 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4636 match(Set dst (Replicate (LoadF mem))); 4637 format %{ "replicateF $dst,$mem" %} 4638 ins_encode %{ 4639 int vlen_enc = vector_length_encoding(this); 4640 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4641 %} 4642 ins_pipe( pipe_slow ); 4643 %} 4644 4645 // Replicate float scalar immediate to be vector by loading from const table. 4646 instruct ReplF_imm(vec dst, immF con) %{ 4647 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4648 match(Set dst (Replicate con)); 4649 format %{ "replicateF $dst,$con" %} 4650 ins_encode %{ 4651 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant, 4652 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
1 : 2) : 4)); 4653 int vlen = Matcher::vector_length_in_bytes(this); 4654 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4655 %} 4656 ins_pipe( pipe_slow ); 4657 %} 4658 4659 instruct ReplF_zero(vec dst, immF0 zero) %{ 4660 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4661 match(Set dst (Replicate zero)); 4662 format %{ "replicateF $dst,$zero" %} 4663 ins_encode %{ 4664 int vlen_enc = vector_length_encoding(this); 4665 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4666 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4667 } else { 4668 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4669 } 4670 %} 4671 ins_pipe( fpu_reg_reg ); 4672 %} 4673 4674 // ====================ReplicateD======================================= 4675 4676 // Replicate double (8 bytes) scalar to be vector 4677 instruct vReplD_reg(vec dst, vlRegD src) %{ 4678 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4679 match(Set dst (Replicate src)); 4680 format %{ "replicateD $dst,$src" %} 4681 ins_encode %{ 4682 uint vlen = Matcher::vector_length(this); 4683 int vlen_enc = vector_length_encoding(this); 4684 if (vlen <= 2) { 4685 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4686 } else if (VM_Version::supports_avx2()) { 4687 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4688 } else { 4689 assert(vlen == 4, "sanity"); 4690 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4691 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4692 } 4693 %} 4694 ins_pipe( pipe_slow ); 4695 %} 4696 4697 instruct ReplD_reg(vec dst, vlRegD src) %{ 4698 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4699 match(Set dst (Replicate src)); 4700 format %{ "replicateD $dst,$src" %} 4701 ins_encode %{ 4702 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4703 %} 4704 ins_pipe( pipe_slow ); 4705 %} 4706 4707 instruct ReplD_mem(vec dst, memory mem) %{ 4708 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4709 match(Set dst (Replicate (LoadD mem))); 4710 format %{ "replicateD $dst,$mem" %} 4711 ins_encode %{ 4712 if (Matcher::vector_length(this) >= 4) { 4713 int vlen_enc = vector_length_encoding(this); 4714 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4715 } else { 4716 __ movddup($dst$$XMMRegister, $mem$$Address); 4717 } 4718 %} 4719 ins_pipe( pipe_slow ); 4720 %} 4721 4722 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 4723 instruct ReplD_imm(vec dst, immD con) %{ 4724 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4725 match(Set dst (Replicate con)); 4726 format %{ "replicateD $dst,$con" %} 4727 ins_encode %{ 4728 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4729 int vlen = Matcher::vector_length_in_bytes(this); 4730 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen); 4731 %} 4732 ins_pipe( pipe_slow ); 4733 %} 4734 4735 instruct ReplD_zero(vec dst, immD0 zero) %{ 4736 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4737 match(Set dst (Replicate zero)); 4738 format %{ "replicateD $dst,$zero" %} 4739 ins_encode %{ 4740 int vlen_enc = vector_length_encoding(this); 4741 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4742 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4743 } else { 4744 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4745 } 4746 %} 4747 ins_pipe( fpu_reg_reg ); 4748 %} 4749 4750 // ====================VECTOR INSERT======================================= 4751 4752 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4753 predicate(Matcher::vector_length_in_bytes(n) < 32); 4754 match(Set dst (VectorInsert (Binary dst val) idx)); 4755 format %{ "vector_insert $dst,$val,$idx" %} 4756 ins_encode %{ 4757 assert(UseSSE >= 4, "required"); 4758 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4759 4760 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4761 4762 assert(is_integral_type(elem_bt), ""); 4763 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4764 4765 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4766 %} 4767 ins_pipe( pipe_slow ); 4768 %} 4769 4770 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4771 predicate(Matcher::vector_length_in_bytes(n) == 32); 4772 match(Set dst (VectorInsert (Binary src val) idx)); 4773 effect(TEMP vtmp); 4774 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4775 ins_encode %{ 4776 int vlen_enc = Assembler::AVX_256bit; 4777 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4778 int elem_per_lane = 16/type2aelembytes(elem_bt); 4779 int log2epr = log2(elem_per_lane); 4780 4781 assert(is_integral_type(elem_bt), "sanity"); 4782 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4783 4784 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4785 uint y_idx = ($idx$$constant >> log2epr) & 1; 4786 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4787 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4788 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4789 %} 4790 ins_pipe( pipe_slow ); 4791 %} 4792 4793 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4794 predicate(Matcher::vector_length_in_bytes(n) == 64); 4795 match(Set dst (VectorInsert (Binary src val) idx)); 4796 effect(TEMP vtmp); 4797 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4798 ins_encode %{ 4799 assert(UseAVX > 2, "sanity"); 4800 4801 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4802 int elem_per_lane = 16/type2aelembytes(elem_bt); 4803 int log2epr = log2(elem_per_lane); 4804 4805 assert(is_integral_type(elem_bt), ""); 4806 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4807 4808 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4809 uint y_idx = ($idx$$constant >> log2epr) & 3; 4810 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4811 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4812 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, 
y_idx); 4813 %} 4814 ins_pipe( pipe_slow ); 4815 %} 4816 4817 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4818 predicate(Matcher::vector_length(n) == 2); 4819 match(Set dst (VectorInsert (Binary dst val) idx)); 4820 format %{ "vector_insert $dst,$val,$idx" %} 4821 ins_encode %{ 4822 assert(UseSSE >= 4, "required"); 4823 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4824 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4825 4826 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4827 %} 4828 ins_pipe( pipe_slow ); 4829 %} 4830 4831 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4832 predicate(Matcher::vector_length(n) == 4); 4833 match(Set dst (VectorInsert (Binary src val) idx)); 4834 effect(TEMP vtmp); 4835 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4836 ins_encode %{ 4837 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4838 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4839 4840 uint x_idx = $idx$$constant & right_n_bits(1); 4841 uint y_idx = ($idx$$constant >> 1) & 1; 4842 int vlen_enc = Assembler::AVX_256bit; 4843 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4844 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4845 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4846 %} 4847 ins_pipe( pipe_slow ); 4848 %} 4849 4850 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 4851 predicate(Matcher::vector_length(n) == 8); 4852 match(Set dst (VectorInsert (Binary src val) idx)); 4853 effect(TEMP vtmp); 4854 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4855 ins_encode %{ 4856 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 4857 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4858 4859 uint x_idx = $idx$$constant & right_n_bits(1); 4860 uint y_idx = ($idx$$constant >> 1) & 3; 4861 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4862 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4863 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4864 %} 4865 ins_pipe( pipe_slow ); 4866 %} 4867 4868 instruct insertF(vec dst, regF val, immU8 idx) %{ 4869 predicate(Matcher::vector_length(n) < 8); 4870 match(Set dst (VectorInsert (Binary dst val) idx)); 4871 format %{ "vector_insert $dst,$val,$idx" %} 4872 ins_encode %{ 4873 assert(UseSSE >= 4, "sanity"); 4874 4875 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4876 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4877 4878 uint x_idx = $idx$$constant & right_n_bits(2); 4879 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4880 %} 4881 ins_pipe( pipe_slow ); 4882 %} 4883 4884 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 4885 predicate(Matcher::vector_length(n) >= 8); 4886 match(Set dst (VectorInsert (Binary src val) idx)); 4887 effect(TEMP vtmp); 4888 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4889 ins_encode %{ 4890 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4891 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4892 4893 int vlen = Matcher::vector_length(this); 4894 uint x_idx = $idx$$constant & right_n_bits(2); 4895 if (vlen == 8) { 4896 uint y_idx = ($idx$$constant >> 2) & 1; 4897 
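// Worked example (comment only): for an 8-float vector and a constant idx of 6,
// x_idx = 6 & 3 = 2 (slot within a 128-bit lane) and y_idx = 6 >> 2 = 1 (the upper
// lane), so the upper 128 bits are extracted, patched at slot 2 via vinsertps,
// and written back with vinserti128.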
int vlen_enc = Assembler::AVX_256bit; 4898 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4899 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4900 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4901 } else { 4902 assert(vlen == 16, "sanity"); 4903 uint y_idx = ($idx$$constant >> 2) & 3; 4904 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4905 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4906 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4907 } 4908 %} 4909 ins_pipe( pipe_slow ); 4910 %} 4911 4912 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 4913 predicate(Matcher::vector_length(n) == 2); 4914 match(Set dst (VectorInsert (Binary dst val) idx)); 4915 effect(TEMP tmp); 4916 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 4917 ins_encode %{ 4918 assert(UseSSE >= 4, "sanity"); 4919 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4920 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4921 4922 __ movq($tmp$$Register, $val$$XMMRegister); 4923 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 4924 %} 4925 ins_pipe( pipe_slow ); 4926 %} 4927 4928 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 4929 predicate(Matcher::vector_length(n) == 4); 4930 match(Set dst (VectorInsert (Binary src val) idx)); 4931 effect(TEMP vtmp, TEMP tmp); 4932 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 4933 ins_encode %{ 4934 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4935 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4936 4937 uint x_idx = $idx$$constant & right_n_bits(1); 4938 uint y_idx = ($idx$$constant >> 1) & 1; 4939 int vlen_enc = Assembler::AVX_256bit; 4940 __ movq($tmp$$Register, $val$$XMMRegister); 4941 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4942 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4943 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4944 %} 4945 ins_pipe( pipe_slow ); 4946 %} 4947 4948 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 4949 predicate(Matcher::vector_length(n) == 8); 4950 match(Set dst (VectorInsert (Binary src val) idx)); 4951 effect(TEMP tmp, TEMP vtmp); 4952 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4953 ins_encode %{ 4954 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4955 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4956 4957 uint x_idx = $idx$$constant & right_n_bits(1); 4958 uint y_idx = ($idx$$constant >> 1) & 3; 4959 __ movq($tmp$$Register, $val$$XMMRegister); 4960 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4961 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4962 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4963 %} 4964 ins_pipe( pipe_slow ); 4965 %} 4966 4967 // ====================REDUCTION ARITHMETIC======================================= 4968 4969 // =======================Int Reduction========================================== 4970 4971 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4972 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); 
// src2 4973 match(Set dst (AddReductionVI src1 src2)); 4974 match(Set dst (MulReductionVI src1 src2)); 4975 match(Set dst (AndReductionV src1 src2)); 4976 match(Set dst ( OrReductionV src1 src2)); 4977 match(Set dst (XorReductionV src1 src2)); 4978 match(Set dst (MinReductionV src1 src2)); 4979 match(Set dst (MaxReductionV src1 src2)); 4980 effect(TEMP vtmp1, TEMP vtmp2); 4981 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4982 ins_encode %{ 4983 int opcode = this->ideal_Opcode(); 4984 int vlen = Matcher::vector_length(this, $src2); 4985 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4986 %} 4987 ins_pipe( pipe_slow ); 4988 %} 4989 4990 // =======================Long Reduction========================================== 4991 4992 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4993 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); 4994 match(Set dst (AddReductionVL src1 src2)); 4995 match(Set dst (MulReductionVL src1 src2)); 4996 match(Set dst (AndReductionV src1 src2)); 4997 match(Set dst ( OrReductionV src1 src2)); 4998 match(Set dst (XorReductionV src1 src2)); 4999 match(Set dst (MinReductionV src1 src2)); 5000 match(Set dst (MaxReductionV src1 src2)); 5001 effect(TEMP vtmp1, TEMP vtmp2); 5002 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5003 ins_encode %{ 5004 int opcode = this->ideal_Opcode(); 5005 int vlen = Matcher::vector_length(this, $src2); 5006 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5007 %} 5008 ins_pipe( pipe_slow ); 5009 %} 5010 5011 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 5012 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); 5013 match(Set dst (AddReductionVL src1 src2)); 5014 match(Set dst (MulReductionVL src1 src2)); 5015 match(Set dst (AndReductionV src1 src2)); 5016 match(Set dst ( OrReductionV src1 src2)); 5017 match(Set dst (XorReductionV src1 src2)); 5018 match(Set dst (MinReductionV src1 src2)); 5019 match(Set dst (MaxReductionV src1 src2)); 5020 effect(TEMP vtmp1, TEMP vtmp2); 5021 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5022 ins_encode %{ 5023 int opcode = this->ideal_Opcode(); 5024 int vlen = Matcher::vector_length(this, $src2); 5025 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5026 %} 5027 ins_pipe( pipe_slow ); 5028 %} 5029 5030 // =======================Float Reduction========================================== 5031 5032 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 5033 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src 5034 match(Set dst (AddReductionVF dst src)); 5035 match(Set dst (MulReductionVF dst src)); 5036 effect(TEMP dst, TEMP vtmp); 5037 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 5038 ins_encode %{ 5039 int opcode = this->ideal_Opcode(); 5040 int vlen = Matcher::vector_length(this, $src); 5041 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5042 %} 5043 ins_pipe( pipe_slow ); 5044 %} 5045 5046 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 5047 
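// Strictly ordered 8-lane float add/mul reduction: the scalar accumulator in $dst is
// combined with the lanes of $src in lane order, conceptually
//   acc = dst; for (int i = 0; i < 8; i++) acc = acc OP src[i]; dst = acc;   // OP is + or *
// (illustrative sketch only; the actual instruction sequence is emitted by reduce_fp
// in the macro assembler).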
predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5048 match(Set dst (AddReductionVF dst src)); 5049 match(Set dst (MulReductionVF dst src)); 5050 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5051 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5052 ins_encode %{ 5053 int opcode = this->ideal_Opcode(); 5054 int vlen = Matcher::vector_length(this, $src); 5055 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5056 %} 5057 ins_pipe( pipe_slow ); 5058 %} 5059 5060 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5061 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src 5062 match(Set dst (AddReductionVF dst src)); 5063 match(Set dst (MulReductionVF dst src)); 5064 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5065 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5066 ins_encode %{ 5067 int opcode = this->ideal_Opcode(); 5068 int vlen = Matcher::vector_length(this, $src); 5069 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5070 %} 5071 ins_pipe( pipe_slow ); 5072 %} 5073 5074 5075 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{ 5076 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5077 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5078 // src1 contains reduction identity 5079 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5080 match(Set dst (AddReductionVF src1 src2)); 5081 match(Set dst (MulReductionVF src1 src2)); 5082 effect(TEMP dst); 5083 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %} 5084 ins_encode %{ 5085 int opcode = this->ideal_Opcode(); 5086 int vlen = Matcher::vector_length(this, $src2); 5087 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5088 %} 5089 ins_pipe( pipe_slow ); 5090 %} 5091 5092 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{ 5093 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5094 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5095 // src1 contains reduction identity 5096 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5097 match(Set dst (AddReductionVF src1 src2)); 5098 match(Set dst (MulReductionVF src1 src2)); 5099 effect(TEMP dst, TEMP vtmp); 5100 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5101 ins_encode %{ 5102 int opcode = this->ideal_Opcode(); 5103 int vlen = Matcher::vector_length(this, $src2); 5104 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5105 %} 5106 ins_pipe( pipe_slow ); 5107 %} 5108 5109 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{ 5110 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5111 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
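// Because ordering is relaxed here, the lanes can be folded as a tree, e.g. combining
// the upper 128-bit half with the lower half before reducing the remaining four lanes
// to a scalar (an illustrative description of what unordered_reduce_fp may emit, not a
// specification of the exact sequence).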
5112 // src1 contains reduction identity 5113 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5114 match(Set dst (AddReductionVF src1 src2)); 5115 match(Set dst (MulReductionVF src1 src2)); 5116 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5117 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5118 ins_encode %{ 5119 int opcode = this->ideal_Opcode(); 5120 int vlen = Matcher::vector_length(this, $src2); 5121 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5122 %} 5123 ins_pipe( pipe_slow ); 5124 %} 5125 5126 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5127 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5128 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5129 // src1 contains reduction identity 5130 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2 5131 match(Set dst (AddReductionVF src1 src2)); 5132 match(Set dst (MulReductionVF src1 src2)); 5133 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5134 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5135 ins_encode %{ 5136 int opcode = this->ideal_Opcode(); 5137 int vlen = Matcher::vector_length(this, $src2); 5138 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5139 %} 5140 ins_pipe( pipe_slow ); 5141 %} 5142 5143 // =======================Double Reduction========================================== 5144 5145 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 5146 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src 5147 match(Set dst (AddReductionVD dst src)); 5148 match(Set dst (MulReductionVD dst src)); 5149 effect(TEMP dst, TEMP vtmp); 5150 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 5151 ins_encode %{ 5152 int opcode = this->ideal_Opcode(); 5153 int vlen = Matcher::vector_length(this, $src); 5154 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5155 %} 5156 ins_pipe( pipe_slow ); 5157 %} 5158 5159 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 5160 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src 5161 match(Set dst (AddReductionVD dst src)); 5162 match(Set dst (MulReductionVD dst src)); 5163 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5164 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5165 ins_encode %{ 5166 int opcode = this->ideal_Opcode(); 5167 int vlen = Matcher::vector_length(this, $src); 5168 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5169 %} 5170 ins_pipe( pipe_slow ); 5171 %} 5172 5173 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5174 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5175 match(Set dst (AddReductionVD dst src)); 5176 match(Set dst (MulReductionVD dst src)); 5177 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5178 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5179 ins_encode %{ 5180 int opcode = this->ideal_Opcode(); 5181 int vlen = Matcher::vector_length(this, $src); 5182 __ 
reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5183 %} 5184 ins_pipe( pipe_slow ); 5185 %} 5186 5187 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{ 5188 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5189 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5190 // src1 contains reduction identity 5191 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5192 match(Set dst (AddReductionVD src1 src2)); 5193 match(Set dst (MulReductionVD src1 src2)); 5194 effect(TEMP dst); 5195 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %} 5196 ins_encode %{ 5197 int opcode = this->ideal_Opcode(); 5198 int vlen = Matcher::vector_length(this, $src2); 5199 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5200 %} 5201 ins_pipe( pipe_slow ); 5202 %} 5203 5204 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{ 5205 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5206 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5207 // src1 contains reduction identity 5208 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5209 match(Set dst (AddReductionVD src1 src2)); 5210 match(Set dst (MulReductionVD src1 src2)); 5211 effect(TEMP dst, TEMP vtmp); 5212 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5213 ins_encode %{ 5214 int opcode = this->ideal_Opcode(); 5215 int vlen = Matcher::vector_length(this, $src2); 5216 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5217 %} 5218 ins_pipe( pipe_slow ); 5219 %} 5220 5221 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5222 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5223 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
5224 // src1 contains reduction identity 5225 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5226 match(Set dst (AddReductionVD src1 src2)); 5227 match(Set dst (MulReductionVD src1 src2)); 5228 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5229 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5230 ins_encode %{ 5231 int opcode = this->ideal_Opcode(); 5232 int vlen = Matcher::vector_length(this, $src2); 5233 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5234 %} 5235 ins_pipe( pipe_slow ); 5236 %} 5237 5238 // =======================Byte Reduction========================================== 5239 5240 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5241 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 5242 match(Set dst (AddReductionVI src1 src2)); 5243 match(Set dst (AndReductionV src1 src2)); 5244 match(Set dst ( OrReductionV src1 src2)); 5245 match(Set dst (XorReductionV src1 src2)); 5246 match(Set dst (MinReductionV src1 src2)); 5247 match(Set dst (MaxReductionV src1 src2)); 5248 effect(TEMP vtmp1, TEMP vtmp2); 5249 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5250 ins_encode %{ 5251 int opcode = this->ideal_Opcode(); 5252 int vlen = Matcher::vector_length(this, $src2); 5253 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5254 %} 5255 ins_pipe( pipe_slow ); 5256 %} 5257 5258 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5259 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 5260 match(Set dst (AddReductionVI src1 src2)); 5261 match(Set dst (AndReductionV src1 src2)); 5262 match(Set dst ( OrReductionV src1 src2)); 5263 match(Set dst (XorReductionV src1 src2)); 5264 match(Set dst (MinReductionV src1 src2)); 5265 match(Set dst (MaxReductionV src1 src2)); 5266 effect(TEMP vtmp1, TEMP vtmp2); 5267 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5268 ins_encode %{ 5269 int opcode = this->ideal_Opcode(); 5270 int vlen = Matcher::vector_length(this, $src2); 5271 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5272 %} 5273 ins_pipe( pipe_slow ); 5274 %} 5275 5276 // =======================Short Reduction========================================== 5277 5278 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5279 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 5280 match(Set dst (AddReductionVI src1 src2)); 5281 match(Set dst (MulReductionVI src1 src2)); 5282 match(Set dst (AndReductionV src1 src2)); 5283 match(Set dst ( OrReductionV src1 src2)); 5284 match(Set dst (XorReductionV src1 src2)); 5285 match(Set dst (MinReductionV src1 src2)); 5286 match(Set dst (MaxReductionV src1 src2)); 5287 effect(TEMP vtmp1, TEMP vtmp2); 5288 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5289 ins_encode %{ 5290 int opcode = this->ideal_Opcode(); 5291 int vlen = Matcher::vector_length(this, $src2); 5292 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5293 %} 5294 ins_pipe( pipe_slow 
); 5295 %} 5296 5297 // =======================Mul Reduction========================================== 5298 5299 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5300 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5301 Matcher::vector_length(n->in(2)) <= 32); // src2 5302 match(Set dst (MulReductionVI src1 src2)); 5303 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5304 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5305 ins_encode %{ 5306 int opcode = this->ideal_Opcode(); 5307 int vlen = Matcher::vector_length(this, $src2); 5308 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5309 %} 5310 ins_pipe( pipe_slow ); 5311 %} 5312 5313 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5314 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5315 Matcher::vector_length(n->in(2)) == 64); // src2 5316 match(Set dst (MulReductionVI src1 src2)); 5317 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5318 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5319 ins_encode %{ 5320 int opcode = this->ideal_Opcode(); 5321 int vlen = Matcher::vector_length(this, $src2); 5322 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5323 %} 5324 ins_pipe( pipe_slow ); 5325 %} 5326 5327 //--------------------Min/Max Float Reduction -------------------- 5328 // Float Min Reduction 5329 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, 5330 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5331 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5332 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5333 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5334 Matcher::vector_length(n->in(2)) == 2); 5335 match(Set dst (MinReductionV src1 src2)); 5336 match(Set dst (MaxReductionV src1 src2)); 5337 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5338 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5339 ins_encode %{ 5340 assert(UseAVX > 0, "sanity"); 5341 5342 int opcode = this->ideal_Opcode(); 5343 int vlen = Matcher::vector_length(this, $src2); 5344 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5345 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5346 %} 5347 ins_pipe( pipe_slow ); 5348 %} 5349 5350 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5351 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5352 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5353 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5354 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5355 Matcher::vector_length(n->in(2)) >= 4); 5356 match(Set dst (MinReductionV src1 src2)); 5357 match(Set dst (MaxReductionV src1 src2)); 5358 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5359 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5360 ins_encode %{ 5361 assert(UseAVX > 0, "sanity"); 5362 5363 int opcode = this->ideal_Opcode(); 5364 int vlen = 
Matcher::vector_length(this, $src2); 5365 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5366 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5367 %} 5368 ins_pipe( pipe_slow ); 5369 %} 5370 5371 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, 5372 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5373 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5374 Matcher::vector_length(n->in(2)) == 2); 5375 match(Set dst (MinReductionV dst src)); 5376 match(Set dst (MaxReductionV dst src)); 5377 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5378 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5379 ins_encode %{ 5380 assert(UseAVX > 0, "sanity"); 5381 5382 int opcode = this->ideal_Opcode(); 5383 int vlen = Matcher::vector_length(this, $src); 5384 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5385 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5386 %} 5387 ins_pipe( pipe_slow ); 5388 %} 5389 5390 5391 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 5392 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5393 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5394 Matcher::vector_length(n->in(2)) >= 4); 5395 match(Set dst (MinReductionV dst src)); 5396 match(Set dst (MaxReductionV dst src)); 5397 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5398 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5399 ins_encode %{ 5400 assert(UseAVX > 0, "sanity"); 5401 5402 int opcode = this->ideal_Opcode(); 5403 int vlen = Matcher::vector_length(this, $src); 5404 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5405 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5406 %} 5407 ins_pipe( pipe_slow ); 5408 %} 5409 5410 5411 //--------------------Min Double Reduction -------------------- 5412 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 5413 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5414 rFlagsReg cr) %{ 5415 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5416 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5417 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5418 Matcher::vector_length(n->in(2)) == 2); 5419 match(Set dst (MinReductionV src1 src2)); 5420 match(Set dst (MaxReductionV src1 src2)); 5421 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5422 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5423 ins_encode %{ 5424 assert(UseAVX > 0, "sanity"); 5425 5426 int opcode = this->ideal_Opcode(); 5427 int vlen = Matcher::vector_length(this, $src2); 5428 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5429 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5430 %} 5431 ins_pipe( pipe_slow ); 5432 %} 5433 5434 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 5435 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5436 rFlagsReg cr) %{ 5437 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5438 
((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5439 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5440 Matcher::vector_length(n->in(2)) >= 4); 5441 match(Set dst (MinReductionV src1 src2)); 5442 match(Set dst (MaxReductionV src1 src2)); 5443 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5444 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5445 ins_encode %{ 5446 assert(UseAVX > 0, "sanity"); 5447 5448 int opcode = this->ideal_Opcode(); 5449 int vlen = Matcher::vector_length(this, $src2); 5450 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5451 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5452 %} 5453 ins_pipe( pipe_slow ); 5454 %} 5455 5456 5457 instruct minmax_reduction2D_av(legRegD dst, legVec src, 5458 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5459 rFlagsReg cr) %{ 5460 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5461 Matcher::vector_length(n->in(2)) == 2); 5462 match(Set dst (MinReductionV dst src)); 5463 match(Set dst (MaxReductionV dst src)); 5464 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5465 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5466 ins_encode %{ 5467 assert(UseAVX > 0, "sanity"); 5468 5469 int opcode = this->ideal_Opcode(); 5470 int vlen = Matcher::vector_length(this, $src); 5471 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5472 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5473 %} 5474 ins_pipe( pipe_slow ); 5475 %} 5476 5477 instruct minmax_reductionD_av(legRegD dst, legVec src, 5478 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5479 rFlagsReg cr) %{ 5480 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5481 Matcher::vector_length(n->in(2)) >= 4); 5482 match(Set dst (MinReductionV dst src)); 5483 match(Set dst (MaxReductionV dst src)); 5484 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5485 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5486 ins_encode %{ 5487 assert(UseAVX > 0, "sanity"); 5488 5489 int opcode = this->ideal_Opcode(); 5490 int vlen = Matcher::vector_length(this, $src); 5491 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5492 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5493 %} 5494 ins_pipe( pipe_slow ); 5495 %} 5496 5497 // ====================VECTOR ARITHMETIC======================================= 5498 5499 // --------------------------------- ADD -------------------------------------- 5500 5501 // Bytes vector add 5502 instruct vaddB(vec dst, vec src) %{ 5503 predicate(UseAVX == 0); 5504 match(Set dst (AddVB dst src)); 5505 format %{ "paddb $dst,$src\t! add packedB" %} 5506 ins_encode %{ 5507 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5508 %} 5509 ins_pipe( pipe_slow ); 5510 %} 5511 5512 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5513 predicate(UseAVX > 0); 5514 match(Set dst (AddVB src1 src2)); 5515 format %{ "vpaddb $dst,$src1,$src2\t! 
add packedB" %} 5516 ins_encode %{ 5517 int vlen_enc = vector_length_encoding(this); 5518 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5519 %} 5520 ins_pipe( pipe_slow ); 5521 %} 5522 5523 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5524 predicate((UseAVX > 0) && 5525 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5526 match(Set dst (AddVB src (LoadVector mem))); 5527 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5528 ins_encode %{ 5529 int vlen_enc = vector_length_encoding(this); 5530 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5531 %} 5532 ins_pipe( pipe_slow ); 5533 %} 5534 5535 // Shorts/Chars vector add 5536 instruct vaddS(vec dst, vec src) %{ 5537 predicate(UseAVX == 0); 5538 match(Set dst (AddVS dst src)); 5539 format %{ "paddw $dst,$src\t! add packedS" %} 5540 ins_encode %{ 5541 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5542 %} 5543 ins_pipe( pipe_slow ); 5544 %} 5545 5546 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5547 predicate(UseAVX > 0); 5548 match(Set dst (AddVS src1 src2)); 5549 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5550 ins_encode %{ 5551 int vlen_enc = vector_length_encoding(this); 5552 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5553 %} 5554 ins_pipe( pipe_slow ); 5555 %} 5556 5557 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5558 predicate((UseAVX > 0) && 5559 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5560 match(Set dst (AddVS src (LoadVector mem))); 5561 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5562 ins_encode %{ 5563 int vlen_enc = vector_length_encoding(this); 5564 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5565 %} 5566 ins_pipe( pipe_slow ); 5567 %} 5568 5569 // Integers vector add 5570 instruct vaddI(vec dst, vec src) %{ 5571 predicate(UseAVX == 0); 5572 match(Set dst (AddVI dst src)); 5573 format %{ "paddd $dst,$src\t! add packedI" %} 5574 ins_encode %{ 5575 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5576 %} 5577 ins_pipe( pipe_slow ); 5578 %} 5579 5580 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5581 predicate(UseAVX > 0); 5582 match(Set dst (AddVI src1 src2)); 5583 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5584 ins_encode %{ 5585 int vlen_enc = vector_length_encoding(this); 5586 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5587 %} 5588 ins_pipe( pipe_slow ); 5589 %} 5590 5591 5592 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5593 predicate((UseAVX > 0) && 5594 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5595 match(Set dst (AddVI src (LoadVector mem))); 5596 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5597 ins_encode %{ 5598 int vlen_enc = vector_length_encoding(this); 5599 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5600 %} 5601 ins_pipe( pipe_slow ); 5602 %} 5603 5604 // Longs vector add 5605 instruct vaddL(vec dst, vec src) %{ 5606 predicate(UseAVX == 0); 5607 match(Set dst (AddVL dst src)); 5608 format %{ "paddq $dst,$src\t! add packedL" %} 5609 ins_encode %{ 5610 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5611 %} 5612 ins_pipe( pipe_slow ); 5613 %} 5614 5615 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5616 predicate(UseAVX > 0); 5617 match(Set dst (AddVL src1 src2)); 5618 format %{ "vpaddq $dst,$src1,$src2\t! 
add packedL" %} 5619 ins_encode %{ 5620 int vlen_enc = vector_length_encoding(this); 5621 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5622 %} 5623 ins_pipe( pipe_slow ); 5624 %} 5625 5626 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5627 predicate((UseAVX > 0) && 5628 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5629 match(Set dst (AddVL src (LoadVector mem))); 5630 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5631 ins_encode %{ 5632 int vlen_enc = vector_length_encoding(this); 5633 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5634 %} 5635 ins_pipe( pipe_slow ); 5636 %} 5637 5638 // Floats vector add 5639 instruct vaddF(vec dst, vec src) %{ 5640 predicate(UseAVX == 0); 5641 match(Set dst (AddVF dst src)); 5642 format %{ "addps $dst,$src\t! add packedF" %} 5643 ins_encode %{ 5644 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5645 %} 5646 ins_pipe( pipe_slow ); 5647 %} 5648 5649 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5650 predicate(UseAVX > 0); 5651 match(Set dst (AddVF src1 src2)); 5652 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5653 ins_encode %{ 5654 int vlen_enc = vector_length_encoding(this); 5655 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5656 %} 5657 ins_pipe( pipe_slow ); 5658 %} 5659 5660 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5661 predicate((UseAVX > 0) && 5662 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5663 match(Set dst (AddVF src (LoadVector mem))); 5664 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5665 ins_encode %{ 5666 int vlen_enc = vector_length_encoding(this); 5667 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5668 %} 5669 ins_pipe( pipe_slow ); 5670 %} 5671 5672 // Doubles vector add 5673 instruct vaddD(vec dst, vec src) %{ 5674 predicate(UseAVX == 0); 5675 match(Set dst (AddVD dst src)); 5676 format %{ "addpd $dst,$src\t! add packedD" %} 5677 ins_encode %{ 5678 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5679 %} 5680 ins_pipe( pipe_slow ); 5681 %} 5682 5683 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5684 predicate(UseAVX > 0); 5685 match(Set dst (AddVD src1 src2)); 5686 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5687 ins_encode %{ 5688 int vlen_enc = vector_length_encoding(this); 5689 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5690 %} 5691 ins_pipe( pipe_slow ); 5692 %} 5693 5694 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5695 predicate((UseAVX > 0) && 5696 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5697 match(Set dst (AddVD src (LoadVector mem))); 5698 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5699 ins_encode %{ 5700 int vlen_enc = vector_length_encoding(this); 5701 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5702 %} 5703 ins_pipe( pipe_slow ); 5704 %} 5705 5706 // --------------------------------- SUB -------------------------------------- 5707 5708 // Bytes vector sub 5709 instruct vsubB(vec dst, vec src) %{ 5710 predicate(UseAVX == 0); 5711 match(Set dst (SubVB dst src)); 5712 format %{ "psubb $dst,$src\t! sub packedB" %} 5713 ins_encode %{ 5714 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5715 %} 5716 ins_pipe( pipe_slow ); 5717 %} 5718 5719 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5720 predicate(UseAVX > 0); 5721 match(Set dst (SubVB src1 src2)); 5722 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packedB" %} 5723 ins_encode %{ 5724 int vlen_enc = vector_length_encoding(this); 5725 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5726 %} 5727 ins_pipe( pipe_slow ); 5728 %} 5729 5730 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5731 predicate((UseAVX > 0) && 5732 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5733 match(Set dst (SubVB src (LoadVector mem))); 5734 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5735 ins_encode %{ 5736 int vlen_enc = vector_length_encoding(this); 5737 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5738 %} 5739 ins_pipe( pipe_slow ); 5740 %} 5741 5742 // Shorts/Chars vector sub 5743 instruct vsubS(vec dst, vec src) %{ 5744 predicate(UseAVX == 0); 5745 match(Set dst (SubVS dst src)); 5746 format %{ "psubw $dst,$src\t! sub packedS" %} 5747 ins_encode %{ 5748 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5749 %} 5750 ins_pipe( pipe_slow ); 5751 %} 5752 5753 5754 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5755 predicate(UseAVX > 0); 5756 match(Set dst (SubVS src1 src2)); 5757 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5758 ins_encode %{ 5759 int vlen_enc = vector_length_encoding(this); 5760 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5761 %} 5762 ins_pipe( pipe_slow ); 5763 %} 5764 5765 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5766 predicate((UseAVX > 0) && 5767 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5768 match(Set dst (SubVS src (LoadVector mem))); 5769 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5770 ins_encode %{ 5771 int vlen_enc = vector_length_encoding(this); 5772 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5773 %} 5774 ins_pipe( pipe_slow ); 5775 %} 5776 5777 // Integers vector sub 5778 instruct vsubI(vec dst, vec src) %{ 5779 predicate(UseAVX == 0); 5780 match(Set dst (SubVI dst src)); 5781 format %{ "psubd $dst,$src\t! sub packedI" %} 5782 ins_encode %{ 5783 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5784 %} 5785 ins_pipe( pipe_slow ); 5786 %} 5787 5788 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5789 predicate(UseAVX > 0); 5790 match(Set dst (SubVI src1 src2)); 5791 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5792 ins_encode %{ 5793 int vlen_enc = vector_length_encoding(this); 5794 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5795 %} 5796 ins_pipe( pipe_slow ); 5797 %} 5798 5799 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5800 predicate((UseAVX > 0) && 5801 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5802 match(Set dst (SubVI src (LoadVector mem))); 5803 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5804 ins_encode %{ 5805 int vlen_enc = vector_length_encoding(this); 5806 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5807 %} 5808 ins_pipe( pipe_slow ); 5809 %} 5810 5811 // Longs vector sub 5812 instruct vsubL(vec dst, vec src) %{ 5813 predicate(UseAVX == 0); 5814 match(Set dst (SubVL dst src)); 5815 format %{ "psubq $dst,$src\t! sub packedL" %} 5816 ins_encode %{ 5817 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5818 %} 5819 ins_pipe( pipe_slow ); 5820 %} 5821 5822 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5823 predicate(UseAVX > 0); 5824 match(Set dst (SubVL src1 src2)); 5825 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packedL" %} 5826 ins_encode %{ 5827 int vlen_enc = vector_length_encoding(this); 5828 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5829 %} 5830 ins_pipe( pipe_slow ); 5831 %} 5832 5833 5834 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5835 predicate((UseAVX > 0) && 5836 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5837 match(Set dst (SubVL src (LoadVector mem))); 5838 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5839 ins_encode %{ 5840 int vlen_enc = vector_length_encoding(this); 5841 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5842 %} 5843 ins_pipe( pipe_slow ); 5844 %} 5845 5846 // Floats vector sub 5847 instruct vsubF(vec dst, vec src) %{ 5848 predicate(UseAVX == 0); 5849 match(Set dst (SubVF dst src)); 5850 format %{ "subps $dst,$src\t! sub packedF" %} 5851 ins_encode %{ 5852 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5853 %} 5854 ins_pipe( pipe_slow ); 5855 %} 5856 5857 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5858 predicate(UseAVX > 0); 5859 match(Set dst (SubVF src1 src2)); 5860 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5861 ins_encode %{ 5862 int vlen_enc = vector_length_encoding(this); 5863 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5864 %} 5865 ins_pipe( pipe_slow ); 5866 %} 5867 5868 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5869 predicate((UseAVX > 0) && 5870 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5871 match(Set dst (SubVF src (LoadVector mem))); 5872 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5873 ins_encode %{ 5874 int vlen_enc = vector_length_encoding(this); 5875 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5876 %} 5877 ins_pipe( pipe_slow ); 5878 %} 5879 5880 // Doubles vector sub 5881 instruct vsubD(vec dst, vec src) %{ 5882 predicate(UseAVX == 0); 5883 match(Set dst (SubVD dst src)); 5884 format %{ "subpd $dst,$src\t! sub packedD" %} 5885 ins_encode %{ 5886 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 5887 %} 5888 ins_pipe( pipe_slow ); 5889 %} 5890 5891 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 5892 predicate(UseAVX > 0); 5893 match(Set dst (SubVD src1 src2)); 5894 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 5895 ins_encode %{ 5896 int vlen_enc = vector_length_encoding(this); 5897 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5898 %} 5899 ins_pipe( pipe_slow ); 5900 %} 5901 5902 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 5903 predicate((UseAVX > 0) && 5904 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5905 match(Set dst (SubVD src (LoadVector mem))); 5906 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 5907 ins_encode %{ 5908 int vlen_enc = vector_length_encoding(this); 5909 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5910 %} 5911 ins_pipe( pipe_slow ); 5912 %} 5913 5914 // --------------------------------- MUL -------------------------------------- 5915 5916 // Byte vector mul 5917 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 5918 predicate(Matcher::vector_length_in_bytes(n) <= 8); 5919 match(Set dst (MulVB src1 src2)); 5920 effect(TEMP dst, TEMP xtmp); 5921 format %{ "mulVB $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 5922 ins_encode %{ 5923 assert(UseSSE > 3, "required"); 5924 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 5925 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 5926 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 5927 __ psllw($dst$$XMMRegister, 8); 5928 __ psrlw($dst$$XMMRegister, 8); 5929 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 5930 %} 5931 ins_pipe( pipe_slow ); 5932 %} 5933 5934 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 5935 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 5936 match(Set dst (MulVB src1 src2)); 5937 effect(TEMP dst, TEMP xtmp); 5938 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 5939 ins_encode %{ 5940 assert(UseSSE > 3, "required"); 5941 // Odd-index elements 5942 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 5943 __ psrlw($dst$$XMMRegister, 8); 5944 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 5945 __ psrlw($xtmp$$XMMRegister, 8); 5946 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 5947 __ psllw($dst$$XMMRegister, 8); 5948 // Even-index elements 5949 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 5950 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 5951 __ psllw($xtmp$$XMMRegister, 8); 5952 __ psrlw($xtmp$$XMMRegister, 8); 5953 // Combine 5954 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 5955 %} 5956 ins_pipe( pipe_slow ); 5957 %} 5958 5959 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 5960 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 5961 match(Set dst (MulVB src1 src2)); 5962 effect(TEMP xtmp1, TEMP xtmp2); 5963 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 5964 ins_encode %{ 5965 int vlen_enc = vector_length_encoding(this); 5966 // Odd-index elements 5967 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 5968 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 5969 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5970 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 5971 // Even-index elements 5972 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5973 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 5974 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 5975 // Combine 5976 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5977 %} 5978 ins_pipe( pipe_slow ); 5979 %} 5980 5981 // Shorts/Chars vector mul 5982 instruct vmulS(vec dst, vec src) %{ 5983 predicate(UseAVX == 0); 5984 match(Set dst (MulVS dst src)); 5985 format %{ "pmullw $dst,$src\t! mul packedS" %} 5986 ins_encode %{ 5987 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 5988 %} 5989 ins_pipe( pipe_slow ); 5990 %} 5991 5992 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 5993 predicate(UseAVX > 0); 5994 match(Set dst (MulVS src1 src2)); 5995 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 5996 ins_encode %{ 5997 int vlen_enc = vector_length_encoding(this); 5998 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5999 %} 6000 ins_pipe( pipe_slow ); 6001 %} 6002 6003 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 6004 predicate((UseAVX > 0) && 6005 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6006 match(Set dst (MulVS src (LoadVector mem))); 6007 format %{ "vpmullw $dst,$src,$mem\t! 
mul packedS" %} 6008 ins_encode %{ 6009 int vlen_enc = vector_length_encoding(this); 6010 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6011 %} 6012 ins_pipe( pipe_slow ); 6013 %} 6014 6015 // Integers vector mul 6016 instruct vmulI(vec dst, vec src) %{ 6017 predicate(UseAVX == 0); 6018 match(Set dst (MulVI dst src)); 6019 format %{ "pmulld $dst,$src\t! mul packedI" %} 6020 ins_encode %{ 6021 assert(UseSSE > 3, "required"); 6022 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6023 %} 6024 ins_pipe( pipe_slow ); 6025 %} 6026 6027 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 6028 predicate(UseAVX > 0); 6029 match(Set dst (MulVI src1 src2)); 6030 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 6031 ins_encode %{ 6032 int vlen_enc = vector_length_encoding(this); 6033 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6034 %} 6035 ins_pipe( pipe_slow ); 6036 %} 6037 6038 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 6039 predicate((UseAVX > 0) && 6040 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6041 match(Set dst (MulVI src (LoadVector mem))); 6042 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 6043 ins_encode %{ 6044 int vlen_enc = vector_length_encoding(this); 6045 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6046 %} 6047 ins_pipe( pipe_slow ); 6048 %} 6049 6050 // Longs vector mul 6051 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 6052 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6053 VM_Version::supports_avx512dq()) || 6054 VM_Version::supports_avx512vldq()); 6055 match(Set dst (MulVL src1 src2)); 6056 ins_cost(500); 6057 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %} 6058 ins_encode %{ 6059 assert(UseAVX > 2, "required"); 6060 int vlen_enc = vector_length_encoding(this); 6061 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6062 %} 6063 ins_pipe( pipe_slow ); 6064 %} 6065 6066 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 6067 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6068 VM_Version::supports_avx512dq()) || 6069 (Matcher::vector_length_in_bytes(n) > 8 && 6070 VM_Version::supports_avx512vldq())); 6071 match(Set dst (MulVL src (LoadVector mem))); 6072 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 6073 ins_cost(500); 6074 ins_encode %{ 6075 assert(UseAVX > 2, "required"); 6076 int vlen_enc = vector_length_encoding(this); 6077 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6078 %} 6079 ins_pipe( pipe_slow ); 6080 %} 6081 6082 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 6083 predicate(UseAVX == 0); 6084 match(Set dst (MulVL src1 src2)); 6085 ins_cost(500); 6086 effect(TEMP dst, TEMP xtmp); 6087 format %{ "mulVL $dst, $src1, $src2\t! 
using $xtmp as TEMP" %}
6088 ins_encode %{
6089 assert(VM_Version::supports_sse4_1(), "required");
6090 // Get the lo-hi products; only the lower 32 bits are of interest
6091 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
6092 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
6093 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
6094 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
6095 __ psllq($dst$$XMMRegister, 32);
6096 // Get the lo-lo products
6097 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
6098 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
6099 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
6100 %}
6101 ins_pipe( pipe_slow );
6102 %}
6103
6104 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
6105 predicate(UseAVX > 0 &&
6106 ((Matcher::vector_length_in_bytes(n) == 64 &&
6107 !VM_Version::supports_avx512dq()) ||
6108 (Matcher::vector_length_in_bytes(n) < 64 &&
6109 !VM_Version::supports_avx512vldq())));
6110 match(Set dst (MulVL src1 src2));
6111 effect(TEMP xtmp1, TEMP xtmp2);
6112 ins_cost(500);
6113 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
6114 ins_encode %{
6115 int vlen_enc = vector_length_encoding(this);
6116 // Get the lo-hi products; only the lower 32 bits are of interest
6117 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
6118 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
6119 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
6120 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
6121 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
6122 // Get the lo-lo products
6123 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6124 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
6125 %}
6126 ins_pipe( pipe_slow );
6127 %}
6128
6129 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
6130 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
6131 match(Set dst (MulVL src1 src2));
6132 ins_cost(100);
6133 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
6134 ins_encode %{
6135 int vlen_enc = vector_length_encoding(this);
6136 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6137 %}
6138 ins_pipe( pipe_slow );
6139 %}
6140
6141 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
6142 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
6143 match(Set dst (MulVL src1 src2));
6144 ins_cost(100);
6145 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
6146 ins_encode %{
6147 int vlen_enc = vector_length_encoding(this);
6148 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6149 %}
6150 ins_pipe( pipe_slow );
6151 %}
6152
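// The MulVL rules above synthesize a 64x64->64 bit element multiply from
// 32-bit multiplies: the lo-lo product plus the two lo-hi cross products
// shifted left by 32 (the hi-hi product cannot reach the low 64 bits).
// A scalar sketch of the same decomposition, for illustration only (plain C,
// not part of the generated code):
//
//   uint64_t mul64(uint64_t a, uint64_t b) {
//     uint64_t a_lo = (uint32_t)a, a_hi = a >> 32;
//     uint64_t b_lo = (uint32_t)b, b_hi = b >> 32;
//     uint64_t cross = a_lo * b_hi + a_hi * b_lo; // lo-hi products (vpmulld + vpaddd)
//     return a_lo * b_lo + (cross << 32);         // lo-lo product (vpmuludq) plus shifted cross terms
//   }
//
6153 // Floats vector mul
6154 instruct vmulF(vec dst, vec src) %{
6155 predicate(UseAVX == 0);
6156 match(Set dst (MulVF dst src));
6157 format %{ "mulps $dst,$src\t! mul packedF" %}
6158 ins_encode %{
6159 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
6160 %}
6161 ins_pipe( pipe_slow );
6162 %}
6163
6164 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
6165 predicate(UseAVX > 0);
6166 match(Set dst (MulVF src1 src2));
6167 format %{ "vmulps $dst,$src1,$src2\t! 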
mul packedF" %} 6168 ins_encode %{ 6169 int vlen_enc = vector_length_encoding(this); 6170 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6171 %} 6172 ins_pipe( pipe_slow ); 6173 %} 6174 6175 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 6176 predicate((UseAVX > 0) && 6177 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6178 match(Set dst (MulVF src (LoadVector mem))); 6179 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 6180 ins_encode %{ 6181 int vlen_enc = vector_length_encoding(this); 6182 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6183 %} 6184 ins_pipe( pipe_slow ); 6185 %} 6186 6187 // Doubles vector mul 6188 instruct vmulD(vec dst, vec src) %{ 6189 predicate(UseAVX == 0); 6190 match(Set dst (MulVD dst src)); 6191 format %{ "mulpd $dst,$src\t! mul packedD" %} 6192 ins_encode %{ 6193 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 6194 %} 6195 ins_pipe( pipe_slow ); 6196 %} 6197 6198 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 6199 predicate(UseAVX > 0); 6200 match(Set dst (MulVD src1 src2)); 6201 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} 6202 ins_encode %{ 6203 int vlen_enc = vector_length_encoding(this); 6204 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6205 %} 6206 ins_pipe( pipe_slow ); 6207 %} 6208 6209 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 6210 predicate((UseAVX > 0) && 6211 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6212 match(Set dst (MulVD src (LoadVector mem))); 6213 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 6214 ins_encode %{ 6215 int vlen_enc = vector_length_encoding(this); 6216 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6217 %} 6218 ins_pipe( pipe_slow ); 6219 %} 6220 6221 // --------------------------------- DIV -------------------------------------- 6222 6223 // Floats vector div 6224 instruct vdivF(vec dst, vec src) %{ 6225 predicate(UseAVX == 0); 6226 match(Set dst (DivVF dst src)); 6227 format %{ "divps $dst,$src\t! div packedF" %} 6228 ins_encode %{ 6229 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6230 %} 6231 ins_pipe( pipe_slow ); 6232 %} 6233 6234 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 6235 predicate(UseAVX > 0); 6236 match(Set dst (DivVF src1 src2)); 6237 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 6238 ins_encode %{ 6239 int vlen_enc = vector_length_encoding(this); 6240 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6241 %} 6242 ins_pipe( pipe_slow ); 6243 %} 6244 6245 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6246 predicate((UseAVX > 0) && 6247 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6248 match(Set dst (DivVF src (LoadVector mem))); 6249 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6250 ins_encode %{ 6251 int vlen_enc = vector_length_encoding(this); 6252 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6253 %} 6254 ins_pipe( pipe_slow ); 6255 %} 6256 6257 // Doubles vector div 6258 instruct vdivD(vec dst, vec src) %{ 6259 predicate(UseAVX == 0); 6260 match(Set dst (DivVD dst src)); 6261 format %{ "divpd $dst,$src\t! div packedD" %} 6262 ins_encode %{ 6263 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6264 %} 6265 ins_pipe( pipe_slow ); 6266 %} 6267 6268 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6269 predicate(UseAVX > 0); 6270 match(Set dst (DivVD src1 src2)); 6271 format %{ "vdivpd $dst,$src1,$src2\t! 
div packedD" %}
6272 ins_encode %{
6273 int vlen_enc = vector_length_encoding(this);
6274 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6275 %}
6276 ins_pipe( pipe_slow );
6277 %}
6278
6279 instruct vdivD_mem(vec dst, vec src, memory mem) %{
6280 predicate((UseAVX > 0) &&
6281 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
6282 match(Set dst (DivVD src (LoadVector mem)));
6283 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
6284 ins_encode %{
6285 int vlen_enc = vector_length_encoding(this);
6286 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6287 %}
6288 ins_pipe( pipe_slow );
6289 %}
6290
6291 // ------------------------------ MinMax ---------------------------------------
6292
6293 // Byte, Short, Int vector Min/Max
6294 instruct minmax_reg_sse(vec dst, vec src) %{
6295 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
6296 UseAVX == 0);
6297 match(Set dst (MinV dst src));
6298 match(Set dst (MaxV dst src));
6299 format %{ "vector_minmax $dst,$src\t! " %}
6300 ins_encode %{
6301 assert(UseSSE >= 4, "required");
6302
6303 int opcode = this->ideal_Opcode();
6304 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6305 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
6306 %}
6307 ins_pipe( pipe_slow );
6308 %}
6309
6310 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
6311 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
6312 UseAVX > 0);
6313 match(Set dst (MinV src1 src2));
6314 match(Set dst (MaxV src1 src2));
6315 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
6316 ins_encode %{
6317 int opcode = this->ideal_Opcode();
6318 int vlen_enc = vector_length_encoding(this);
6319 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6320
6321 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6322 %}
6323 ins_pipe( pipe_slow );
6324 %}
6325
6326 // Long vector Min/Max
6327 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
6328 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
6329 UseAVX == 0);
6330 match(Set dst (MinV dst src));
6331 match(Set dst (MaxV dst src));
6332 effect(TEMP dst, TEMP tmp);
6333 format %{ "vector_minmaxL $dst,$src\t! using $tmp as TEMP" %}
6334 ins_encode %{
6335 assert(UseSSE >= 4, "required");
6336
6337 int opcode = this->ideal_Opcode();
6338 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6339 assert(elem_bt == T_LONG, "sanity");
6340
6341 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
6342 %}
6343 ins_pipe( pipe_slow );
6344 %}
6345
6346 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
6347 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
6348 UseAVX > 0 && !VM_Version::supports_avx512vl());
6349 match(Set dst (MinV src1 src2));
6350 match(Set dst (MaxV src1 src2));
6351 effect(TEMP dst);
6352 format %{ "vector_minmaxL $dst,$src1,$src2\t! 
" %} 6353 ins_encode %{ 6354 int vlen_enc = vector_length_encoding(this); 6355 int opcode = this->ideal_Opcode(); 6356 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6357 assert(elem_bt == T_LONG, "sanity"); 6358 6359 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6360 %} 6361 ins_pipe( pipe_slow ); 6362 %} 6363 6364 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6365 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6366 Matcher::vector_element_basic_type(n) == T_LONG); 6367 match(Set dst (MinV src1 src2)); 6368 match(Set dst (MaxV src1 src2)); 6369 format %{ "vector_minmaxL $dst,$src1,src2\t! " %} 6370 ins_encode %{ 6371 assert(UseAVX > 2, "required"); 6372 6373 int vlen_enc = vector_length_encoding(this); 6374 int opcode = this->ideal_Opcode(); 6375 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6376 assert(elem_bt == T_LONG, "sanity"); 6377 6378 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6379 %} 6380 ins_pipe( pipe_slow ); 6381 %} 6382 6383 // Float/Double vector Min/Max 6384 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6385 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6386 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6387 UseAVX > 0); 6388 match(Set dst (MinV a b)); 6389 match(Set dst (MaxV a b)); 6390 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6391 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6392 ins_encode %{ 6393 assert(UseAVX > 0, "required"); 6394 6395 int opcode = this->ideal_Opcode(); 6396 int vlen_enc = vector_length_encoding(this); 6397 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6398 6399 __ vminmax_fp(opcode, elem_bt, 6400 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6401 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6402 %} 6403 ins_pipe( pipe_slow ); 6404 %} 6405 6406 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6407 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6408 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6409 match(Set dst (MinV a b)); 6410 match(Set dst (MaxV a b)); 6411 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6412 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6413 ins_encode %{ 6414 assert(UseAVX > 2, "required"); 6415 6416 int opcode = this->ideal_Opcode(); 6417 int vlen_enc = vector_length_encoding(this); 6418 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6419 6420 __ evminmax_fp(opcode, elem_bt, 6421 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6422 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6423 %} 6424 ins_pipe( pipe_slow ); 6425 %} 6426 6427 // ------------------------------ Unsigned vector Min/Max ---------------------- 6428 6429 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{ 6430 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6431 match(Set dst (UMinV a b)); 6432 match(Set dst (UMaxV a b)); 6433 format %{ "vector_uminmax $dst,$a,$b\t!" 
%} 6434 ins_encode %{ 6435 int opcode = this->ideal_Opcode(); 6436 int vlen_enc = vector_length_encoding(this); 6437 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6438 assert(is_integral_type(elem_bt), ""); 6439 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); 6440 %} 6441 ins_pipe( pipe_slow ); 6442 %} 6443 6444 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{ 6445 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6446 match(Set dst (UMinV a (LoadVector b))); 6447 match(Set dst (UMaxV a (LoadVector b))); 6448 format %{ "vector_uminmax $dst,$a,$b\t!" %} 6449 ins_encode %{ 6450 int opcode = this->ideal_Opcode(); 6451 int vlen_enc = vector_length_encoding(this); 6452 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6453 assert(is_integral_type(elem_bt), ""); 6454 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc); 6455 %} 6456 ins_pipe( pipe_slow ); 6457 %} 6458 6459 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{ 6460 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG); 6461 match(Set dst (UMinV a b)); 6462 match(Set dst (UMaxV a b)); 6463 effect(TEMP xtmp1, TEMP xtmp2); 6464 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %} 6465 ins_encode %{ 6466 int opcode = this->ideal_Opcode(); 6467 int vlen_enc = vector_length_encoding(this); 6468 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6469 %} 6470 ins_pipe( pipe_slow ); 6471 %} 6472 6473 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{ 6474 match(Set dst (UMinV (Binary dst src2) mask)); 6475 match(Set dst (UMaxV (Binary dst src2) mask)); 6476 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %} 6477 ins_encode %{ 6478 int vlen_enc = vector_length_encoding(this); 6479 BasicType bt = Matcher::vector_element_basic_type(this); 6480 int opc = this->ideal_Opcode(); 6481 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 6482 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 6483 %} 6484 ins_pipe( pipe_slow ); 6485 %} 6486 6487 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{ 6488 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask)); 6489 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask)); 6490 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! 
umin/max masked operation" %}
6491 ins_encode %{
6492 int vlen_enc = vector_length_encoding(this);
6493 BasicType bt = Matcher::vector_element_basic_type(this);
6494 int opc = this->ideal_Opcode();
6495 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
6496 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
6497 %}
6498 ins_pipe( pipe_slow );
6499 %}
6500
6501 // --------------------------------- Signum/CopySign ---------------------------
6502
6503 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
6504 match(Set dst (SignumF dst (Binary zero one)));
6505 effect(KILL cr);
6506 format %{ "signumF $dst, $dst" %}
6507 ins_encode %{
6508 int opcode = this->ideal_Opcode();
6509 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
6510 %}
6511 ins_pipe( pipe_slow );
6512 %}
6513
6514 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
6515 match(Set dst (SignumD dst (Binary zero one)));
6516 effect(KILL cr);
6517 format %{ "signumD $dst, $dst" %}
6518 ins_encode %{
6519 int opcode = this->ideal_Opcode();
6520 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
6521 %}
6522 ins_pipe( pipe_slow );
6523 %}
6524
6525 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
6526 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
6527 match(Set dst (SignumVF src (Binary zero one)));
6528 match(Set dst (SignumVD src (Binary zero one)));
6529 effect(TEMP dst, TEMP xtmp1);
6530 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
6531 ins_encode %{
6532 int opcode = this->ideal_Opcode();
6533 int vec_enc = vector_length_encoding(this);
6534 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
6535 $xtmp1$$XMMRegister, vec_enc);
6536 %}
6537 ins_pipe( pipe_slow );
6538 %}
6539
6540 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
6541 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
6542 match(Set dst (SignumVF src (Binary zero one)));
6543 match(Set dst (SignumVD src (Binary zero one)));
6544 effect(TEMP dst, TEMP ktmp1);
6545 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
6546 ins_encode %{
6547 int opcode = this->ideal_Opcode();
6548 int vec_enc = vector_length_encoding(this);
6549 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
6550 $ktmp1$$KRegister, vec_enc);
6551 %}
6552 ins_pipe( pipe_slow );
6553 %}
6554
6555 // ---------------------------------------
6556 // For copySign use 0xE4 as writemask for vpternlog
6557 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
6558 // C (xmm2) is set to 0x7FFFFFFF
6559 // Wherever xmm2 is 0, we want to pick from B (sign)
6560 // Wherever xmm2 is 1, we want to pick from A (src)
6561 //
6562 // A B C Result
6563 // 0 0 0 0
6564 // 0 0 1 0
6565 // 0 1 0 1
6566 // 0 1 1 0
6567 // 1 0 0 0
6568 // 1 0 1 1
6569 // 1 1 0 1
6570 // 1 1 1 1
6571 //
6572 // Result going from high bit to low bit is 11100100 (binary) = 0xe4
6573 // ---------------------------------------
6574
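// A scalar sketch (plain C, for illustration only; not part of the generated
// code) of how the vpternlog immediate above is applied: every destination bit
// looks up imm8 at the 3-bit index formed by the corresponding A, B and C bits.
//
//   uint32_t ternlog32(uint32_t a, uint32_t b, uint32_t c, uint8_t imm8) {
//     uint32_t r = 0;
//     for (int i = 0; i < 32; i++) {
//       int idx = (((a >> i) & 1) << 2) | (((b >> i) & 1) << 1) | ((c >> i) & 1);
//       r |= (uint32_t)((imm8 >> idx) & 1) << i;
//     }
//     return r;
//   }
//
// With imm8 = 0xE4 this is bitwise "C ? A : B", so for C = 0x7FFFFFFF the
// result takes its magnitude bits from A and its sign bit from B, which is
// exactly the copySign selection described in the table above.
//
6575 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
6576 match(Set dst (CopySignF dst src));
6577 effect(TEMP tmp1, TEMP tmp2);
6578 format %{ "CopySignF $dst, $src\t! 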
using $tmp1 and $tmp2 as TEMP" %} 6579 ins_encode %{ 6580 __ movl($tmp2$$Register, 0x7FFFFFFF); 6581 __ movdl($tmp1$$XMMRegister, $tmp2$$Register); 6582 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6583 %} 6584 ins_pipe( pipe_slow ); 6585 %} 6586 6587 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ 6588 match(Set dst (CopySignD dst (Binary src zero))); 6589 ins_cost(100); 6590 effect(TEMP tmp1, TEMP tmp2); 6591 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6592 ins_encode %{ 6593 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); 6594 __ movq($tmp1$$XMMRegister, $tmp2$$Register); 6595 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6596 %} 6597 ins_pipe( pipe_slow ); 6598 %} 6599 6600 //----------------------------- CompressBits/ExpandBits ------------------------ 6601 6602 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6603 predicate(n->bottom_type()->isa_int()); 6604 match(Set dst (CompressBits src mask)); 6605 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6606 ins_encode %{ 6607 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 6608 %} 6609 ins_pipe( pipe_slow ); 6610 %} 6611 6612 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6613 predicate(n->bottom_type()->isa_int()); 6614 match(Set dst (ExpandBits src mask)); 6615 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6616 ins_encode %{ 6617 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 6618 %} 6619 ins_pipe( pipe_slow ); 6620 %} 6621 6622 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6623 predicate(n->bottom_type()->isa_int()); 6624 match(Set dst (CompressBits src (LoadI mask))); 6625 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6626 ins_encode %{ 6627 __ pextl($dst$$Register, $src$$Register, $mask$$Address); 6628 %} 6629 ins_pipe( pipe_slow ); 6630 %} 6631 6632 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6633 predicate(n->bottom_type()->isa_int()); 6634 match(Set dst (ExpandBits src (LoadI mask))); 6635 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6636 ins_encode %{ 6637 __ pdepl($dst$$Register, $src$$Register, $mask$$Address); 6638 %} 6639 ins_pipe( pipe_slow ); 6640 %} 6641 6642 // --------------------------------- Sqrt -------------------------------------- 6643 6644 instruct vsqrtF_reg(vec dst, vec src) %{ 6645 match(Set dst (SqrtVF src)); 6646 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6647 ins_encode %{ 6648 assert(UseAVX > 0, "required"); 6649 int vlen_enc = vector_length_encoding(this); 6650 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6651 %} 6652 ins_pipe( pipe_slow ); 6653 %} 6654 6655 instruct vsqrtF_mem(vec dst, memory mem) %{ 6656 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6657 match(Set dst (SqrtVF (LoadVector mem))); 6658 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6659 ins_encode %{ 6660 assert(UseAVX > 0, "required"); 6661 int vlen_enc = vector_length_encoding(this); 6662 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6663 %} 6664 ins_pipe( pipe_slow ); 6665 %} 6666 6667 // Floating point vector sqrt 6668 instruct vsqrtD_reg(vec dst, vec src) %{ 6669 match(Set dst (SqrtVD src)); 6670 format %{ "vsqrtpd $dst,$src\t! 
sqrt packedD" %} 6671 ins_encode %{ 6672 assert(UseAVX > 0, "required"); 6673 int vlen_enc = vector_length_encoding(this); 6674 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6675 %} 6676 ins_pipe( pipe_slow ); 6677 %} 6678 6679 instruct vsqrtD_mem(vec dst, memory mem) %{ 6680 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6681 match(Set dst (SqrtVD (LoadVector mem))); 6682 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6683 ins_encode %{ 6684 assert(UseAVX > 0, "required"); 6685 int vlen_enc = vector_length_encoding(this); 6686 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6687 %} 6688 ins_pipe( pipe_slow ); 6689 %} 6690 6691 // ------------------------------ Shift --------------------------------------- 6692 6693 // Left and right shift count vectors are the same on x86 6694 // (only lowest bits of xmm reg are used for count). 6695 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6696 match(Set dst (LShiftCntV cnt)); 6697 match(Set dst (RShiftCntV cnt)); 6698 format %{ "movdl $dst,$cnt\t! load shift count" %} 6699 ins_encode %{ 6700 __ movdl($dst$$XMMRegister, $cnt$$Register); 6701 %} 6702 ins_pipe( pipe_slow ); 6703 %} 6704 6705 // Byte vector shift 6706 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6707 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6708 match(Set dst ( LShiftVB src shift)); 6709 match(Set dst ( RShiftVB src shift)); 6710 match(Set dst (URShiftVB src shift)); 6711 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6712 format %{"vector_byte_shift $dst,$src,$shift" %} 6713 ins_encode %{ 6714 assert(UseSSE > 3, "required"); 6715 int opcode = this->ideal_Opcode(); 6716 bool sign = (opcode != Op_URShiftVB); 6717 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6718 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6719 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6720 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6721 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6722 %} 6723 ins_pipe( pipe_slow ); 6724 %} 6725 6726 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6727 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6728 UseAVX <= 1); 6729 match(Set dst ( LShiftVB src shift)); 6730 match(Set dst ( RShiftVB src shift)); 6731 match(Set dst (URShiftVB src shift)); 6732 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6733 format %{"vector_byte_shift $dst,$src,$shift" %} 6734 ins_encode %{ 6735 assert(UseSSE > 3, "required"); 6736 int opcode = this->ideal_Opcode(); 6737 bool sign = (opcode != Op_URShiftVB); 6738 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6739 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6740 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6741 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6742 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6743 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6744 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6745 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6746 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6747 %} 6748 ins_pipe( pipe_slow ); 6749 %} 6750 6751 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6752 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6753 UseAVX > 1); 6754 match(Set dst ( LShiftVB src shift)); 6755 match(Set dst ( RShiftVB src shift)); 6756 match(Set 
dst (URShiftVB src shift)); 6757 effect(TEMP dst, TEMP tmp); 6758 format %{"vector_byte_shift $dst,$src,$shift" %} 6759 ins_encode %{ 6760 int opcode = this->ideal_Opcode(); 6761 bool sign = (opcode != Op_URShiftVB); 6762 int vlen_enc = Assembler::AVX_256bit; 6763 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6764 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6765 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6766 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6767 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6768 %} 6769 ins_pipe( pipe_slow ); 6770 %} 6771 6772 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6773 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6774 match(Set dst ( LShiftVB src shift)); 6775 match(Set dst ( RShiftVB src shift)); 6776 match(Set dst (URShiftVB src shift)); 6777 effect(TEMP dst, TEMP tmp); 6778 format %{"vector_byte_shift $dst,$src,$shift" %} 6779 ins_encode %{ 6780 assert(UseAVX > 1, "required"); 6781 int opcode = this->ideal_Opcode(); 6782 bool sign = (opcode != Op_URShiftVB); 6783 int vlen_enc = Assembler::AVX_256bit; 6784 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 6785 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6786 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6787 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6788 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6789 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6790 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6791 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6792 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6793 %} 6794 ins_pipe( pipe_slow ); 6795 %} 6796 6797 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6798 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift()); 6799 match(Set dst ( LShiftVB src shift)); 6800 match(Set dst (RShiftVB src shift)); 6801 match(Set dst (URShiftVB src shift)); 6802 effect(TEMP dst, TEMP tmp1, TEMP tmp2); 6803 format %{"vector_byte_shift $dst,$src,$shift" %} 6804 ins_encode %{ 6805 assert(UseAVX > 2, "required"); 6806 int opcode = this->ideal_Opcode(); 6807 bool sign = (opcode != Op_URShiftVB); 6808 int vlen_enc = Assembler::AVX_512bit; 6809 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 6810 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 6811 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6812 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6813 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6814 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6815 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6816 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6817 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6818 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 6819 __ evmovdquq($tmp2$$XMMRegister, 
ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg); 6820 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6821 %} 6822 ins_pipe( pipe_slow ); 6823 %} 6824 6825 // Shorts vector logical right shift produces incorrect Java result 6826 // for negative data because java code convert short value into int with 6827 // sign extension before a shift. But char vectors are fine since chars are 6828 // unsigned values. 6829 // Shorts/Chars vector left shift 6830 instruct vshiftS(vec dst, vec src, vec shift) %{ 6831 predicate(!n->as_ShiftV()->is_var_shift()); 6832 match(Set dst ( LShiftVS src shift)); 6833 match(Set dst ( RShiftVS src shift)); 6834 match(Set dst (URShiftVS src shift)); 6835 effect(TEMP dst, USE src, USE shift); 6836 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %} 6837 ins_encode %{ 6838 int opcode = this->ideal_Opcode(); 6839 if (UseAVX > 0) { 6840 int vlen_enc = vector_length_encoding(this); 6841 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6842 } else { 6843 int vlen = Matcher::vector_length(this); 6844 if (vlen == 2) { 6845 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6846 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6847 } else if (vlen == 4) { 6848 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6849 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6850 } else { 6851 assert (vlen == 8, "sanity"); 6852 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6853 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6854 } 6855 } 6856 %} 6857 ins_pipe( pipe_slow ); 6858 %} 6859 6860 // Integers vector left shift 6861 instruct vshiftI(vec dst, vec src, vec shift) %{ 6862 predicate(!n->as_ShiftV()->is_var_shift()); 6863 match(Set dst ( LShiftVI src shift)); 6864 match(Set dst ( RShiftVI src shift)); 6865 match(Set dst (URShiftVI src shift)); 6866 effect(TEMP dst, USE src, USE shift); 6867 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 6868 ins_encode %{ 6869 int opcode = this->ideal_Opcode(); 6870 if (UseAVX > 0) { 6871 int vlen_enc = vector_length_encoding(this); 6872 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6873 } else { 6874 int vlen = Matcher::vector_length(this); 6875 if (vlen == 2) { 6876 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6877 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6878 } else { 6879 assert(vlen == 4, "sanity"); 6880 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6881 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6882 } 6883 } 6884 %} 6885 ins_pipe( pipe_slow ); 6886 %} 6887 6888 // Integers vector left constant shift 6889 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ 6890 match(Set dst (LShiftVI src (LShiftCntV shift))); 6891 match(Set dst (RShiftVI src (RShiftCntV shift))); 6892 match(Set dst (URShiftVI src (RShiftCntV shift))); 6893 format %{ "vshiftd_imm $dst,$src,$shift\t! 
shift packedI" %} 6894 ins_encode %{ 6895 int opcode = this->ideal_Opcode(); 6896 if (UseAVX > 0) { 6897 int vector_len = vector_length_encoding(this); 6898 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6899 } else { 6900 int vlen = Matcher::vector_length(this); 6901 if (vlen == 2) { 6902 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6903 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6904 } else { 6905 assert(vlen == 4, "sanity"); 6906 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6907 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6908 } 6909 } 6910 %} 6911 ins_pipe( pipe_slow ); 6912 %} 6913 6914 // Longs vector shift 6915 instruct vshiftL(vec dst, vec src, vec shift) %{ 6916 predicate(!n->as_ShiftV()->is_var_shift()); 6917 match(Set dst ( LShiftVL src shift)); 6918 match(Set dst (URShiftVL src shift)); 6919 effect(TEMP dst, USE src, USE shift); 6920 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %} 6921 ins_encode %{ 6922 int opcode = this->ideal_Opcode(); 6923 if (UseAVX > 0) { 6924 int vlen_enc = vector_length_encoding(this); 6925 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6926 } else { 6927 assert(Matcher::vector_length(this) == 2, ""); 6928 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6929 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6930 } 6931 %} 6932 ins_pipe( pipe_slow ); 6933 %} 6934 6935 // Longs vector constant shift 6936 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 6937 match(Set dst (LShiftVL src (LShiftCntV shift))); 6938 match(Set dst (URShiftVL src (RShiftCntV shift))); 6939 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 6940 ins_encode %{ 6941 int opcode = this->ideal_Opcode(); 6942 if (UseAVX > 0) { 6943 int vector_len = vector_length_encoding(this); 6944 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6945 } else { 6946 assert(Matcher::vector_length(this) == 2, ""); 6947 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6948 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6949 } 6950 %} 6951 ins_pipe( pipe_slow ); 6952 %} 6953 6954 // -------------------ArithmeticRightShift ----------------------------------- 6955 // Long vector arithmetic right shift 6956 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 6957 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 6958 match(Set dst (RShiftVL src shift)); 6959 effect(TEMP dst, TEMP tmp); 6960 format %{ "vshiftq $dst,$src,$shift" %} 6961 ins_encode %{ 6962 uint vlen = Matcher::vector_length(this); 6963 if (vlen == 2) { 6964 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6965 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6966 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6967 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6968 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6969 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 6970 } else { 6971 assert(vlen == 4, "sanity"); 6972 assert(UseAVX > 1, "required"); 6973 int vlen_enc = Assembler::AVX_256bit; 6974 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6975 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6976 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6977 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6978 __ vpsubq($dst$$XMMRegister, 
$dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6979 } 6980 %} 6981 ins_pipe( pipe_slow ); 6982 %} 6983 6984 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 6985 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 6986 match(Set dst (RShiftVL src shift)); 6987 format %{ "vshiftq $dst,$src,$shift" %} 6988 ins_encode %{ 6989 int vlen_enc = vector_length_encoding(this); 6990 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6991 %} 6992 ins_pipe( pipe_slow ); 6993 %} 6994 6995 // ------------------- Variable Shift ----------------------------- 6996 // Byte variable shift 6997 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 6998 predicate(Matcher::vector_length(n) <= 8 && 6999 n->as_ShiftV()->is_var_shift() && 7000 !VM_Version::supports_avx512bw()); 7001 match(Set dst ( LShiftVB src shift)); 7002 match(Set dst ( RShiftVB src shift)); 7003 match(Set dst (URShiftVB src shift)); 7004 effect(TEMP dst, TEMP vtmp); 7005 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7006 ins_encode %{ 7007 assert(UseAVX >= 2, "required"); 7008 7009 int opcode = this->ideal_Opcode(); 7010 int vlen_enc = Assembler::AVX_128bit; 7011 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7012 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7013 %} 7014 ins_pipe( pipe_slow ); 7015 %} 7016 7017 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7018 predicate(Matcher::vector_length(n) == 16 && 7019 n->as_ShiftV()->is_var_shift() && 7020 !VM_Version::supports_avx512bw()); 7021 match(Set dst ( LShiftVB src shift)); 7022 match(Set dst ( RShiftVB src shift)); 7023 match(Set dst (URShiftVB src shift)); 7024 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7025 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp1, $vtmp2 as TEMP" %} 7026 ins_encode %{ 7027 assert(UseAVX >= 2, "required"); 7028 7029 int opcode = this->ideal_Opcode(); 7030 int vlen_enc = Assembler::AVX_128bit; 7031 // Shift lower half and get word result in dst 7032 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7033 7034 // Shift upper half and get word result in vtmp1 7035 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7036 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7037 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7038 7039 // Merge and down convert the two word results to byte in dst 7040 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7041 %} 7042 ins_pipe( pipe_slow ); 7043 %} 7044 7045 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 7046 predicate(Matcher::vector_length(n) == 32 && 7047 n->as_ShiftV()->is_var_shift() && 7048 !VM_Version::supports_avx512bw()); 7049 match(Set dst ( LShiftVB src shift)); 7050 match(Set dst ( RShiftVB src shift)); 7051 match(Set dst (URShiftVB src shift)); 7052 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 7053 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 7054 ins_encode %{ 7055 assert(UseAVX >= 2, "required"); 7056 7057 int opcode = this->ideal_Opcode(); 7058 int vlen_enc = Assembler::AVX_128bit; 7059 // Process lower 128 bits and get result in dst 7060 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7061 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7062 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7063 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7064 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7065 7066 // Process higher 128 bits and get result in vtmp3 7067 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7068 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7069 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 7070 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 7071 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 7072 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7073 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 7074 7075 // Merge the two results in dst 7076 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7077 %} 7078 ins_pipe( pipe_slow ); 7079 %} 7080 7081 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ 7082 predicate(Matcher::vector_length(n) <= 32 && 7083 n->as_ShiftV()->is_var_shift() && 7084 VM_Version::supports_avx512bw()); 7085 match(Set dst ( LShiftVB src shift)); 7086 match(Set dst ( RShiftVB src shift)); 7087 match(Set dst (URShiftVB src shift)); 7088 effect(TEMP dst, TEMP vtmp); 7089 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp as TEMP" %} 7090 ins_encode %{ 7091 assert(UseAVX > 2, "required"); 7092 7093 int opcode = this->ideal_Opcode(); 7094 int vlen_enc = vector_length_encoding(this); 7095 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7096 %} 7097 ins_pipe( pipe_slow ); 7098 %} 7099 7100 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7101 predicate(Matcher::vector_length(n) == 64 && 7102 n->as_ShiftV()->is_var_shift() && 7103 VM_Version::supports_avx512bw()); 7104 match(Set dst ( LShiftVB src shift)); 7105 match(Set dst ( RShiftVB src shift)); 7106 match(Set dst (URShiftVB src shift)); 7107 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7108 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7109 ins_encode %{ 7110 assert(UseAVX > 2, "required"); 7111 7112 int opcode = this->ideal_Opcode(); 7113 int vlen_enc = Assembler::AVX_256bit; 7114 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7115 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7116 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7117 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7118 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7119 %} 7120 ins_pipe( pipe_slow ); 7121 %} 7122 7123 // Short variable shift 7124 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7125 predicate(Matcher::vector_length(n) <= 8 && 7126 n->as_ShiftV()->is_var_shift() && 7127 !VM_Version::supports_avx512bw()); 7128 match(Set dst ( LShiftVS src shift)); 7129 match(Set dst ( RShiftVS src shift)); 7130 match(Set dst (URShiftVS src shift)); 7131 effect(TEMP dst, TEMP vtmp); 7132 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7133 ins_encode %{ 7134 assert(UseAVX >= 2, "required"); 7135 7136 int opcode = this->ideal_Opcode(); 7137 bool sign = (opcode != Op_URShiftVS); 7138 int vlen_enc = Assembler::AVX_256bit; 7139 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 7140 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 7141 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 7142 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7143 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 7144 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7145 %} 7146 ins_pipe( pipe_slow ); 7147 %} 7148 7149 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7150 predicate(Matcher::vector_length(n) == 16 && 7151 n->as_ShiftV()->is_var_shift() && 7152 !VM_Version::supports_avx512bw()); 7153 match(Set dst ( LShiftVS src shift)); 7154 match(Set dst ( RShiftVS src shift)); 7155 match(Set dst (URShiftVS src shift)); 7156 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7157 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7158 ins_encode %{ 7159 assert(UseAVX >= 2, "required"); 7160 7161 int opcode = this->ideal_Opcode(); 7162 bool sign = (opcode != Op_URShiftVS); 7163 int vlen_enc = Assembler::AVX_256bit; 7164 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP 7165 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 7166 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7167 __ 
varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7168 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7169 7170 // Shift upper half, with result in dst using vtmp1 as TEMP 7171 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 7172 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 7173 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7174 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7175 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7176 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7177 7178 // Merge lower and upper half result into dst 7179 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7180 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 7181 %} 7182 ins_pipe( pipe_slow ); 7183 %} 7184 7185 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 7186 predicate(n->as_ShiftV()->is_var_shift() && 7187 VM_Version::supports_avx512bw()); 7188 match(Set dst ( LShiftVS src shift)); 7189 match(Set dst ( RShiftVS src shift)); 7190 match(Set dst (URShiftVS src shift)); 7191 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 7192 ins_encode %{ 7193 assert(UseAVX > 2, "required"); 7194 7195 int opcode = this->ideal_Opcode(); 7196 int vlen_enc = vector_length_encoding(this); 7197 if (!VM_Version::supports_avx512vl()) { 7198 vlen_enc = Assembler::AVX_512bit; 7199 } 7200 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7201 %} 7202 ins_pipe( pipe_slow ); 7203 %} 7204 7205 //Integer variable shift 7206 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 7207 predicate(n->as_ShiftV()->is_var_shift()); 7208 match(Set dst ( LShiftVI src shift)); 7209 match(Set dst ( RShiftVI src shift)); 7210 match(Set dst (URShiftVI src shift)); 7211 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 7212 ins_encode %{ 7213 assert(UseAVX >= 2, "required"); 7214 7215 int opcode = this->ideal_Opcode(); 7216 int vlen_enc = vector_length_encoding(this); 7217 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7218 %} 7219 ins_pipe( pipe_slow ); 7220 %} 7221 7222 //Long variable shift 7223 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 7224 predicate(n->as_ShiftV()->is_var_shift()); 7225 match(Set dst ( LShiftVL src shift)); 7226 match(Set dst (URShiftVL src shift)); 7227 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 7228 ins_encode %{ 7229 assert(UseAVX >= 2, "required"); 7230 7231 int opcode = this->ideal_Opcode(); 7232 int vlen_enc = vector_length_encoding(this); 7233 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7234 %} 7235 ins_pipe( pipe_slow ); 7236 %} 7237 7238 //Long variable right shift arithmetic 7239 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 7240 predicate(Matcher::vector_length(n) <= 4 && 7241 n->as_ShiftV()->is_var_shift() && 7242 UseAVX == 2); 7243 match(Set dst (RShiftVL src shift)); 7244 effect(TEMP dst, TEMP vtmp); 7245 format %{ "vector_varshift_long $dst,$src,$shift\n\t! 
// Long variable right shift arithmetic
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
                 $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- AND --------------------------------------

instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t!
xor vectors" %} 7344 ins_encode %{ 7345 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7346 %} 7347 ins_pipe( pipe_slow ); 7348 %} 7349 7350 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7351 predicate(UseAVX > 0); 7352 match(Set dst (XorV src1 src2)); 7353 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7354 ins_encode %{ 7355 int vlen_enc = vector_length_encoding(this); 7356 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7357 %} 7358 ins_pipe( pipe_slow ); 7359 %} 7360 7361 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7362 predicate((UseAVX > 0) && 7363 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7364 match(Set dst (XorV src (LoadVector mem))); 7365 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7366 ins_encode %{ 7367 int vlen_enc = vector_length_encoding(this); 7368 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7369 %} 7370 ins_pipe( pipe_slow ); 7371 %} 7372 7373 // --------------------------------- VectorCast -------------------------------------- 7374 7375 instruct vcastBtoX(vec dst, vec src) %{ 7376 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE); 7377 match(Set dst (VectorCastB2X src)); 7378 format %{ "vector_cast_b2x $dst,$src\t!" %} 7379 ins_encode %{ 7380 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7381 int vlen_enc = vector_length_encoding(this); 7382 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7383 %} 7384 ins_pipe( pipe_slow ); 7385 %} 7386 7387 instruct vcastBtoD(legVec dst, legVec src) %{ 7388 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE); 7389 match(Set dst (VectorCastB2X src)); 7390 format %{ "vector_cast_b2x $dst,$src\t!" %} 7391 ins_encode %{ 7392 int vlen_enc = vector_length_encoding(this); 7393 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7394 %} 7395 ins_pipe( pipe_slow ); 7396 %} 7397 7398 instruct castStoX(vec dst, vec src) %{ 7399 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7400 Matcher::vector_length(n->in(1)) <= 8 && // src 7401 Matcher::vector_element_basic_type(n) == T_BYTE); 7402 match(Set dst (VectorCastS2X src)); 7403 format %{ "vector_cast_s2x $dst,$src" %} 7404 ins_encode %{ 7405 assert(UseAVX > 0, "required"); 7406 7407 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7408 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7409 %} 7410 ins_pipe( pipe_slow ); 7411 %} 7412 7413 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7414 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7415 Matcher::vector_length(n->in(1)) == 16 && // src 7416 Matcher::vector_element_basic_type(n) == T_BYTE); 7417 effect(TEMP dst, TEMP vtmp); 7418 match(Set dst (VectorCastS2X src)); 7419 format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp as TEMP" %} 7420 ins_encode %{ 7421 assert(UseAVX > 0, "required"); 7422 7423 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7424 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7425 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7426 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7427 %} 7428 ins_pipe( pipe_slow ); 7429 %} 7430 7431 instruct vcastStoX_evex(vec dst, vec src) %{ 7432 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7433 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7434 match(Set dst (VectorCastS2X src)); 7435 format %{ "vector_cast_s2x $dst,$src\t!" %} 7436 ins_encode %{ 7437 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7438 int src_vlen_enc = vector_length_encoding(this, $src); 7439 int vlen_enc = vector_length_encoding(this); 7440 switch (to_elem_bt) { 7441 case T_BYTE: 7442 if (!VM_Version::supports_avx512vl()) { 7443 vlen_enc = Assembler::AVX_512bit; 7444 } 7445 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7446 break; 7447 case T_INT: 7448 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7449 break; 7450 case T_FLOAT: 7451 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7452 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7453 break; 7454 case T_LONG: 7455 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7456 break; 7457 case T_DOUBLE: { 7458 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7459 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7460 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7461 break; 7462 } 7463 default: 7464 ShouldNotReachHere(); 7465 } 7466 %} 7467 ins_pipe( pipe_slow ); 7468 %} 7469 7470 instruct castItoX(vec dst, vec src) %{ 7471 predicate(UseAVX <= 2 && 7472 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7473 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7474 match(Set dst (VectorCastI2X src)); 7475 format %{ "vector_cast_i2x $dst,$src" %} 7476 ins_encode %{ 7477 assert(UseAVX > 0, "required"); 7478 7479 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7480 int vlen_enc = vector_length_encoding(this, $src); 7481 7482 if (to_elem_bt == T_BYTE) { 7483 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7484 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7485 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7486 } else { 7487 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7488 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7489 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7490 } 7491 %} 7492 ins_pipe( pipe_slow ); 7493 %} 7494 7495 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7496 predicate(UseAVX <= 2 && 7497 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7498 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7499 match(Set dst (VectorCastI2X src)); 7500 format %{ "vector_cast_i2x $dst,$src\t! 
using $vtmp as TEMP" %} 7501 effect(TEMP dst, TEMP vtmp); 7502 ins_encode %{ 7503 assert(UseAVX > 0, "required"); 7504 7505 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7506 int vlen_enc = vector_length_encoding(this, $src); 7507 7508 if (to_elem_bt == T_BYTE) { 7509 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7510 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7511 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7512 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7513 } else { 7514 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7515 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7516 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7517 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7518 } 7519 %} 7520 ins_pipe( pipe_slow ); 7521 %} 7522 7523 instruct vcastItoX_evex(vec dst, vec src) %{ 7524 predicate(UseAVX > 2 || 7525 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7526 match(Set dst (VectorCastI2X src)); 7527 format %{ "vector_cast_i2x $dst,$src\t!" %} 7528 ins_encode %{ 7529 assert(UseAVX > 0, "required"); 7530 7531 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7532 int src_vlen_enc = vector_length_encoding(this, $src); 7533 int dst_vlen_enc = vector_length_encoding(this); 7534 switch (dst_elem_bt) { 7535 case T_BYTE: 7536 if (!VM_Version::supports_avx512vl()) { 7537 src_vlen_enc = Assembler::AVX_512bit; 7538 } 7539 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7540 break; 7541 case T_SHORT: 7542 if (!VM_Version::supports_avx512vl()) { 7543 src_vlen_enc = Assembler::AVX_512bit; 7544 } 7545 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7546 break; 7547 case T_FLOAT: 7548 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7549 break; 7550 case T_LONG: 7551 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7552 break; 7553 case T_DOUBLE: 7554 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7555 break; 7556 default: 7557 ShouldNotReachHere(); 7558 } 7559 %} 7560 ins_pipe( pipe_slow ); 7561 %} 7562 7563 instruct vcastLtoBS(vec dst, vec src) %{ 7564 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7565 UseAVX <= 2); 7566 match(Set dst (VectorCastL2X src)); 7567 format %{ "vector_cast_l2x $dst,$src" %} 7568 ins_encode %{ 7569 assert(UseAVX > 0, "required"); 7570 7571 int vlen = Matcher::vector_length_in_bytes(this, $src); 7572 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7573 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? 
ExternalAddress(vector_int_to_byte_mask()) 7574 : ExternalAddress(vector_int_to_short_mask()); 7575 if (vlen <= 16) { 7576 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7577 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7578 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7579 } else { 7580 assert(vlen <= 32, "required"); 7581 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7582 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7583 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7584 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7585 } 7586 if (to_elem_bt == T_BYTE) { 7587 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7588 } 7589 %} 7590 ins_pipe( pipe_slow ); 7591 %} 7592 7593 instruct vcastLtoX_evex(vec dst, vec src) %{ 7594 predicate(UseAVX > 2 || 7595 (Matcher::vector_element_basic_type(n) == T_INT || 7596 Matcher::vector_element_basic_type(n) == T_FLOAT || 7597 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7598 match(Set dst (VectorCastL2X src)); 7599 format %{ "vector_cast_l2x $dst,$src\t!" %} 7600 ins_encode %{ 7601 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7602 int vlen = Matcher::vector_length_in_bytes(this, $src); 7603 int vlen_enc = vector_length_encoding(this, $src); 7604 switch (to_elem_bt) { 7605 case T_BYTE: 7606 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7607 vlen_enc = Assembler::AVX_512bit; 7608 } 7609 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7610 break; 7611 case T_SHORT: 7612 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7613 vlen_enc = Assembler::AVX_512bit; 7614 } 7615 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7616 break; 7617 case T_INT: 7618 if (vlen == 8) { 7619 if ($dst$$XMMRegister != $src$$XMMRegister) { 7620 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7621 } 7622 } else if (vlen == 16) { 7623 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7624 } else if (vlen == 32) { 7625 if (UseAVX > 2) { 7626 if (!VM_Version::supports_avx512vl()) { 7627 vlen_enc = Assembler::AVX_512bit; 7628 } 7629 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7630 } else { 7631 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7632 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7633 } 7634 } else { // vlen == 64 7635 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7636 } 7637 break; 7638 case T_FLOAT: 7639 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7640 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7641 break; 7642 case T_DOUBLE: 7643 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7644 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7645 break; 7646 7647 default: assert(false, "%s", type2name(to_elem_bt)); 7648 } 7649 %} 7650 ins_pipe( pipe_slow ); 7651 %} 7652 7653 instruct vcastFtoD_reg(vec dst, vec src) %{ 7654 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7655 match(Set dst (VectorCastF2X src)); 7656 format %{ "vector_cast_f2d $dst,$src\t!" 
%}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the explicit scratch register that used to be needed for loading
    // addresses wider than 32 bits in register-indirect addressing mode, since stub constants
    // live in the code cache and ReservedCodeCacheSize is currently capped at 2G. Targets are
    // free to raise that limit, but a code cache larger than 2G is unrealistic in practice,
    // and with the cap in place we save a temporary register allocation, which in the limiting
    // case can prevent spilling in blocks with high register pressure.
    __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    if (to_elem_bt == T_LONG) {
      int vlen_enc = vector_length_encoding(this);
      __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
    } else {
      int vlen_enc = vector_length_encoding(this, $src);
      __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

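// Note on the casts to integral types above and below: the raw x86 truncating conversions
// return the "integer indefinite" value (e.g. 0x80000000) for NaN and out-of-range inputs,
// whereas Java requires NaN to convert to 0 and overflow to saturate at MIN/MAX. Roughly,
// the vector_cast*_avx/evex helpers use the TEMP vectors (and, on EVEX, the mask registers)
// together with the sign-flip constants to detect such lanes and patch the result.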
instruct vcastDtoF_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
                              ExternalAddress(vector_float_signflip());
    __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                           $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vucast(vec dst, vec src) %{
  match(Set dst (VectorUCastB2X src));
  match(Set dst (VectorUCastS2X src));
  match(Set dst (VectorUCastI2X src));
  format %{ "vector_ucast $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n) < 64 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ?
0x3FBF : 0x3F80)); 7781 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, 7782 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7783 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister); 7784 %} 7785 ins_pipe( pipe_slow ); 7786 %} 7787 7788 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7789 predicate((VM_Version::supports_avx512vl() || 7790 Matcher::vector_length_in_bytes(n) == 64) && 7791 Matcher::vector_element_basic_type(n) == T_INT); 7792 match(Set dst (RoundVF src)); 7793 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7794 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7795 ins_encode %{ 7796 int vlen_enc = vector_length_encoding(this); 7797 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7798 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, 7799 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7800 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7801 %} 7802 ins_pipe( pipe_slow ); 7803 %} 7804 7805 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7806 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7807 match(Set dst (RoundVD src)); 7808 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7809 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7810 ins_encode %{ 7811 int vlen_enc = vector_length_encoding(this); 7812 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7813 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, 7814 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc, 7815 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7816 %} 7817 ins_pipe( pipe_slow ); 7818 %} 7819 7820 // --------------------------------- VectorMaskCmp -------------------------------------- 7821 7822 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7823 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7824 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 7825 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7826 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7827 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7828 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 7829 ins_encode %{ 7830 int vlen_enc = vector_length_encoding(this, $src1); 7831 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7832 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7833 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7834 } else { 7835 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7836 } 7837 %} 7838 ins_pipe( pipe_slow ); 7839 %} 7840 7841 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7842 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 7843 n->bottom_type()->isa_vectmask() == nullptr && 7844 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7845 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7846 effect(TEMP ktmp); 7847 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7848 ins_encode %{ 7849 int vlen_enc = Assembler::AVX_512bit; 7850 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7851 KRegister mask = k0; // The comparison itself is not being masked. 7852 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7853 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7854 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7855 } else { 7856 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7857 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7858 } 7859 %} 7860 ins_pipe( pipe_slow ); 7861 %} 7862 7863 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 7864 predicate(n->bottom_type()->isa_vectmask() && 7865 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7866 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7867 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 7868 ins_encode %{ 7869 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 7870 int vlen_enc = vector_length_encoding(this, $src1); 7871 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7872 KRegister mask = k0; // The comparison itself is not being masked. 7873 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7874 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7875 } else { 7876 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7877 } 7878 %} 7879 ins_pipe( pipe_slow ); 7880 %} 7881 7882 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7883 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7884 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7885 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7886 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7887 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7888 (n->in(2)->get_int() == BoolTest::eq || 7889 n->in(2)->get_int() == BoolTest::lt || 7890 n->in(2)->get_int() == BoolTest::gt)); // cond 7891 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7892 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 7893 ins_encode %{ 7894 int vlen_enc = vector_length_encoding(this, $src1); 7895 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7896 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7897 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc); 7898 %} 7899 ins_pipe( pipe_slow ); 7900 %} 7901 7902 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 7903 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7904 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7905 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7906 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7907 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7908 (n->in(2)->get_int() == BoolTest::ne || 7909 n->in(2)->get_int() == BoolTest::le || 7910 n->in(2)->get_int() == BoolTest::ge)); // cond 7911 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7912 effect(TEMP dst, TEMP xtmp); 7913 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %} 7914 ins_encode %{ 7915 int vlen_enc = vector_length_encoding(this, $src1); 7916 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7917 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7918 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7919 %} 7920 ins_pipe( pipe_slow ); 7921 %} 7922 7923 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 7924 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7925 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7926 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7927 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7928 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7929 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7930 effect(TEMP dst, TEMP xtmp); 7931 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! 
using $xtmp as TEMP" %}
  ins_encode %{
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));

    if (vlen_enc == Assembler::AVX_128bit) {
      __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    } else {
      __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    }
    __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

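// Note: the rule above handles unsigned predicates without AVX-512 by flipping the sign bit of
// both operands, since x <u y exactly when (x ^ SIGN_BIT) <s (y ^ SIGN_BIT), and then issuing
// an ordinary signed compare. The EVEX rules below can instead pass the signed/unsigned flavor
// straight to evpcmp*, which produces the result in a mask register.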
instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate((n->bottom_type()->isa_vectmask() == nullptr &&
             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    bool merge = false;
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    switch (src1_elem_bt) {
      case T_INT: {
        __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      case T_LONG: {
        __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}


instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    switch (src1_elem_bt) {
      case T_BYTE: {
        __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

// Extract

instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8); // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t!
using $vtmp as TEMP" %} 8077 ins_encode %{ 8078 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8079 8080 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8081 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 8082 %} 8083 ins_pipe( pipe_slow ); 8084 %} 8085 8086 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8087 predicate(Matcher::vector_length(n->in(1)) <= 4); 8088 match(Set dst (ExtractF src idx)); 8089 effect(TEMP dst, TEMP vtmp); 8090 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8091 ins_encode %{ 8092 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8093 8094 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 8095 %} 8096 ins_pipe( pipe_slow ); 8097 %} 8098 8099 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8100 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 8101 Matcher::vector_length(n->in(1)/*src*/) == 16); 8102 match(Set dst (ExtractF src idx)); 8103 effect(TEMP vtmp); 8104 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8105 ins_encode %{ 8106 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8107 8108 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8109 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 8110 %} 8111 ins_pipe( pipe_slow ); 8112 %} 8113 8114 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 8115 predicate(Matcher::vector_length(n->in(1)) == 2); // src 8116 match(Set dst (ExtractD src idx)); 8117 format %{ "extractD $dst,$src,$idx\t!" %} 8118 ins_encode %{ 8119 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8120 8121 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8122 %} 8123 ins_pipe( pipe_slow ); 8124 %} 8125 8126 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 8127 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8128 Matcher::vector_length(n->in(1)) == 8); // src 8129 match(Set dst (ExtractD src idx)); 8130 effect(TEMP vtmp); 8131 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 8132 ins_encode %{ 8133 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8134 8135 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8136 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 8137 %} 8138 ins_pipe( pipe_slow ); 8139 %} 8140 8141 // --------------------------------- Vector Blend -------------------------------------- 8142 8143 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 8144 predicate(UseAVX == 0); 8145 match(Set dst (VectorBlend (Binary dst src) mask)); 8146 format %{ "vector_blend $dst,$src,$mask\t! 
using $tmp as TEMP" %} 8147 effect(TEMP tmp); 8148 ins_encode %{ 8149 assert(UseSSE >= 4, "required"); 8150 8151 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 8152 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 8153 } 8154 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 8155 %} 8156 ins_pipe( pipe_slow ); 8157 %} 8158 8159 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8160 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8161 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8162 Matcher::vector_length_in_bytes(n) <= 32 && 8163 is_integral_type(Matcher::vector_element_basic_type(n))); 8164 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8165 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8166 ins_encode %{ 8167 int vlen_enc = vector_length_encoding(this); 8168 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8169 %} 8170 ins_pipe( pipe_slow ); 8171 %} 8172 8173 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8174 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8175 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8176 Matcher::vector_length_in_bytes(n) <= 32 && 8177 !is_integral_type(Matcher::vector_element_basic_type(n))); 8178 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8179 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8180 ins_encode %{ 8181 int vlen_enc = vector_length_encoding(this); 8182 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8183 %} 8184 ins_pipe( pipe_slow ); 8185 %} 8186 8187 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{ 8188 predicate(UseAVX > 0 && EnableX86ECoreOpts && 8189 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8190 Matcher::vector_length_in_bytes(n) <= 32); 8191 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8192 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %} 8193 effect(TEMP vtmp, TEMP dst); 8194 ins_encode %{ 8195 int vlen_enc = vector_length_encoding(this); 8196 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8197 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8198 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8199 %} 8200 ins_pipe( pipe_slow ); 8201 %} 8202 8203 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 8204 predicate(Matcher::vector_length_in_bytes(n) == 64 && 8205 n->in(2)->bottom_type()->isa_vectmask() == nullptr); 8206 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8207 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using k2 as TEMP" %} 8208 effect(TEMP ktmp); 8209 ins_encode %{ 8210 int vlen_enc = Assembler::AVX_512bit; 8211 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8212 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 8213 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8214 %} 8215 ins_pipe( pipe_slow ); 8216 %} 8217 8218 8219 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 8220 predicate(n->in(2)->bottom_type()->isa_vectmask() && 8221 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 8222 VM_Version::supports_avx512bw())); 8223 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8224 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 8225 ins_encode %{ 8226 int vlen_enc = vector_length_encoding(this); 8227 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8228 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8229 %} 8230 ins_pipe( pipe_slow ); 8231 %} 8232 8233 // --------------------------------- ABS -------------------------------------- 8234 // a = |a| 8235 instruct vabsB_reg(vec dst, vec src) %{ 8236 match(Set dst (AbsVB src)); 8237 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 8238 ins_encode %{ 8239 uint vlen = Matcher::vector_length(this); 8240 if (vlen <= 16) { 8241 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8242 } else { 8243 int vlen_enc = vector_length_encoding(this); 8244 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8245 } 8246 %} 8247 ins_pipe( pipe_slow ); 8248 %} 8249 8250 instruct vabsS_reg(vec dst, vec src) %{ 8251 match(Set dst (AbsVS src)); 8252 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 8253 ins_encode %{ 8254 uint vlen = Matcher::vector_length(this); 8255 if (vlen <= 8) { 8256 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8257 } else { 8258 int vlen_enc = vector_length_encoding(this); 8259 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8260 } 8261 %} 8262 ins_pipe( pipe_slow ); 8263 %} 8264 8265 instruct vabsI_reg(vec dst, vec src) %{ 8266 match(Set dst (AbsVI src)); 8267 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 8268 ins_encode %{ 8269 uint vlen = Matcher::vector_length(this); 8270 if (vlen <= 4) { 8271 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8272 } else { 8273 int vlen_enc = vector_length_encoding(this); 8274 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8275 } 8276 %} 8277 ins_pipe( pipe_slow ); 8278 %} 8279 8280 instruct vabsL_reg(vec dst, vec src) %{ 8281 match(Set dst (AbsVL src)); 8282 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 8283 ins_encode %{ 8284 assert(UseAVX > 2, "required"); 8285 int vlen_enc = vector_length_encoding(this); 8286 if (!VM_Version::supports_avx512vl()) { 8287 vlen_enc = Assembler::AVX_512bit; 8288 } 8289 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8290 %} 8291 ins_pipe( pipe_slow ); 8292 %} 8293 8294 // --------------------------------- ABSNEG -------------------------------------- 8295 8296 instruct vabsnegF(vec dst, vec src) %{ 8297 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 8298 match(Set dst (AbsVF src)); 8299 match(Set dst (NegVF src)); 8300 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 8301 ins_cost(150); 8302 ins_encode %{ 8303 int opcode = 
this->ideal_Opcode(); 8304 int vlen = Matcher::vector_length(this); 8305 if (vlen == 2) { 8306 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8307 } else { 8308 assert(vlen == 8 || vlen == 16, "required"); 8309 int vlen_enc = vector_length_encoding(this); 8310 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8311 } 8312 %} 8313 ins_pipe( pipe_slow ); 8314 %} 8315 8316 instruct vabsneg4F(vec dst) %{ 8317 predicate(Matcher::vector_length(n) == 4); 8318 match(Set dst (AbsVF dst)); 8319 match(Set dst (NegVF dst)); 8320 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 8321 ins_cost(150); 8322 ins_encode %{ 8323 int opcode = this->ideal_Opcode(); 8324 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 8325 %} 8326 ins_pipe( pipe_slow ); 8327 %} 8328 8329 instruct vabsnegD(vec dst, vec src) %{ 8330 match(Set dst (AbsVD src)); 8331 match(Set dst (NegVD src)); 8332 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 8333 ins_encode %{ 8334 int opcode = this->ideal_Opcode(); 8335 uint vlen = Matcher::vector_length(this); 8336 if (vlen == 2) { 8337 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8338 } else { 8339 int vlen_enc = vector_length_encoding(this); 8340 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8341 } 8342 %} 8343 ins_pipe( pipe_slow ); 8344 %} 8345 8346 //------------------------------------- VectorTest -------------------------------------------- 8347 8348 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ 8349 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); 8350 match(Set cr (VectorTest src1 src2)); 8351 effect(TEMP vtmp); 8352 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %} 8353 ins_encode %{ 8354 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8355 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8356 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen); 8357 %} 8358 ins_pipe( pipe_slow ); 8359 %} 8360 8361 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{ 8362 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16); 8363 match(Set cr (VectorTest src1 src2)); 8364 format %{ "vptest_ge16 $src1, $src2\n\t" %} 8365 ins_encode %{ 8366 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8367 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8368 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen); 8369 %} 8370 ins_pipe( pipe_slow ); 8371 %} 8372 8373 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8374 predicate((Matcher::vector_length(n->in(1)) < 8 || 8375 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8376 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 8377 match(Set cr (VectorTest src1 src2)); 8378 effect(TEMP tmp); 8379 format %{ "ktest_alltrue_le8 $src1, $src2\t! 
using $tmp as TEMP" %} 8380 ins_encode %{ 8381 uint masklen = Matcher::vector_length(this, $src1); 8382 __ kmovwl($tmp$$Register, $src1$$KRegister); 8383 __ andl($tmp$$Register, (1 << masklen) - 1); 8384 __ cmpl($tmp$$Register, (1 << masklen) - 1); 8385 %} 8386 ins_pipe( pipe_slow ); 8387 %} 8388 8389 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8390 predicate((Matcher::vector_length(n->in(1)) < 8 || 8391 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8392 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 8393 match(Set cr (VectorTest src1 src2)); 8394 effect(TEMP tmp); 8395 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %} 8396 ins_encode %{ 8397 uint masklen = Matcher::vector_length(this, $src1); 8398 __ kmovwl($tmp$$Register, $src1$$KRegister); 8399 __ andl($tmp$$Register, (1 << masklen) - 1); 8400 %} 8401 ins_pipe( pipe_slow ); 8402 %} 8403 8404 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ 8405 predicate(Matcher::vector_length(n->in(1)) >= 16 || 8406 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq())); 8407 match(Set cr (VectorTest src1 src2)); 8408 format %{ "ktest_ge8 $src1, $src2\n\t" %} 8409 ins_encode %{ 8410 uint masklen = Matcher::vector_length(this, $src1); 8411 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister); 8412 %} 8413 ins_pipe( pipe_slow ); 8414 %} 8415 8416 //------------------------------------- LoadMask -------------------------------------------- 8417 8418 instruct loadMask(legVec dst, legVec src) %{ 8419 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw()); 8420 match(Set dst (VectorLoadMask src)); 8421 effect(TEMP dst); 8422 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8423 ins_encode %{ 8424 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8425 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8426 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8427 %} 8428 ins_pipe( pipe_slow ); 8429 %} 8430 8431 instruct loadMask64(kReg dst, vec src, vec xtmp) %{ 8432 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8433 match(Set dst (VectorLoadMask src)); 8434 effect(TEMP xtmp); 8435 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} 8436 ins_encode %{ 8437 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8438 true, Assembler::AVX_512bit); 8439 %} 8440 ins_pipe( pipe_slow ); 8441 %} 8442 8443 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8444 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8445 match(Set dst (VectorLoadMask src)); 8446 effect(TEMP xtmp); 8447 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8448 ins_encode %{ 8449 int vlen_enc = vector_length_encoding(in(1)); 8450 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8451 false, vlen_enc); 8452 %} 8453 ins_pipe( pipe_slow ); 8454 %} 8455 8456 //------------------------------------- StoreMask -------------------------------------------- 8457 8458 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8459 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8460 match(Set dst (VectorStoreMask src size)); 8461 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8462 ins_encode %{ 8463 int vlen = Matcher::vector_length(this); 8464 if (vlen <= 16 && UseAVX <= 2) { 8465 assert(UseSSE >= 3, "required"); 8466 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8467 } else { 8468 assert(UseAVX > 0, "required"); 8469 int src_vlen_enc = vector_length_encoding(this, $src); 8470 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8471 } 8472 %} 8473 ins_pipe( pipe_slow ); 8474 %} 8475 8476 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8477 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8478 match(Set dst (VectorStoreMask src size)); 8479 effect(TEMP_DEF dst, TEMP xtmp); 8480 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8481 ins_encode %{ 8482 int vlen_enc = Assembler::AVX_128bit; 8483 int vlen = Matcher::vector_length(this); 8484 if (vlen <= 8) { 8485 assert(UseSSE >= 3, "required"); 8486 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8487 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8488 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8489 } else { 8490 assert(UseAVX > 0, "required"); 8491 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8492 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8493 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8494 } 8495 %} 8496 ins_pipe( pipe_slow ); 8497 %} 8498 8499 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8500 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8501 match(Set dst (VectorStoreMask src size)); 8502 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8503 effect(TEMP_DEF dst, TEMP xtmp); 8504 ins_encode %{ 8505 int vlen_enc = Assembler::AVX_128bit; 8506 int vlen = Matcher::vector_length(this); 8507 if (vlen <= 4) { 8508 assert(UseSSE >= 3, "required"); 8509 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8510 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8511 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8512 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8513 } else { 8514 assert(UseAVX > 0, "required"); 8515 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8516 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8517 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8518 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8519 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8520 } 8521 %} 8522 ins_pipe( pipe_slow ); 8523 %} 8524 8525 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8526 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8527 match(Set dst (VectorStoreMask src size)); 8528 effect(TEMP_DEF dst, TEMP xtmp); 8529 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8530 ins_encode %{ 8531 assert(UseSSE >= 3, "required"); 8532 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8533 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8534 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8535 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8536 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8537 %} 8538 ins_pipe( pipe_slow ); 8539 %} 8540 8541 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8542 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8543 match(Set dst (VectorStoreMask src size)); 8544 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %} 8545 effect(TEMP_DEF dst, TEMP vtmp); 8546 ins_encode %{ 8547 int vlen_enc = Assembler::AVX_128bit; 8548 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8549 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8550 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8551 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8552 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8553 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8554 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8555 %} 8556 ins_pipe( pipe_slow ); 8557 %} 8558 8559 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8560 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8561 match(Set dst (VectorStoreMask src size)); 8562 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8563 ins_encode %{ 8564 int src_vlen_enc = vector_length_encoding(this, $src); 8565 int dst_vlen_enc = vector_length_encoding(this); 8566 if (!VM_Version::supports_avx512vl()) { 8567 src_vlen_enc = Assembler::AVX_512bit; 8568 } 8569 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8570 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8571 %} 8572 ins_pipe( pipe_slow ); 8573 %} 8574 8575 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8576 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8577 match(Set dst (VectorStoreMask src size)); 8578 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8579 ins_encode %{ 8580 int src_vlen_enc = vector_length_encoding(this, $src); 8581 int dst_vlen_enc = vector_length_encoding(this); 8582 if (!VM_Version::supports_avx512vl()) { 8583 src_vlen_enc = Assembler::AVX_512bit; 8584 } 8585 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8586 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8587 %} 8588 ins_pipe( pipe_slow ); 8589 %} 8590 8591 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{ 8592 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8593 match(Set dst (VectorStoreMask mask size)); 8594 effect(TEMP_DEF dst); 8595 format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %} 8596 ins_encode %{ 8597 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8598 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8599 false, Assembler::AVX_512bit, noreg); 8600 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8601 %} 8602 ins_pipe( pipe_slow ); 8603 %} 8604 8605 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8606 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8607 match(Set dst (VectorStoreMask mask size)); 8608 effect(TEMP_DEF dst); 8609 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8610 ins_encode %{ 8611 int dst_vlen_enc = vector_length_encoding(this); 8612 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8613 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8614 %} 8615 ins_pipe( pipe_slow ); 8616 %} 8617 8618 instruct vmaskcast_evex(kReg dst) %{ 8619 match(Set dst (VectorMaskCast dst)); 8620 ins_cost(0); 8621 format %{ "vector_mask_cast $dst" %} 8622 ins_encode %{ 8623 // empty 8624 %} 8625 ins_pipe(empty); 8626 %} 8627 8628 instruct vmaskcast(vec dst) %{ 8629 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); 8630 match(Set dst (VectorMaskCast dst)); 8631 ins_cost(0); 8632 format %{ "vector_mask_cast $dst" %} 8633 ins_encode %{ 8634 // empty 8635 %} 8636 ins_pipe(empty); 8637 %} 8638 8639 instruct vmaskcast_avx(vec dst, vec src) %{ 8640 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1))); 8641 match(Set dst (VectorMaskCast src)); 8642 format %{ "vector_mask_cast $dst, $src" %} 8643 ins_encode %{ 8644 int vlen = Matcher::vector_length(this); 8645 BasicType src_bt = Matcher::vector_element_basic_type(this, $src); 8646 BasicType dst_bt = Matcher::vector_element_basic_type(this); 8647 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen); 8648 %} 8649 ins_pipe(pipe_slow); 8650 %} 8651 8652 //-------------------------------- Load Iota Indices ---------------------------------- 8653 8654 instruct loadIotaIndices(vec dst, immI_0 src) %{ 8655 match(Set dst (VectorLoadConst src)); 8656 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8657 ins_encode %{ 8658 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8659 BasicType bt = Matcher::vector_element_basic_type(this); 8660 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt); 8661 %} 8662 ins_pipe( pipe_slow ); 8663 %} 8664 8665 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ 8666 match(Set dst (PopulateIndex src1 src2)); 8667 effect(TEMP dst, TEMP vtmp); 8668 format %{ "vector_populate_index $dst $src1 $src2\t! 
using $vtmp as TEMP" %} 8669 ins_encode %{ 8670 assert($src2$$constant == 1, "required"); 8671 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8672 int vlen_enc = vector_length_encoding(this); 8673 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8674 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8675 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8676 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8677 %} 8678 ins_pipe( pipe_slow ); 8679 %} 8680 8681 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ 8682 match(Set dst (PopulateIndex src1 src2)); 8683 effect(TEMP dst, TEMP vtmp); 8684 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} 8685 ins_encode %{ 8686 assert($src2$$constant == 1, "required"); 8687 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8688 int vlen_enc = vector_length_encoding(this); 8689 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8690 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8691 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8692 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8693 %} 8694 ins_pipe( pipe_slow ); 8695 %} 8696 8697 //-------------------------------- Rearrange ---------------------------------- 8698 8699 // LoadShuffle/Rearrange for Byte 8700 instruct rearrangeB(vec dst, vec shuffle) %{ 8701 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8702 Matcher::vector_length(n) < 32); 8703 match(Set dst (VectorRearrange dst shuffle)); 8704 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8705 ins_encode %{ 8706 assert(UseSSE >= 4, "required"); 8707 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8708 %} 8709 ins_pipe( pipe_slow ); 8710 %} 8711 8712 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8713 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8714 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8715 match(Set dst (VectorRearrange src shuffle)); 8716 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8717 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8718 ins_encode %{ 8719 assert(UseAVX >= 2, "required"); 8720 // Swap src into vtmp1 8721 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8722 // Shuffle swapped src to get entries from other 128 bit lane 8723 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8724 // Shuffle original src to get entries from self 128 bit lane 8725 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8726 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8727 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8728 // Perform the blend 8729 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8730 %} 8731 ins_pipe( pipe_slow ); 8732 %} 8733 8734 8735 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{ 8736 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8737 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi()); 8738 match(Set dst (VectorRearrange src shuffle)); 8739 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 8740 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %} 8741 ins_encode %{ 8742 int vlen_enc = vector_length_encoding(this); 8743 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, 8744 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, 8745 $rtmp$$Register, $ktmp$$KRegister, vlen_enc); 8746 %} 8747 ins_pipe( pipe_slow ); 8748 %} 8749 8750 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{ 8751 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8752 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); 8753 match(Set dst (VectorRearrange src shuffle)); 8754 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8755 ins_encode %{ 8756 int vlen_enc = vector_length_encoding(this); 8757 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8758 %} 8759 ins_pipe( pipe_slow ); 8760 %} 8761 8762 // LoadShuffle/Rearrange for Short 8763 8764 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{ 8765 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8766 !VM_Version::supports_avx512bw()); 8767 match(Set dst (VectorLoadShuffle src)); 8768 effect(TEMP dst, TEMP vtmp); 8769 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8770 ins_encode %{ 8771 // Create a byte shuffle mask from short shuffle mask 8772 // only byte shuffle instruction available on these platforms 8773 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8774 if (UseAVX == 0) { 8775 assert(vlen_in_bytes <= 16, "required"); 8776 // Multiply each shuffle by two to get byte index 8777 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister); 8778 __ psllw($vtmp$$XMMRegister, 1); 8779 8780 // Duplicate to create 2 copies of byte index 8781 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8782 __ psllw($dst$$XMMRegister, 8); 8783 __ por($dst$$XMMRegister, $vtmp$$XMMRegister); 8784 8785 // Add one to get alternate byte index 8786 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg); 8787 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8788 } else { 8789 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); 8790 int vlen_enc = vector_length_encoding(this); 8791 // Multiply each shuffle by two to get byte index 8792 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc); 8793 8794 // Duplicate to create 2 copies of byte index 8795 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); 8796 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8797 8798 // Add one to get alternate byte index 8799 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg); 8800 } 8801 %} 8802 ins_pipe( pipe_slow ); 8803 %} 8804 8805 instruct rearrangeS(vec dst, vec shuffle) %{ 8806 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8807 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); 8808 match(Set dst (VectorRearrange dst shuffle)); 8809 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8810 ins_encode %{ 8811 assert(UseSSE >= 4, "required"); 8812 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8813 %} 8814 ins_pipe( pipe_slow ); 8815 %} 8816 8817 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8818 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8819 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 8820 match(Set dst (VectorRearrange src shuffle)); 8821 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8822 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8823 ins_encode %{ 8824 assert(UseAVX >= 2, "required"); 8825 // Swap src into vtmp1 8826 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8827 // Shuffle swapped src to get entries from other 128 bit lane 8828 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8829 // Shuffle original src to get entries from self 128 bit lane 8830 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8831 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8832 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8833 // Perform the blend 8834 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8835 %} 8836 ins_pipe( pipe_slow ); 8837 %} 8838 8839 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 8840 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8841 VM_Version::supports_avx512bw()); 8842 match(Set dst (VectorRearrange src shuffle)); 8843 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8844 ins_encode %{ 8845 int vlen_enc = vector_length_encoding(this); 8846 if (!VM_Version::supports_avx512vl()) { 8847 vlen_enc = Assembler::AVX_512bit; 8848 } 8849 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8850 %} 8851 ins_pipe( pipe_slow ); 8852 %} 8853 8854 // LoadShuffle/Rearrange for Integer and Float 8855 8856 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{ 8857 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8858 Matcher::vector_length(n) == 4 && UseAVX == 0); 8859 match(Set dst (VectorLoadShuffle src)); 8860 effect(TEMP dst, TEMP vtmp); 8861 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8862 ins_encode %{ 8863 assert(UseSSE >= 4, "required"); 8864 8865 // Create a byte shuffle mask from int shuffle mask 8866 // only byte shuffle instruction available on these platforms 8867 8868 // Duplicate and multiply each shuffle by 4 8869 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister); 8870 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8871 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8872 __ psllw($vtmp$$XMMRegister, 2); 8873 8874 // Duplicate again to create 4 copies of byte index 8875 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8876 __ psllw($dst$$XMMRegister, 8); 8877 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 8878 8879 // Add 3,2,1,0 to get alternate byte index 8880 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg); 8881 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8882 %} 8883 ins_pipe( pipe_slow ); 8884 %} 8885 8886 instruct rearrangeI(vec dst, vec shuffle) %{ 8887 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8888 UseAVX == 0); 8889 match(Set dst (VectorRearrange dst shuffle)); 8890 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8891 ins_encode %{ 8892 assert(UseSSE >= 4, "required"); 8893 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8894 %} 8895 ins_pipe( pipe_slow ); 8896 %} 8897 8898 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 8899 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8900 UseAVX > 0); 8901 match(Set dst (VectorRearrange src shuffle)); 8902 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8903 ins_encode %{ 8904 int vlen_enc = vector_length_encoding(this); 8905 BasicType bt = Matcher::vector_element_basic_type(this); 8906 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8907 %} 8908 ins_pipe( pipe_slow ); 8909 %} 8910 8911 // LoadShuffle/Rearrange for Long and Double 8912 8913 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{ 8914 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8915 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8916 match(Set dst (VectorLoadShuffle src)); 8917 effect(TEMP dst, TEMP vtmp); 8918 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8919 ins_encode %{ 8920 assert(UseAVX >= 2, "required"); 8921 8922 int vlen_enc = vector_length_encoding(this); 8923 // Create a double word shuffle mask from long shuffle mask 8924 // only double word shuffle instruction available on these platforms 8925 8926 // Multiply each shuffle by two to get double word index 8927 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc); 8928 8929 // Duplicate each double word shuffle 8930 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 8931 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8932 8933 // Add one to get alternate double word index 8934 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg); 8935 %} 8936 ins_pipe( pipe_slow ); 8937 %} 8938 8939 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 8940 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8941 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8942 match(Set dst (VectorRearrange src shuffle)); 8943 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8944 ins_encode %{ 8945 assert(UseAVX >= 2, "required"); 8946 8947 int vlen_enc = vector_length_encoding(this); 8948 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8949 %} 8950 ins_pipe( pipe_slow ); 8951 %} 8952 8953 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 8954 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8955 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 8956 match(Set dst (VectorRearrange src shuffle)); 8957 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8958 ins_encode %{ 8959 assert(UseAVX > 2, "required"); 8960 8961 int vlen_enc = vector_length_encoding(this); 8962 if (vlen_enc == Assembler::AVX_128bit) { 8963 vlen_enc = Assembler::AVX_256bit; 8964 } 8965 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8966 %} 8967 ins_pipe( pipe_slow ); 8968 %} 8969 8970 // --------------------------------- FMA -------------------------------------- 8971 // a * b + c 8972 8973 instruct vfmaF_reg(vec a, vec b, vec c) %{ 8974 match(Set c (FmaVF c (Binary a b))); 8975 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8976 ins_cost(150); 8977 ins_encode %{ 8978 assert(UseFMA, "not enabled"); 8979 int vlen_enc = vector_length_encoding(this); 8980 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8981 %} 8982 ins_pipe( pipe_slow ); 8983 %} 8984 8985 instruct vfmaF_mem(vec a, memory b, vec c) %{ 8986 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8987 match(Set c (FmaVF c (Binary a (LoadVector b)))); 8988 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8989 ins_cost(150); 8990 ins_encode %{ 8991 assert(UseFMA, "not enabled"); 8992 int vlen_enc = vector_length_encoding(this); 8993 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8994 %} 8995 ins_pipe( pipe_slow ); 8996 %} 8997 8998 instruct vfmaD_reg(vec a, vec b, vec c) %{ 8999 match(Set c (FmaVD c (Binary a b))); 9000 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9001 ins_cost(150); 9002 ins_encode %{ 9003 assert(UseFMA, "not enabled"); 9004 int vlen_enc = vector_length_encoding(this); 9005 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9006 %} 9007 ins_pipe( pipe_slow ); 9008 %} 
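// The FmaVF/FmaVD rules above (and the memory-operand variants that follow) lower the
// ideal "c = a * b + c" shape to packed fused-multiply-add instructions via the
// vfmaf/vfmad assembler helpers used in the encodings. A rough scalar sketch of the
// intended per-element semantics, assuming hypothetical element arrays a, b, c of
// length vlen (multiply and add are fused, i.e. rounded once):
//
//   for (int i = 0; i < vlen; i++) {
//     c[i] = std::fma(a[i], b[i], c[i]);   // not equivalent to a[i] * b[i] + c[i] in general
//   }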
9009 9010 instruct vfmaD_mem(vec a, memory b, vec c) %{ 9011 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9012 match(Set c (FmaVD c (Binary a (LoadVector b)))); 9013 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9014 ins_cost(150); 9015 ins_encode %{ 9016 assert(UseFMA, "not enabled"); 9017 int vlen_enc = vector_length_encoding(this); 9018 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9019 %} 9020 ins_pipe( pipe_slow ); 9021 %} 9022 9023 // --------------------------------- Vector Multiply Add -------------------------------------- 9024 9025 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 9026 predicate(UseAVX == 0); 9027 match(Set dst (MulAddVS2VI dst src1)); 9028 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 9029 ins_encode %{ 9030 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 9031 %} 9032 ins_pipe( pipe_slow ); 9033 %} 9034 9035 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 9036 predicate(UseAVX > 0); 9037 match(Set dst (MulAddVS2VI src1 src2)); 9038 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 9039 ins_encode %{ 9040 int vlen_enc = vector_length_encoding(this); 9041 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9042 %} 9043 ins_pipe( pipe_slow ); 9044 %} 9045 9046 // --------------------------------- Vector Multiply Add Add ---------------------------------- 9047 9048 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 9049 predicate(VM_Version::supports_avx512_vnni()); 9050 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 9051 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %} 9052 ins_encode %{ 9053 assert(UseAVX > 2, "required"); 9054 int vlen_enc = vector_length_encoding(this); 9055 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9056 %} 9057 ins_pipe( pipe_slow ); 9058 ins_cost(10); 9059 %} 9060 9061 // --------------------------------- PopCount -------------------------------------- 9062 9063 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 9064 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9065 match(Set dst (PopCountVI src)); 9066 match(Set dst (PopCountVL src)); 9067 format %{ "vector_popcount_integral $dst, $src" %} 9068 ins_encode %{ 9069 int opcode = this->ideal_Opcode(); 9070 int vlen_enc = vector_length_encoding(this, $src); 9071 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9072 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 9073 %} 9074 ins_pipe( pipe_slow ); 9075 %} 9076 9077 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9078 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9079 match(Set dst (PopCountVI src mask)); 9080 match(Set dst (PopCountVL src mask)); 9081 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 9082 ins_encode %{ 9083 int vlen_enc = vector_length_encoding(this, $src); 9084 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9085 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9086 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 9087 %} 9088 ins_pipe( pipe_slow ); 9089 %} 9090 9091 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 9092 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9093 
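  // Fallback popcount: this rule is taken when is_vector_popcount_predicate() is false,
  // i.e. (roughly) when the dedicated AVX-512 vector popcount instructions are not
  // available for this element type, so vector_popcount_integral() synthesizes the count
  // from ordinary SIMD ops. A common technique is a per-nibble table lookup; a sketch
  // only, with a hypothetical 16-entry table LUT = {0,1,1,2, 1,2,2,3, 1,2,2,3, 2,3,3,4}
  // (the real sequence lives in the macro assembler):
  //
  //   lo  = bytes & 0x0F;                        // low nibble of each byte
  //   hi  = (bytes >> 4) & 0x0F;                 // high nibble of each byte
  //   cnt = pshufb(LUT, lo) + pshufb(LUT, hi);   // per-byte popcount
  //   // wider element counts are then obtained by summing adjacent byte counts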
match(Set dst (PopCountVI src)); 9094 match(Set dst (PopCountVL src)); 9095 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9096 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 9097 ins_encode %{ 9098 int opcode = this->ideal_Opcode(); 9099 int vlen_enc = vector_length_encoding(this, $src); 9100 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9101 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9102 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 9103 %} 9104 ins_pipe( pipe_slow ); 9105 %} 9106 9107 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 9108 9109 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 9110 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9111 Matcher::vector_length_in_bytes(n->in(1)))); 9112 match(Set dst (CountTrailingZerosV src)); 9113 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 9114 ins_cost(400); 9115 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 9116 ins_encode %{ 9117 int vlen_enc = vector_length_encoding(this, $src); 9118 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9119 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9120 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9121 %} 9122 ins_pipe( pipe_slow ); 9123 %} 9124 9125 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9126 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9127 VM_Version::supports_avx512cd() && 9128 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9129 match(Set dst (CountTrailingZerosV src)); 9130 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9131 ins_cost(400); 9132 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 9133 ins_encode %{ 9134 int vlen_enc = vector_length_encoding(this, $src); 9135 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9136 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9137 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9138 %} 9139 ins_pipe( pipe_slow ); 9140 %} 9141 9142 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{ 9143 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9144 match(Set dst (CountTrailingZerosV src)); 9145 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp); 9146 ins_cost(400); 9147 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %} 9148 ins_encode %{ 9149 int vlen_enc = vector_length_encoding(this, $src); 9150 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9151 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9152 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 9153 $ktmp$$KRegister, $rtmp$$Register, vlen_enc); 9154 %} 9155 ins_pipe( pipe_slow ); 9156 %} 9157 9158 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9159 
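  // AVX/AVX2 fallback: there is no packed trailing-zero-count instruction, so the helper
  // derives the result from other primitives. The usual identity, shown here only as a
  // sketch for a W-bit element x (the exact instruction sequence is in
  // vector_count_trailing_zeros_avx):
  //
  //   tzcnt(x) = (x == 0) ? W : (W - 1) - lzcnt(x & -x)   // x & -x isolates the lowest set bit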
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9160 match(Set dst (CountTrailingZerosV src)); 9161 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9162 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9163 ins_encode %{ 9164 int vlen_enc = vector_length_encoding(this, $src); 9165 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9166 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9167 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9168 %} 9169 ins_pipe( pipe_slow ); 9170 %} 9171 9172 9173 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 9174 9175 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 9176 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 9177 effect(TEMP dst); 9178 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9179 ins_encode %{ 9180 int vector_len = vector_length_encoding(this); 9181 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 9182 %} 9183 ins_pipe( pipe_slow ); 9184 %} 9185 9186 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 9187 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 9188 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 9189 effect(TEMP dst); 9190 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9191 ins_encode %{ 9192 int vector_len = vector_length_encoding(this); 9193 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 9194 %} 9195 ins_pipe( pipe_slow ); 9196 %} 9197 9198 // --------------------------------- Rotation Operations ---------------------------------- 9199 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 9200 match(Set dst (RotateLeftV src shift)); 9201 match(Set dst (RotateRightV src shift)); 9202 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 9203 ins_encode %{ 9204 int opcode = this->ideal_Opcode(); 9205 int vector_len = vector_length_encoding(this); 9206 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9207 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 9208 %} 9209 ins_pipe( pipe_slow ); 9210 %} 9211 9212 instruct vprorate(vec dst, vec src, vec shift) %{ 9213 match(Set dst (RotateLeftV src shift)); 9214 match(Set dst (RotateRightV src shift)); 9215 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 9216 ins_encode %{ 9217 int opcode = this->ideal_Opcode(); 9218 int vector_len = vector_length_encoding(this); 9219 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9220 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9221 %} 9222 ins_pipe( pipe_slow ); 9223 %} 9224 9225 // ---------------------------------- Masked Operations ------------------------------------ 9226 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{ 9227 predicate(!n->in(3)->bottom_type()->isa_vectmask()); 9228 match(Set dst (LoadVectorMasked mem mask)); 9229 format %{ "vector_masked_load $dst, $mem, $mask \t! 
vector masked copy" %} 9230 ins_encode %{ 9231 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9232 int vlen_enc = vector_length_encoding(this); 9233 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc); 9234 %} 9235 ins_pipe( pipe_slow ); 9236 %} 9237 9238 9239 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{ 9240 predicate(n->in(3)->bottom_type()->isa_vectmask()); 9241 match(Set dst (LoadVectorMasked mem mask)); 9242 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 9243 ins_encode %{ 9244 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9245 int vector_len = vector_length_encoding(this); 9246 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len); 9247 %} 9248 ins_pipe( pipe_slow ); 9249 %} 9250 9251 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{ 9252 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9253 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9254 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9255 ins_encode %{ 9256 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9257 int vlen_enc = vector_length_encoding(src_node); 9258 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9259 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc); 9260 %} 9261 ins_pipe( pipe_slow ); 9262 %} 9263 9264 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{ 9265 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9266 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9267 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9268 ins_encode %{ 9269 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9270 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9271 int vlen_enc = vector_length_encoding(src_node); 9272 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc); 9273 %} 9274 ins_pipe( pipe_slow ); 9275 %} 9276 9277 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{ 9278 match(Set addr (VerifyVectorAlignment addr mask)); 9279 effect(KILL cr); 9280 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %} 9281 ins_encode %{ 9282 Label Lskip; 9283 // check if masked bits of addr are zero 9284 __ testq($addr$$Register, $mask$$constant); 9285 __ jccb(Assembler::equal, Lskip); 9286 __ stop("verify_vector_alignment found a misaligned vector memory access"); 9287 __ bind(Lskip); 9288 %} 9289 ins_pipe(pipe_slow); 9290 %} 9291 9292 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 9293 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 9294 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 9295 format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
vector mask comparison" %} 9296 ins_encode %{ 9297 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 9298 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 9299 9300 Label DONE; 9301 int vlen_enc = vector_length_encoding(this, $src1); 9302 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 9303 9304 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 9305 __ mov64($dst$$Register, -1L); 9306 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 9307 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 9308 __ jccb(Assembler::carrySet, DONE); 9309 __ kmovql($dst$$Register, $ktmp1$$KRegister); 9310 __ notq($dst$$Register); 9311 __ tzcntq($dst$$Register, $dst$$Register); 9312 __ bind(DONE); 9313 %} 9314 ins_pipe( pipe_slow ); 9315 %} 9316 9317 9318 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{ 9319 match(Set dst (VectorMaskGen len)); 9320 effect(TEMP temp, KILL cr); 9321 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9322 ins_encode %{ 9323 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9324 %} 9325 ins_pipe( pipe_slow ); 9326 %} 9327 9328 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9329 match(Set dst (VectorMaskGen len)); 9330 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9331 effect(TEMP temp); 9332 ins_encode %{ 9333 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9334 __ kmovql($dst$$KRegister, $temp$$Register); 9335 %} 9336 ins_pipe( pipe_slow ); 9337 %} 9338 9339 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9340 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9341 match(Set dst (VectorMaskToLong mask)); 9342 effect(TEMP dst, KILL cr); 9343 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9344 ins_encode %{ 9345 int opcode = this->ideal_Opcode(); 9346 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9347 int mask_len = Matcher::vector_length(this, $mask); 9348 int mask_size = mask_len * type2aelembytes(mbt); 9349 int vlen_enc = vector_length_encoding(this, $mask); 9350 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9351 $dst$$Register, mask_len, mask_size, vlen_enc); 9352 %} 9353 ins_pipe( pipe_slow ); 9354 %} 9355 9356 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9357 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9358 match(Set dst (VectorMaskToLong mask)); 9359 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9360 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9361 ins_encode %{ 9362 int opcode = this->ideal_Opcode(); 9363 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9364 int mask_len = Matcher::vector_length(this, $mask); 9365 int vlen_enc = vector_length_encoding(this, $mask); 9366 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9367 $dst$$Register, mask_len, mbt, vlen_enc); 9368 %} 9369 ins_pipe( pipe_slow ); 9370 %} 9371 9372 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9373 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9374 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9375 format %{ "vector_tolong_avx $dst, $mask \t! 
using $xtmp as TEMP" %} 9376 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9377 ins_encode %{ 9378 int opcode = this->ideal_Opcode(); 9379 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9380 int mask_len = Matcher::vector_length(this, $mask); 9381 int vlen_enc = vector_length_encoding(this, $mask); 9382 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9383 $dst$$Register, mask_len, mbt, vlen_enc); 9384 %} 9385 ins_pipe( pipe_slow ); 9386 %} 9387 9388 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9389 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9390 match(Set dst (VectorMaskTrueCount mask)); 9391 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9392 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9393 ins_encode %{ 9394 int opcode = this->ideal_Opcode(); 9395 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9396 int mask_len = Matcher::vector_length(this, $mask); 9397 int mask_size = mask_len * type2aelembytes(mbt); 9398 int vlen_enc = vector_length_encoding(this, $mask); 9399 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9400 $tmp$$Register, mask_len, mask_size, vlen_enc); 9401 %} 9402 ins_pipe( pipe_slow ); 9403 %} 9404 9405 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9406 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9407 match(Set dst (VectorMaskTrueCount mask)); 9408 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9409 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9410 ins_encode %{ 9411 int opcode = this->ideal_Opcode(); 9412 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9413 int mask_len = Matcher::vector_length(this, $mask); 9414 int vlen_enc = vector_length_encoding(this, $mask); 9415 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9416 $tmp$$Register, mask_len, mbt, vlen_enc); 9417 %} 9418 ins_pipe( pipe_slow ); 9419 %} 9420 9421 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9422 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9423 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9424 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9425 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9426 ins_encode %{ 9427 int opcode = this->ideal_Opcode(); 9428 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9429 int mask_len = Matcher::vector_length(this, $mask); 9430 int vlen_enc = vector_length_encoding(this, $mask); 9431 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9432 $tmp$$Register, mask_len, mbt, vlen_enc); 9433 %} 9434 ins_pipe( pipe_slow ); 9435 %} 9436 9437 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9438 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9439 match(Set dst (VectorMaskFirstTrue mask)); 9440 match(Set dst (VectorMaskLastTrue mask)); 9441 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9442 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! 
using $tmp as TEMP" %} 9443 ins_encode %{ 9444 int opcode = this->ideal_Opcode(); 9445 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9446 int mask_len = Matcher::vector_length(this, $mask); 9447 int mask_size = mask_len * type2aelembytes(mbt); 9448 int vlen_enc = vector_length_encoding(this, $mask); 9449 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9450 $tmp$$Register, mask_len, mask_size, vlen_enc); 9451 %} 9452 ins_pipe( pipe_slow ); 9453 %} 9454 9455 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9456 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9457 match(Set dst (VectorMaskFirstTrue mask)); 9458 match(Set dst (VectorMaskLastTrue mask)); 9459 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9460 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9461 ins_encode %{ 9462 int opcode = this->ideal_Opcode(); 9463 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9464 int mask_len = Matcher::vector_length(this, $mask); 9465 int vlen_enc = vector_length_encoding(this, $mask); 9466 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9467 $tmp$$Register, mask_len, mbt, vlen_enc); 9468 %} 9469 ins_pipe( pipe_slow ); 9470 %} 9471 9472 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9473 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9474 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9475 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9476 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9477 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! 
using $tmp, $xtmp as TEMP" %} 9478 ins_encode %{ 9479 int opcode = this->ideal_Opcode(); 9480 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9481 int mask_len = Matcher::vector_length(this, $mask); 9482 int vlen_enc = vector_length_encoding(this, $mask); 9483 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9484 $tmp$$Register, mask_len, mbt, vlen_enc); 9485 %} 9486 ins_pipe( pipe_slow ); 9487 %} 9488 9489 // --------------------------------- Compress/Expand Operations --------------------------- 9490 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{ 9491 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 9492 match(Set dst (CompressV src mask)); 9493 match(Set dst (ExpandV src mask)); 9494 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr); 9495 format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %} 9496 ins_encode %{ 9497 int opcode = this->ideal_Opcode(); 9498 int vlen_enc = vector_length_encoding(this); 9499 BasicType bt = Matcher::vector_element_basic_type(this); 9500 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register, 9501 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc); 9502 %} 9503 ins_pipe( pipe_slow ); 9504 %} 9505 9506 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9507 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 9508 match(Set dst (CompressV src mask)); 9509 match(Set dst (ExpandV src mask)); 9510 format %{ "vector_compress_expand $dst, $src, $mask" %} 9511 ins_encode %{ 9512 int opcode = this->ideal_Opcode(); 9513 int vector_len = vector_length_encoding(this); 9514 BasicType bt = Matcher::vector_element_basic_type(this); 9515 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9516 %} 9517 ins_pipe( pipe_slow ); 9518 %} 9519 9520 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9521 match(Set dst (CompressM mask)); 9522 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9523 format %{ "mask_compress_evex $dst, $mask\t! 
using $rtmp1 and $rtmp2 as TEMP" %} 9524 ins_encode %{ 9525 assert(this->in(1)->bottom_type()->isa_vectmask(), ""); 9526 int mask_len = Matcher::vector_length(this); 9527 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len); 9528 %} 9529 ins_pipe( pipe_slow ); 9530 %} 9531 9532 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ 9533 9534 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9535 predicate(!VM_Version::supports_gfni()); 9536 match(Set dst (ReverseV src)); 9537 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9538 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9539 ins_encode %{ 9540 int vec_enc = vector_length_encoding(this); 9541 BasicType bt = Matcher::vector_element_basic_type(this); 9542 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9543 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9544 %} 9545 ins_pipe( pipe_slow ); 9546 %} 9547 9548 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{ 9549 predicate(VM_Version::supports_gfni()); 9550 match(Set dst (ReverseV src)); 9551 effect(TEMP dst, TEMP xtmp); 9552 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %} 9553 ins_encode %{ 9554 int vec_enc = vector_length_encoding(this); 9555 BasicType bt = Matcher::vector_element_basic_type(this); 9556 InternalAddress addr = $constantaddress(jlong(0x8040201008040201)); 9557 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc, 9558 $xtmp$$XMMRegister); 9559 %} 9560 ins_pipe( pipe_slow ); 9561 %} 9562 9563 instruct vreverse_byte_reg(vec dst, vec src) %{ 9564 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); 9565 match(Set dst (ReverseBytesV src)); 9566 effect(TEMP dst); 9567 format %{ "vector_reverse_byte $dst, $src" %} 9568 ins_encode %{ 9569 int vec_enc = vector_length_encoding(this); 9570 BasicType bt = Matcher::vector_element_basic_type(this); 9571 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc); 9572 %} 9573 ins_pipe( pipe_slow ); 9574 %} 9575 9576 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9577 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64); 9578 match(Set dst (ReverseBytesV src)); 9579 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9580 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9581 ins_encode %{ 9582 int vec_enc = vector_length_encoding(this); 9583 BasicType bt = Matcher::vector_element_basic_type(this); 9584 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9585 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9586 %} 9587 ins_pipe( pipe_slow ); 9588 %} 9589 9590 // ---------------------------------- Vector Count Leading Zeros ----------------------------------- 9591 9592 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ 9593 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9594 Matcher::vector_length_in_bytes(n->in(1)))); 9595 match(Set dst (CountLeadingZerosV src)); 9596 format %{ "vector_count_leading_zeros $dst, $src" %} 9597 ins_encode %{ 9598 int vlen_enc = vector_length_encoding(this, $src); 9599 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9600 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, 
$src$$XMMRegister, xnoreg, 9601 xnoreg, xnoreg, k0, noreg, true, vlen_enc); 9602 %} 9603 ins_pipe( pipe_slow ); 9604 %} 9605 9606 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9607 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9608 Matcher::vector_length_in_bytes(n->in(1)))); 9609 match(Set dst (CountLeadingZerosV src mask)); 9610 format %{ "vector_count_leading_zeros $dst, $src, $mask" %} 9611 ins_encode %{ 9612 int vlen_enc = vector_length_encoding(this, $src); 9613 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9614 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9615 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, 9616 xnoreg, $mask$$KRegister, noreg, true, vlen_enc); 9617 %} 9618 ins_pipe( pipe_slow ); 9619 %} 9620 9621 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{ 9622 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9623 VM_Version::supports_avx512cd() && 9624 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9625 match(Set dst (CountLeadingZerosV src)); 9626 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 9627 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %} 9628 ins_encode %{ 9629 int vlen_enc = vector_length_encoding(this, $src); 9630 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9631 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9632 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc); 9633 %} 9634 ins_pipe( pipe_slow ); 9635 %} 9636 9637 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{ 9638 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9639 match(Set dst (CountLeadingZerosV src)); 9640 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 9641 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %} 9642 ins_encode %{ 9643 int vlen_enc = vector_length_encoding(this, $src); 9644 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9645 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9646 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister, 9647 $rtmp$$Register, true, vlen_enc); 9648 %} 9649 ins_pipe( pipe_slow ); 9650 %} 9651 9652 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{ 9653 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT && 9654 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9655 match(Set dst (CountLeadingZerosV src)); 9656 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 9657 format %{ "vector_count_leading_zeros $dst, $src\t! 
using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 9658 ins_encode %{ 9659 int vlen_enc = vector_length_encoding(this, $src); 9660 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9661 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9662 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc); 9663 %} 9664 ins_pipe( pipe_slow ); 9665 %} 9666 9667 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9668 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT && 9669 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9670 match(Set dst (CountLeadingZerosV src)); 9671 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9672 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9673 ins_encode %{ 9674 int vlen_enc = vector_length_encoding(this, $src); 9675 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9676 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9677 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9678 %} 9679 ins_pipe( pipe_slow ); 9680 %} 9681 9682 // ---------------------------------- Vector Masked Operations ------------------------------------ 9683 9684 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ 9685 match(Set dst (AddVB (Binary dst src2) mask)); 9686 match(Set dst (AddVS (Binary dst src2) mask)); 9687 match(Set dst (AddVI (Binary dst src2) mask)); 9688 match(Set dst (AddVL (Binary dst src2) mask)); 9689 match(Set dst (AddVF (Binary dst src2) mask)); 9690 match(Set dst (AddVD (Binary dst src2) mask)); 9691 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9692 ins_encode %{ 9693 int vlen_enc = vector_length_encoding(this); 9694 BasicType bt = Matcher::vector_element_basic_type(this); 9695 int opc = this->ideal_Opcode(); 9696 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9697 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9698 %} 9699 ins_pipe( pipe_slow ); 9700 %} 9701 9702 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ 9703 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); 9704 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); 9705 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); 9706 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); 9707 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); 9708 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); 9709 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9710 ins_encode %{ 9711 int vlen_enc = vector_length_encoding(this); 9712 BasicType bt = Matcher::vector_element_basic_type(this); 9713 int opc = this->ideal_Opcode(); 9714 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9715 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9716 %} 9717 ins_pipe( pipe_slow ); 9718 %} 9719 9720 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ 9721 match(Set dst (XorV (Binary dst src2) mask)); 9722 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! 
xor masked operation" %} 9723 ins_encode %{ 9724 int vlen_enc = vector_length_encoding(this); 9725 BasicType bt = Matcher::vector_element_basic_type(this); 9726 int opc = this->ideal_Opcode(); 9727 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9728 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9729 %} 9730 ins_pipe( pipe_slow ); 9731 %} 9732 9733 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9734 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9735 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9736 ins_encode %{ 9737 int vlen_enc = vector_length_encoding(this); 9738 BasicType bt = Matcher::vector_element_basic_type(this); 9739 int opc = this->ideal_Opcode(); 9740 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9741 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9742 %} 9743 ins_pipe( pipe_slow ); 9744 %} 9745 9746 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9747 match(Set dst (OrV (Binary dst src2) mask)); 9748 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9749 ins_encode %{ 9750 int vlen_enc = vector_length_encoding(this); 9751 BasicType bt = Matcher::vector_element_basic_type(this); 9752 int opc = this->ideal_Opcode(); 9753 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9754 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9755 %} 9756 ins_pipe( pipe_slow ); 9757 %} 9758 9759 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9760 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9761 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9762 ins_encode %{ 9763 int vlen_enc = vector_length_encoding(this); 9764 BasicType bt = Matcher::vector_element_basic_type(this); 9765 int opc = this->ideal_Opcode(); 9766 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9767 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9768 %} 9769 ins_pipe( pipe_slow ); 9770 %} 9771 9772 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9773 match(Set dst (AndV (Binary dst src2) mask)); 9774 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9775 ins_encode %{ 9776 int vlen_enc = vector_length_encoding(this); 9777 BasicType bt = Matcher::vector_element_basic_type(this); 9778 int opc = this->ideal_Opcode(); 9779 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9780 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9781 %} 9782 ins_pipe( pipe_slow ); 9783 %} 9784 9785 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 9786 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 9787 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9788 ins_encode %{ 9789 int vlen_enc = vector_length_encoding(this); 9790 BasicType bt = Matcher::vector_element_basic_type(this); 9791 int opc = this->ideal_Opcode(); 9792 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9793 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9794 %} 9795 ins_pipe( pipe_slow ); 9796 %} 9797 9798 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 9799 match(Set dst (SubVB (Binary dst src2) mask)); 9800 match(Set dst (SubVS (Binary dst src2) mask)); 9801 match(Set dst (SubVI (Binary dst src2) mask)); 9802 match(Set dst (SubVL (Binary dst src2) mask)); 9803 match(Set dst (SubVF (Binary dst src2) mask)); 9804 match(Set dst (SubVD (Binary dst src2) mask)); 9805 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! 
sub masked operation" %} 9806 ins_encode %{ 9807 int vlen_enc = vector_length_encoding(this); 9808 BasicType bt = Matcher::vector_element_basic_type(this); 9809 int opc = this->ideal_Opcode(); 9810 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9811 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9812 %} 9813 ins_pipe( pipe_slow ); 9814 %} 9815 9816 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{ 9817 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask)); 9818 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask)); 9819 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask)); 9820 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask)); 9821 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask)); 9822 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask)); 9823 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 9824 ins_encode %{ 9825 int vlen_enc = vector_length_encoding(this); 9826 BasicType bt = Matcher::vector_element_basic_type(this); 9827 int opc = this->ideal_Opcode(); 9828 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9829 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9830 %} 9831 ins_pipe( pipe_slow ); 9832 %} 9833 9834 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{ 9835 match(Set dst (MulVS (Binary dst src2) mask)); 9836 match(Set dst (MulVI (Binary dst src2) mask)); 9837 match(Set dst (MulVL (Binary dst src2) mask)); 9838 match(Set dst (MulVF (Binary dst src2) mask)); 9839 match(Set dst (MulVD (Binary dst src2) mask)); 9840 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9841 ins_encode %{ 9842 int vlen_enc = vector_length_encoding(this); 9843 BasicType bt = Matcher::vector_element_basic_type(this); 9844 int opc = this->ideal_Opcode(); 9845 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9846 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9847 %} 9848 ins_pipe( pipe_slow ); 9849 %} 9850 9851 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{ 9852 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask)); 9853 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask)); 9854 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask)); 9855 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask)); 9856 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask)); 9857 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9858 ins_encode %{ 9859 int vlen_enc = vector_length_encoding(this); 9860 BasicType bt = Matcher::vector_element_basic_type(this); 9861 int opc = this->ideal_Opcode(); 9862 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9863 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9864 %} 9865 ins_pipe( pipe_slow ); 9866 %} 9867 9868 instruct vsqrt_reg_masked(vec dst, kReg mask) %{ 9869 match(Set dst (SqrtVF dst mask)); 9870 match(Set dst (SqrtVD dst mask)); 9871 format %{ "vpsqrt_masked $dst, $mask\t! 
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

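// Editorial note: the masked shift patterns below come in three flavors per
// direction: an immediate count (the constant is wrapped in LShiftCntV or
// RShiftCntV), a broadcast scalar count (is_var_shift() is false) and a
// per-lane variable count (is_var_shift() is true). The extra trailing boolean
// passed to evmasked_op in the register forms appears to select the
// variable-shift encoding; this summary is derived from the predicates and
// arguments used below, not from the original sources.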
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

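// Editorial note: masked max/min, rearrange and abs below follow the same
// evmasked_op scheme. The rearrange pattern passes false for the merge
// argument while the arithmetic patterns pass true; the intent appears to be
// zero-masking for the permute and merge-masking elsewhere. This is an
// observation about the arguments used below, not a statement from the
// original sources.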
instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Comparison is dispatched on the element type of src1.
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}

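// Editorial note: the following patterns operate directly on opmask (k)
// registers. MaskAll broadcasts a scalar condition into every mask lane, and
// an XorVMask against a MaskAll of -1 is recognized as a mask negation and
// emitted as knot. The split between the two "mask not" variants follows the
// k-register widths the hardware supports (AVX512DQ / AVX512BW); see the
// predicates below.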
"%s", type2name(src1_elem_bt)); break; 10236 } 10237 %} 10238 ins_pipe( pipe_slow ); 10239 %} 10240 10241 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{ 10242 predicate(Matcher::vector_length(n) <= 32); 10243 match(Set dst (MaskAll src)); 10244 format %{ "mask_all_evexI_LE32 $dst, $src \t" %} 10245 ins_encode %{ 10246 int mask_len = Matcher::vector_length(this); 10247 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); 10248 %} 10249 ins_pipe( pipe_slow ); 10250 %} 10251 10252 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ 10253 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); 10254 match(Set dst (XorVMask src (MaskAll cnt))); 10255 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp); 10256 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %} 10257 ins_encode %{ 10258 uint masklen = Matcher::vector_length(this); 10259 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register); 10260 %} 10261 ins_pipe( pipe_slow ); 10262 %} 10263 10264 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{ 10265 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) || 10266 (Matcher::vector_length(n) == 16) || 10267 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw())); 10268 match(Set dst (XorVMask src (MaskAll cnt))); 10269 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %} 10270 ins_encode %{ 10271 uint masklen = Matcher::vector_length(this); 10272 __ knot(masklen, $dst$$KRegister, $src$$KRegister); 10273 %} 10274 ins_pipe( pipe_slow ); 10275 %} 10276 10277 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{ 10278 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8); 10279 match(Set dst (VectorLongToMask src)); 10280 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp); 10281 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %} 10282 ins_encode %{ 10283 int mask_len = Matcher::vector_length(this); 10284 int vec_enc = vector_length_encoding(mask_len); 10285 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10286 $rtmp2$$Register, xnoreg, mask_len, vec_enc); 10287 %} 10288 ins_pipe( pipe_slow ); 10289 %} 10290 10291 10292 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{ 10293 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8); 10294 match(Set dst (VectorLongToMask src)); 10295 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr); 10296 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %} 10297 ins_encode %{ 10298 int mask_len = Matcher::vector_length(this); 10299 assert(mask_len <= 32, "invalid mask length"); 10300 int vec_enc = vector_length_encoding(mask_len); 10301 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10302 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc); 10303 %} 10304 ins_pipe( pipe_slow ); 10305 %} 10306 10307 instruct long_to_mask_evex(kReg dst, rRegL src) %{ 10308 predicate(n->bottom_type()->isa_vectmask()); 10309 match(Set dst (VectorLongToMask src)); 10310 format %{ "long_to_mask_evex $dst, $src\t!" 
instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
    uint masklen = Matcher::vector_length(this);
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    // Immediate 0x18 selects the +Inf and -Inf classes of VFPCLASS.
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    // Immediate 0x18 selects the +Inf and -Inf classes of VFPCLASS.
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

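// Editorial note: saturating add/sub for int and long lanes has no direct
// SSE/AVX instruction, so the patterns below expand into multi-instruction
// sequences: an EVEX variant that uses opmask temporaries and an AVX variant
// that uses extra XMM temporaries, selected by the AVX512VL / vector-length
// predicates. The subword (byte/short) forms above are expected to map onto
// the native saturating byte/word instructions; the exact sequences live in
// the macro-assembler helpers called from ins_encode.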
instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                                        $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                       $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                              $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

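// Editorial note: SelectFromTwoVector picks each result lane from one of the
// two source vectors according to the index vector, and the index register
// doubles as the destination. On EVEX targets this is expected to lower to a
// two-source permute (vpermi2-style); the exact instruction selection is in
// the select_from_two_vectors_evex assembler helper.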
instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
%{
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_sqrt_HF_reg(vec dst, vec src)
%{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

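// Editorial note: several of the Float16 vector patterns that follow have
// memory forms whose match rules wrap the load in a VectorReinterpret,
// matching the shape the ideal graph produces when a vector of shorts is
// viewed as FP16 lanes; the AVX-512 FP16 encodings then take the second
// operand directly from memory.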
instruct vector_sqrt_HF_mem(vec dst, memory src)
%{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
%{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
%{
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}