//
// Copyright (c) 2011, 2024, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding, VM register );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, i.e. 16 32-bit words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
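//
// For illustration only, reading one of the definitions that follow:
//
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
//
//   - first SOC         : register save type (Save-On-Call)
//   - second SOC        : C convention save type
//   - Op_RegF           : ideal register type used when spilling
//   - 0                 : encoding placed into the opcodes
//   - xmm0->as_VMReg()  : the concrete VM register backing this name
//
// The suffixed entries XMM0b .. XMM0p name the remaining 32-bit words of the
// same physical register via as_VMReg()->next(1) .. next(15).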
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM15 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, 
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, 
SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 
292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( 
SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(15)); 605 606 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 607 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 608 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 609 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 610 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 611 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 612 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 613 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 614 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 615 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 616 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 617 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 618 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 619 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 620 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 621 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 622 623 #endif // _LP64 624 625 #ifdef _LP64 626 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 627 #else 628 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 629 #endif // _LP64 630 631 // AVX3 Mask Registers. 632 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg()); 633 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next()); 634 635 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg()); 636 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next()); 637 638 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg()); 639 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next()); 640 641 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg()); 642 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next()); 643 644 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg()); 645 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next()); 646 647 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg()); 648 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next()); 649 650 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg()); 651 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next()); 652 653 654 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 655 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 656 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 657 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 658 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 659 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 660 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 661 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 662 #ifdef _LP64 663 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 664 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 665 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, 
XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 666 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 667 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 668 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 669 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 670 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 671 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 672 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 673 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 674 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 675 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 676 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 677 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 678 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 679 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 680 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 681 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 682 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 683 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 684 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 685 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 686 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 687 #endif 688 ); 689 690 alloc_class chunk2(K7, K7_H, 691 K6, K6_H, 692 K5, K5_H, 693 K4, K4_H, 694 K3, K3_H, 695 K2, K2_H, 696 K1, K1_H); 697 698 reg_class vectmask_reg(K1, K1_H, 699 K2, K2_H, 700 K3, K3_H, 701 K4, K4_H, 702 K5, K5_H, 703 K6, K6_H, 704 K7, K7_H); 705 706 reg_class vectmask_reg_K1(K1, K1_H); 707 reg_class vectmask_reg_K2(K2, K2_H); 708 reg_class vectmask_reg_K3(K3, K3_H); 709 reg_class vectmask_reg_K4(K4, K4_H); 710 reg_class vectmask_reg_K5(K5, K5_H); 711 reg_class vectmask_reg_K6(K6, K6_H); 712 reg_class vectmask_reg_K7(K7, K7_H); 713 714 // flags allocation class should be last. 
715 alloc_class chunk3(RFLAGS); 716 717 718 // Singleton class for condition codes 719 reg_class int_flags(RFLAGS); 720 721 // Class for pre evex float registers 722 reg_class float_reg_legacy(XMM0, 723 XMM1, 724 XMM2, 725 XMM3, 726 XMM4, 727 XMM5, 728 XMM6, 729 XMM7 730 #ifdef _LP64 731 ,XMM8, 732 XMM9, 733 XMM10, 734 XMM11, 735 XMM12, 736 XMM13, 737 XMM14, 738 XMM15 739 #endif 740 ); 741 742 // Class for evex float registers 743 reg_class float_reg_evex(XMM0, 744 XMM1, 745 XMM2, 746 XMM3, 747 XMM4, 748 XMM5, 749 XMM6, 750 XMM7 751 #ifdef _LP64 752 ,XMM8, 753 XMM9, 754 XMM10, 755 XMM11, 756 XMM12, 757 XMM13, 758 XMM14, 759 XMM15, 760 XMM16, 761 XMM17, 762 XMM18, 763 XMM19, 764 XMM20, 765 XMM21, 766 XMM22, 767 XMM23, 768 XMM24, 769 XMM25, 770 XMM26, 771 XMM27, 772 XMM28, 773 XMM29, 774 XMM30, 775 XMM31 776 #endif 777 ); 778 779 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 780 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 781 782 // Class for pre evex double registers 783 reg_class double_reg_legacy(XMM0, XMM0b, 784 XMM1, XMM1b, 785 XMM2, XMM2b, 786 XMM3, XMM3b, 787 XMM4, XMM4b, 788 XMM5, XMM5b, 789 XMM6, XMM6b, 790 XMM7, XMM7b 791 #ifdef _LP64 792 ,XMM8, XMM8b, 793 XMM9, XMM9b, 794 XMM10, XMM10b, 795 XMM11, XMM11b, 796 XMM12, XMM12b, 797 XMM13, XMM13b, 798 XMM14, XMM14b, 799 XMM15, XMM15b 800 #endif 801 ); 802 803 // Class for evex double registers 804 reg_class double_reg_evex(XMM0, XMM0b, 805 XMM1, XMM1b, 806 XMM2, XMM2b, 807 XMM3, XMM3b, 808 XMM4, XMM4b, 809 XMM5, XMM5b, 810 XMM6, XMM6b, 811 XMM7, XMM7b 812 #ifdef _LP64 813 ,XMM8, XMM8b, 814 XMM9, XMM9b, 815 XMM10, XMM10b, 816 XMM11, XMM11b, 817 XMM12, XMM12b, 818 XMM13, XMM13b, 819 XMM14, XMM14b, 820 XMM15, XMM15b, 821 XMM16, XMM16b, 822 XMM17, XMM17b, 823 XMM18, XMM18b, 824 XMM19, XMM19b, 825 XMM20, XMM20b, 826 XMM21, XMM21b, 827 XMM22, XMM22b, 828 XMM23, XMM23b, 829 XMM24, XMM24b, 830 XMM25, XMM25b, 831 XMM26, XMM26b, 832 XMM27, XMM27b, 833 XMM28, XMM28b, 834 XMM29, XMM29b, 835 XMM30, XMM30b, 836 XMM31, XMM31b 837 #endif 838 ); 839 840 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 841 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 842 843 // Class for pre evex 32bit vector registers 844 reg_class vectors_reg_legacy(XMM0, 845 XMM1, 846 XMM2, 847 XMM3, 848 XMM4, 849 XMM5, 850 XMM6, 851 XMM7 852 #ifdef _LP64 853 ,XMM8, 854 XMM9, 855 XMM10, 856 XMM11, 857 XMM12, 858 XMM13, 859 XMM14, 860 XMM15 861 #endif 862 ); 863 864 // Class for evex 32bit vector registers 865 reg_class vectors_reg_evex(XMM0, 866 XMM1, 867 XMM2, 868 XMM3, 869 XMM4, 870 XMM5, 871 XMM6, 872 XMM7 873 #ifdef _LP64 874 ,XMM8, 875 XMM9, 876 XMM10, 877 XMM11, 878 XMM12, 879 XMM13, 880 XMM14, 881 XMM15, 882 XMM16, 883 XMM17, 884 XMM18, 885 XMM19, 886 XMM20, 887 XMM21, 888 XMM22, 889 XMM23, 890 XMM24, 891 XMM25, 892 XMM26, 893 XMM27, 894 XMM28, 895 XMM29, 896 XMM30, 897 XMM31 898 #endif 899 ); 900 901 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 902 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 903 904 // Class for all 64bit vector registers 905 reg_class vectord_reg_legacy(XMM0, XMM0b, 906 XMM1, XMM1b, 907 XMM2, XMM2b, 908 XMM3, XMM3b, 909 XMM4, XMM4b, 910 
XMM5, XMM5b, 911 XMM6, XMM6b, 912 XMM7, XMM7b 913 #ifdef _LP64 914 ,XMM8, XMM8b, 915 XMM9, XMM9b, 916 XMM10, XMM10b, 917 XMM11, XMM11b, 918 XMM12, XMM12b, 919 XMM13, XMM13b, 920 XMM14, XMM14b, 921 XMM15, XMM15b 922 #endif 923 ); 924 925 // Class for all 64bit vector registers 926 reg_class vectord_reg_evex(XMM0, XMM0b, 927 XMM1, XMM1b, 928 XMM2, XMM2b, 929 XMM3, XMM3b, 930 XMM4, XMM4b, 931 XMM5, XMM5b, 932 XMM6, XMM6b, 933 XMM7, XMM7b 934 #ifdef _LP64 935 ,XMM8, XMM8b, 936 XMM9, XMM9b, 937 XMM10, XMM10b, 938 XMM11, XMM11b, 939 XMM12, XMM12b, 940 XMM13, XMM13b, 941 XMM14, XMM14b, 942 XMM15, XMM15b, 943 XMM16, XMM16b, 944 XMM17, XMM17b, 945 XMM18, XMM18b, 946 XMM19, XMM19b, 947 XMM20, XMM20b, 948 XMM21, XMM21b, 949 XMM22, XMM22b, 950 XMM23, XMM23b, 951 XMM24, XMM24b, 952 XMM25, XMM25b, 953 XMM26, XMM26b, 954 XMM27, XMM27b, 955 XMM28, XMM28b, 956 XMM29, XMM29b, 957 XMM30, XMM30b, 958 XMM31, XMM31b 959 #endif 960 ); 961 962 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 963 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 964 965 // Class for all 128bit vector registers 966 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 967 XMM1, XMM1b, XMM1c, XMM1d, 968 XMM2, XMM2b, XMM2c, XMM2d, 969 XMM3, XMM3b, XMM3c, XMM3d, 970 XMM4, XMM4b, XMM4c, XMM4d, 971 XMM5, XMM5b, XMM5c, XMM5d, 972 XMM6, XMM6b, XMM6c, XMM6d, 973 XMM7, XMM7b, XMM7c, XMM7d 974 #ifdef _LP64 975 ,XMM8, XMM8b, XMM8c, XMM8d, 976 XMM9, XMM9b, XMM9c, XMM9d, 977 XMM10, XMM10b, XMM10c, XMM10d, 978 XMM11, XMM11b, XMM11c, XMM11d, 979 XMM12, XMM12b, XMM12c, XMM12d, 980 XMM13, XMM13b, XMM13c, XMM13d, 981 XMM14, XMM14b, XMM14c, XMM14d, 982 XMM15, XMM15b, XMM15c, XMM15d 983 #endif 984 ); 985 986 // Class for all 128bit vector registers 987 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 988 XMM1, XMM1b, XMM1c, XMM1d, 989 XMM2, XMM2b, XMM2c, XMM2d, 990 XMM3, XMM3b, XMM3c, XMM3d, 991 XMM4, XMM4b, XMM4c, XMM4d, 992 XMM5, XMM5b, XMM5c, XMM5d, 993 XMM6, XMM6b, XMM6c, XMM6d, 994 XMM7, XMM7b, XMM7c, XMM7d 995 #ifdef _LP64 996 ,XMM8, XMM8b, XMM8c, XMM8d, 997 XMM9, XMM9b, XMM9c, XMM9d, 998 XMM10, XMM10b, XMM10c, XMM10d, 999 XMM11, XMM11b, XMM11c, XMM11d, 1000 XMM12, XMM12b, XMM12c, XMM12d, 1001 XMM13, XMM13b, XMM13c, XMM13d, 1002 XMM14, XMM14b, XMM14c, XMM14d, 1003 XMM15, XMM15b, XMM15c, XMM15d, 1004 XMM16, XMM16b, XMM16c, XMM16d, 1005 XMM17, XMM17b, XMM17c, XMM17d, 1006 XMM18, XMM18b, XMM18c, XMM18d, 1007 XMM19, XMM19b, XMM19c, XMM19d, 1008 XMM20, XMM20b, XMM20c, XMM20d, 1009 XMM21, XMM21b, XMM21c, XMM21d, 1010 XMM22, XMM22b, XMM22c, XMM22d, 1011 XMM23, XMM23b, XMM23c, XMM23d, 1012 XMM24, XMM24b, XMM24c, XMM24d, 1013 XMM25, XMM25b, XMM25c, XMM25d, 1014 XMM26, XMM26b, XMM26c, XMM26d, 1015 XMM27, XMM27b, XMM27c, XMM27d, 1016 XMM28, XMM28b, XMM28c, XMM28d, 1017 XMM29, XMM29b, XMM29c, XMM29d, 1018 XMM30, XMM30b, XMM30c, XMM30d, 1019 XMM31, XMM31b, XMM31c, XMM31d 1020 #endif 1021 ); 1022 1023 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1024 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1025 1026 // Class for all 256bit vector registers 1027 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1028 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1029 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1030 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, 
XMM3g, XMM3h, 1031 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1032 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1033 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1034 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1035 #ifdef _LP64 1036 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1037 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1038 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1039 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1040 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1041 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1042 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1043 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1044 #endif 1045 ); 1046 1047 // Class for all 256bit vector registers 1048 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1049 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1050 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1051 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1052 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1053 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1054 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1055 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1056 #ifdef _LP64 1057 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1058 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1059 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1060 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1061 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1062 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1063 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1064 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1065 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1066 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1067 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1068 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1069 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1070 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1071 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1072 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1073 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1074 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1075 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1076 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1077 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1078 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1079 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1080 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1081 #endif 1082 ); 1083 1084 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1085 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1086 1087 // Class for all 512bit vector registers 1088 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1089 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, 
XMM1m, XMM1n, XMM1o, XMM1p, 1090 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1091 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1092 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1093 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1094 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1095 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1096 #ifdef _LP64 1097 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1098 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1099 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1100 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1101 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1102 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1103 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1104 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1105 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1106 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1107 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1108 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1109 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1110 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1111 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1112 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1113 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1114 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1115 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1116 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1117 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1118 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 
XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1119 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1120 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1121 #endif 1122 ); 1123 1124 // Class for restricted 512bit vector registers 1125 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1126 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1127 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1128 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1129 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1130 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1131 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1132 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1133 #ifdef _LP64 1134 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1135 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1136 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1137 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1138 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1139 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1140 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1141 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1142 #endif 1143 ); 1144 1145 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1146 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1147 1148 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1149 %} 1150 1151 1152 //----------SOURCE BLOCK------------------------------------------------------- 1153 // This is a block of C++ code which provides values, functions, and 1154 // definitions necessary in the rest of the architecture description 1155 1156 source_hpp %{ 1157 // Header information of the source block. 1158 // Method declarations/definitions which are used outside 1159 // the ad-scope can conveniently be defined here. 1160 // 1161 // To keep related declarations/definitions/uses close together, 1162 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
1163
1164 #include "runtime/vm_version.hpp"
1165
1166 class NativeJump;
1167
1168 class CallStubImpl {
1169
1170   //--------------------------------------------------------------
1171   //---< Used for optimization in Compile::shorten_branches >---
1172   //--------------------------------------------------------------
1173
1174  public:
1175   // Size of call trampoline stub.
1176   static uint size_call_trampoline() {
1177     return 0; // no call trampolines on this platform
1178   }
1179
1180   // number of relocations needed by a call trampoline stub
1181   static uint reloc_call_trampoline() {
1182     return 0; // no call trampolines on this platform
1183   }
1184 };
1185
1186 class HandlerImpl {
1187
1188  public:
1189
1190   static int emit_exception_handler(C2_MacroAssembler *masm);
1191   static int emit_deopt_handler(C2_MacroAssembler* masm);
1192
1193   static uint size_exception_handler() {
1194     // NativeCall instruction size is the same as NativeJump.
1195     // exception handler starts out as jump and can be patched to
1196     // a call by deoptimization. (4932387)
1197     // Note that this value is also credited (in output.cpp) to
1198     // the size of the code section.
1199     return NativeJump::instruction_size;
1200   }
1201
1202 #ifdef _LP64
1203   static uint size_deopt_handler() {
1204     // three 5 byte instructions plus one move for unreachable address.
1205     return 15+3;
1206   }
1207 #else
1208   static uint size_deopt_handler() {
1209     // NativeCall instruction size is the same as NativeJump.
1210     // exception handler starts out as jump and can be patched to
1211     // a call by deoptimization. (4932387)
1212     // Note that this value is also credited (in output.cpp) to
1213     // the size of the code section.
1214     return 5 + NativeJump::instruction_size; // pushl(); jmp;
1215   }
1216 #endif
1217 };
1218
1219 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
1220   switch(bytes) {
1221     case 4: // fall-through
1222     case 8: // fall-through
1223     case 16: return Assembler::AVX_128bit;
1224     case 32: return Assembler::AVX_256bit;
1225     case 64: return Assembler::AVX_512bit;
1226
1227     default: {
1228       ShouldNotReachHere();
1229       return Assembler::AVX_NoVec;
1230     }
1231   }
1232 }
1233
1234 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
1235   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
1236 }
1237
1238 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
1239   uint def_idx = use->operand_index(opnd);
1240   Node* def = use->in(def_idx);
1241   return vector_length_encoding(def);
1242 }
1243
1244 static inline bool is_vector_popcount_predicate(BasicType bt) {
1245   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
1246          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
1247 }
1248
1249 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
1250   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
1251          (VM_Version::supports_avx512vl() || vlen_bytes == 64);
1252 }
1253
1254 class Node::PD {
1255 public:
1256   enum NodeFlags {
1257     Flag_intel_jcc_erratum = Node::_last_flag << 1,
1258     Flag_sets_carry_flag = Node::_last_flag << 2,
1259     Flag_sets_parity_flag = Node::_last_flag << 3,
1260     Flag_sets_zero_flag = Node::_last_flag << 4,
1261     Flag_sets_overflow_flag = Node::_last_flag << 5,
1262     Flag_sets_sign_flag = Node::_last_flag << 6,
1263     Flag_clears_carry_flag = Node::_last_flag << 7,
1264     Flag_clears_parity_flag = Node::_last_flag << 8,
1265 Flag_clears_zero_flag = Node::_last_flag << 9, 1266 Flag_clears_overflow_flag = Node::_last_flag << 10, 1267 Flag_clears_sign_flag = Node::_last_flag << 11, 1268 _last_flag = Flag_clears_sign_flag 1269 }; 1270 }; 1271 1272 %} // end source_hpp 1273 1274 source %{ 1275 1276 #include "opto/addnode.hpp" 1277 #include "c2_intelJccErratum_x86.hpp" 1278 1279 void PhaseOutput::pd_perform_mach_node_analysis() { 1280 if (VM_Version::has_intel_jcc_erratum()) { 1281 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1282 _buf_sizes._code += extra_padding; 1283 } 1284 } 1285 1286 int MachNode::pd_alignment_required() const { 1287 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1288 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86. 1289 return IntelJccErratum::largest_jcc_size() + 1; 1290 } else { 1291 return 1; 1292 } 1293 } 1294 1295 int MachNode::compute_padding(int current_offset) const { 1296 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1297 Compile* C = Compile::current(); 1298 PhaseOutput* output = C->output(); 1299 Block* block = output->block(); 1300 int index = output->index(); 1301 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1302 } else { 1303 return 0; 1304 } 1305 } 1306 1307 // Emit exception handler code. 1308 // Stuff framesize into a register and call a VM stub routine. 1309 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) { 1310 1311 // Note that the code buffer's insts_mark is always relative to insts. 1312 // That's why we must use the macroassembler to generate a handler. 1313 address base = __ start_a_stub(size_exception_handler()); 1314 if (base == nullptr) { 1315 ciEnv::current()->record_failure("CodeCache is full"); 1316 return 0; // CodeBuffer::expand failed 1317 } 1318 int offset = __ offset(); 1319 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1320 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1321 __ end_a_stub(); 1322 return offset; 1323 } 1324 1325 // Emit deopt handler code. 1326 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { 1327 1328 // Note that the code buffer's insts_mark is always relative to insts. 1329 // That's why we must use the macroassembler to generate a handler. 1330 address base = __ start_a_stub(size_deopt_handler()); 1331 if (base == nullptr) { 1332 ciEnv::current()->record_failure("CodeCache is full"); 1333 return 0; // CodeBuffer::expand failed 1334 } 1335 int offset = __ offset(); 1336 1337 #ifdef _LP64 1338 address the_pc = (address) __ pc(); 1339 Label next; 1340 // push a "the_pc" on the stack without destroying any registers 1341 // as they all may be live. 
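// (A call whose target is the very next instruction pushes its own return address, i.e. the
// address of 'next'; the subptr below then subtracts the bytes emitted since 'the_pc', so the
// stack slot ends up holding 'the_pc' without clobbering any register.)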
1342 1343 // push address of "next" 1344 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1345 __ bind(next); 1346 // adjust it so it matches "the_pc" 1347 __ subptr(Address(rsp, 0), __ offset() - offset); 1348 #else 1349 InternalAddress here(__ pc()); 1350 __ pushptr(here.addr(), noreg); 1351 #endif 1352 1353 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1354 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1355 __ end_a_stub(); 1356 return offset; 1357 } 1358 1359 static Assembler::Width widthForType(BasicType bt) { 1360 if (bt == T_BYTE) { 1361 return Assembler::B; 1362 } else if (bt == T_SHORT) { 1363 return Assembler::W; 1364 } else if (bt == T_INT) { 1365 return Assembler::D; 1366 } else { 1367 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1368 return Assembler::Q; 1369 } 1370 } 1371 1372 //============================================================================= 1373 1374 // Float masks come from different places depending on platform. 1375 #ifdef _LP64 1376 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1377 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1378 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1379 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1380 #else 1381 static address float_signmask() { return (address)float_signmask_pool; } 1382 static address float_signflip() { return (address)float_signflip_pool; } 1383 static address double_signmask() { return (address)double_signmask_pool; } 1384 static address double_signflip() { return (address)double_signflip_pool; } 1385 #endif 1386 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1387 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1388 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1389 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1390 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1391 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1392 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1393 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1394 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1395 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1396 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1397 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1398 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1399 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1400 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1401 1402 //============================================================================= 1403 bool Matcher::match_rule_supported(int opcode) { 1404 if (!has_match_rule(opcode)) { 1405 return false; // no match rule present 1406 } 1407 const bool is_LP64 = 
LP64_ONLY(true) NOT_LP64(false); 1408 switch (opcode) { 1409 case Op_AbsVL: 1410 case Op_StoreVectorScatter: 1411 if (UseAVX < 3) { 1412 return false; 1413 } 1414 break; 1415 case Op_PopCountI: 1416 case Op_PopCountL: 1417 if (!UsePopCountInstruction) { 1418 return false; 1419 } 1420 break; 1421 case Op_PopCountVI: 1422 if (UseAVX < 2) { 1423 return false; 1424 } 1425 break; 1426 case Op_CompressV: 1427 case Op_ExpandV: 1428 case Op_PopCountVL: 1429 if (UseAVX < 2) { 1430 return false; 1431 } 1432 break; 1433 case Op_MulVI: 1434 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1435 return false; 1436 } 1437 break; 1438 case Op_MulVL: 1439 if (UseSSE < 4) { // only with SSE4_1 or AVX 1440 return false; 1441 } 1442 break; 1443 case Op_MulReductionVL: 1444 if (VM_Version::supports_avx512dq() == false) { 1445 return false; 1446 } 1447 break; 1448 case Op_AddReductionVL: 1449 if (UseSSE < 2) { // requires at least SSE2 1450 return false; 1451 } 1452 break; 1453 case Op_AbsVB: 1454 case Op_AbsVS: 1455 case Op_AbsVI: 1456 case Op_AddReductionVI: 1457 case Op_AndReductionV: 1458 case Op_OrReductionV: 1459 case Op_XorReductionV: 1460 if (UseSSE < 3) { // requires at least SSSE3 1461 return false; 1462 } 1463 break; 1464 case Op_VectorLoadShuffle: 1465 case Op_VectorRearrange: 1466 case Op_MulReductionVI: 1467 if (UseSSE < 4) { // requires at least SSE4 1468 return false; 1469 } 1470 break; 1471 case Op_IsInfiniteF: 1472 case Op_IsInfiniteD: 1473 if (!VM_Version::supports_avx512dq()) { 1474 return false; 1475 } 1476 break; 1477 case Op_SqrtVD: 1478 case Op_SqrtVF: 1479 case Op_VectorMaskCmp: 1480 case Op_VectorCastB2X: 1481 case Op_VectorCastS2X: 1482 case Op_VectorCastI2X: 1483 case Op_VectorCastL2X: 1484 case Op_VectorCastF2X: 1485 case Op_VectorCastD2X: 1486 case Op_VectorUCastB2X: 1487 case Op_VectorUCastS2X: 1488 case Op_VectorUCastI2X: 1489 case Op_VectorMaskCast: 1490 if (UseAVX < 1) { // enabled for AVX only 1491 return false; 1492 } 1493 break; 1494 case Op_PopulateIndex: 1495 if (!is_LP64 || (UseAVX < 2)) { 1496 return false; 1497 } 1498 break; 1499 case Op_RoundVF: 1500 if (UseAVX < 2) { // enabled for AVX2 only 1501 return false; 1502 } 1503 break; 1504 case Op_RoundVD: 1505 if (UseAVX < 3) { 1506 return false; // enabled for AVX3 only 1507 } 1508 break; 1509 case Op_CompareAndSwapL: 1510 #ifdef _LP64 1511 case Op_CompareAndSwapP: 1512 #endif 1513 break; 1514 case Op_StrIndexOf: 1515 if (!UseSSE42Intrinsics) { 1516 return false; 1517 } 1518 break; 1519 case Op_StrIndexOfChar: 1520 if (!UseSSE42Intrinsics) { 1521 return false; 1522 } 1523 break; 1524 case Op_OnSpinWait: 1525 if (VM_Version::supports_on_spin_wait() == false) { 1526 return false; 1527 } 1528 break; 1529 case Op_MulVB: 1530 case Op_LShiftVB: 1531 case Op_RShiftVB: 1532 case Op_URShiftVB: 1533 case Op_VectorInsert: 1534 case Op_VectorLoadMask: 1535 case Op_VectorStoreMask: 1536 case Op_VectorBlend: 1537 if (UseSSE < 4) { 1538 return false; 1539 } 1540 break; 1541 #ifdef _LP64 1542 case Op_MaxD: 1543 case Op_MaxF: 1544 case Op_MinD: 1545 case Op_MinF: 1546 if (UseAVX < 1) { // enabled for AVX only 1547 return false; 1548 } 1549 break; 1550 #endif 1551 case Op_CacheWB: 1552 case Op_CacheWBPreSync: 1553 case Op_CacheWBPostSync: 1554 if (!VM_Version::supports_data_cache_line_flush()) { 1555 return false; 1556 } 1557 break; 1558 case Op_ExtractB: 1559 case Op_ExtractL: 1560 case Op_ExtractI: 1561 case Op_RoundDoubleMode: 1562 if (UseSSE < 4) { 1563 return false; 1564 } 1565 break; 1566 case Op_RoundDoubleModeV: 1567 
if (VM_Version::supports_avx() == false) { 1568 return false; // 128bit vroundpd is not available 1569 } 1570 break; 1571 case Op_LoadVectorGather: 1572 case Op_LoadVectorGatherMasked: 1573 if (UseAVX < 2) { 1574 return false; 1575 } 1576 break; 1577 case Op_FmaF: 1578 case Op_FmaD: 1579 case Op_FmaVD: 1580 case Op_FmaVF: 1581 if (!UseFMA) { 1582 return false; 1583 } 1584 break; 1585 case Op_MacroLogicV: 1586 if (UseAVX < 3 || !UseVectorMacroLogic) { 1587 return false; 1588 } 1589 break; 1590 1591 case Op_VectorCmpMasked: 1592 case Op_VectorMaskGen: 1593 if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) { 1594 return false; 1595 } 1596 break; 1597 case Op_VectorMaskFirstTrue: 1598 case Op_VectorMaskLastTrue: 1599 case Op_VectorMaskTrueCount: 1600 case Op_VectorMaskToLong: 1601 if (!is_LP64 || UseAVX < 1) { 1602 return false; 1603 } 1604 break; 1605 case Op_RoundF: 1606 case Op_RoundD: 1607 if (!is_LP64) { 1608 return false; 1609 } 1610 break; 1611 case Op_CopySignD: 1612 case Op_CopySignF: 1613 if (UseAVX < 3 || !is_LP64) { 1614 return false; 1615 } 1616 if (!VM_Version::supports_avx512vl()) { 1617 return false; 1618 } 1619 break; 1620 #ifndef _LP64 1621 case Op_AddReductionVF: 1622 case Op_AddReductionVD: 1623 case Op_MulReductionVF: 1624 case Op_MulReductionVD: 1625 if (UseSSE < 1) { // requires at least SSE 1626 return false; 1627 } 1628 break; 1629 case Op_MulAddVS2VI: 1630 case Op_RShiftVL: 1631 case Op_AbsVD: 1632 case Op_NegVD: 1633 if (UseSSE < 2) { 1634 return false; 1635 } 1636 break; 1637 #endif // !LP64 1638 case Op_CompressBits: 1639 if (!VM_Version::supports_bmi2() || (!is_LP64 && UseSSE < 2)) { 1640 return false; 1641 } 1642 break; 1643 case Op_ExpandBits: 1644 if (!VM_Version::supports_bmi2() || (!is_LP64 && (UseSSE < 2 || !VM_Version::supports_bmi1()))) { 1645 return false; 1646 } 1647 break; 1648 case Op_SignumF: 1649 if (UseSSE < 1) { 1650 return false; 1651 } 1652 break; 1653 case Op_SignumD: 1654 if (UseSSE < 2) { 1655 return false; 1656 } 1657 break; 1658 case Op_CompressM: 1659 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) { 1660 return false; 1661 } 1662 break; 1663 case Op_SqrtF: 1664 if (UseSSE < 1) { 1665 return false; 1666 } 1667 break; 1668 case Op_SqrtD: 1669 #ifdef _LP64 1670 if (UseSSE < 2) { 1671 return false; 1672 } 1673 #else 1674 // x86_32.ad has a special match rule for SqrtD. 1675 // Together with common x86 rules, this handles all UseSSE cases. 1676 #endif 1677 break; 1678 case Op_ConvF2HF: 1679 case Op_ConvHF2F: 1680 if (!VM_Version::supports_float16()) { 1681 return false; 1682 } 1683 break; 1684 case Op_VectorCastF2HF: 1685 case Op_VectorCastHF2F: 1686 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1687 return false; 1688 } 1689 break; 1690 } 1691 return true; // Match rules are supported by default. 1692 } 1693 1694 //------------------------------------------------------------------------ 1695 1696 static inline bool is_pop_count_instr_target(BasicType bt) { 1697 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1698 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1699 } 1700 1701 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) { 1702 return match_rule_supported_vector(opcode, vlen, bt); 1703 } 1704 1705 // Identify extra cases that we might want to provide match rules for vector nodes and 1706 // other intrinsics guarded with vector length (vlen) and element type (bt). 
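// For example, Op_AbsVF/Op_NegVF on a 512-bit vector (vlen == 16) additionally require
// AVX512DQ, while the same operations on 128/256-bit vectors need no check beyond
// vector_size_supported().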
1707 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1708 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1709 if (!match_rule_supported(opcode)) { 1710 return false; 1711 } 1712 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1713 // * SSE2 supports 128bit vectors for all types; 1714 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1715 // * AVX2 supports 256bit vectors for all types; 1716 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1717 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1718 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1719 // And MaxVectorSize is taken into account as well. 1720 if (!vector_size_supported(bt, vlen)) { 1721 return false; 1722 } 1723 // Special cases which require vector length follow: 1724 // * implementation limitations 1725 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1726 // * 128bit vroundpd instruction is present only in AVX1 1727 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1728 switch (opcode) { 1729 case Op_AbsVF: 1730 case Op_NegVF: 1731 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1732 return false; // 512bit vandps and vxorps are not available 1733 } 1734 break; 1735 case Op_AbsVD: 1736 case Op_NegVD: 1737 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1738 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1739 } 1740 break; 1741 case Op_RotateRightV: 1742 case Op_RotateLeftV: 1743 if (bt != T_INT && bt != T_LONG) { 1744 return false; 1745 } // fallthrough 1746 case Op_MacroLogicV: 1747 if (!VM_Version::supports_evex() || 1748 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1749 return false; 1750 } 1751 break; 1752 case Op_ClearArray: 1753 case Op_VectorMaskGen: 1754 case Op_VectorCmpMasked: 1755 if (!is_LP64 || !VM_Version::supports_avx512bw()) { 1756 return false; 1757 } 1758 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1759 return false; 1760 } 1761 break; 1762 case Op_LoadVectorMasked: 1763 case Op_StoreVectorMasked: 1764 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1765 return false; 1766 } 1767 break; 1768 case Op_UMinV: 1769 case Op_UMaxV: 1770 if (UseAVX == 0) { 1771 return false; 1772 } 1773 break; 1774 case Op_MaxV: 1775 case Op_MinV: 1776 if (UseSSE < 4 && is_integral_type(bt)) { 1777 return false; 1778 } 1779 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1780 // Float/Double intrinsics are enabled for AVX family currently. 
1781 if (UseAVX == 0) { 1782 return false; 1783 } 1784 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1785 return false; 1786 } 1787 } 1788 break; 1789 case Op_CallLeafVector: 1790 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1791 return false; 1792 } 1793 break; 1794 case Op_AddReductionVI: 1795 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1796 return false; 1797 } 1798 // fallthrough 1799 case Op_AndReductionV: 1800 case Op_OrReductionV: 1801 case Op_XorReductionV: 1802 if (is_subword_type(bt) && (UseSSE < 4)) { 1803 return false; 1804 } 1805 #ifndef _LP64 1806 if (bt == T_BYTE || bt == T_LONG) { 1807 return false; 1808 } 1809 #endif 1810 break; 1811 #ifndef _LP64 1812 case Op_VectorInsert: 1813 if (bt == T_LONG || bt == T_DOUBLE) { 1814 return false; 1815 } 1816 break; 1817 #endif 1818 case Op_MinReductionV: 1819 case Op_MaxReductionV: 1820 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1821 return false; 1822 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1823 return false; 1824 } 1825 // Float/Double intrinsics enabled for AVX family. 1826 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1827 return false; 1828 } 1829 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1830 return false; 1831 } 1832 #ifndef _LP64 1833 if (bt == T_BYTE || bt == T_LONG) { 1834 return false; 1835 } 1836 #endif 1837 break; 1838 case Op_VectorTest: 1839 if (UseSSE < 4) { 1840 return false; // Implementation limitation 1841 } else if (size_in_bits < 32) { 1842 return false; // Implementation limitation 1843 } 1844 break; 1845 case Op_VectorLoadShuffle: 1846 case Op_VectorRearrange: 1847 if(vlen == 2) { 1848 return false; // Implementation limitation due to how shuffle is loaded 1849 } else if (size_in_bits == 256 && UseAVX < 2) { 1850 return false; // Implementation limitation 1851 } 1852 break; 1853 case Op_VectorLoadMask: 1854 case Op_VectorMaskCast: 1855 if (size_in_bits == 256 && UseAVX < 2) { 1856 return false; // Implementation limitation 1857 } 1858 // fallthrough 1859 case Op_VectorStoreMask: 1860 if (vlen == 2) { 1861 return false; // Implementation limitation 1862 } 1863 break; 1864 case Op_PopulateIndex: 1865 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1866 return false; 1867 } 1868 break; 1869 case Op_VectorCastB2X: 1870 case Op_VectorCastS2X: 1871 case Op_VectorCastI2X: 1872 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1873 return false; 1874 } 1875 break; 1876 case Op_VectorCastL2X: 1877 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1878 return false; 1879 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1880 return false; 1881 } 1882 break; 1883 case Op_VectorCastF2X: { 1884 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1885 // happen after intermediate conversion to integer and special handling 1886 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 
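// The source of Op_VectorCastF2X is always a float vector, so its width below is computed
// from T_FLOAT and vlen rather than from the destination element type bt.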
1887 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1888 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1889 return false; 1890 } 1891 } 1892 // fallthrough 1893 case Op_VectorCastD2X: 1894 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1895 return false; 1896 } 1897 break; 1898 case Op_VectorCastF2HF: 1899 case Op_VectorCastHF2F: 1900 if (!VM_Version::supports_f16c() && 1901 ((!VM_Version::supports_evex() || 1902 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1903 return false; 1904 } 1905 break; 1906 case Op_RoundVD: 1907 if (!VM_Version::supports_avx512dq()) { 1908 return false; 1909 } 1910 break; 1911 case Op_MulReductionVI: 1912 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1913 return false; 1914 } 1915 break; 1916 case Op_LoadVectorGatherMasked: 1917 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1918 return false; 1919 } 1920 if (is_subword_type(bt) && 1921 (!is_LP64 || 1922 (size_in_bits > 256 && !VM_Version::supports_avx512bw()) || 1923 (size_in_bits < 64) || 1924 (bt == T_SHORT && !VM_Version::supports_bmi2()))) { 1925 return false; 1926 } 1927 break; 1928 case Op_StoreVectorScatterMasked: 1929 case Op_StoreVectorScatter: 1930 if (is_subword_type(bt)) { 1931 return false; 1932 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1933 return false; 1934 } 1935 // fallthrough 1936 case Op_LoadVectorGather: 1937 if (!is_subword_type(bt) && size_in_bits == 64) { 1938 return false; 1939 } 1940 if (is_subword_type(bt) && size_in_bits < 64) { 1941 return false; 1942 } 1943 break; 1944 case Op_SaturatingAddV: 1945 case Op_SaturatingSubV: 1946 if (UseAVX < 1) { 1947 return false; // Implementation limitation 1948 } 1949 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1950 return false; 1951 } 1952 break; 1953 case Op_SelectFromTwoVector: 1954 if (size_in_bits < 128 || (size_in_bits < 512 && !VM_Version::supports_avx512vl())) { 1955 return false; 1956 } 1957 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 1958 return false; 1959 } 1960 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 1961 return false; 1962 } 1963 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) { 1964 return false; 1965 } 1966 break; 1967 case Op_MaskAll: 1968 if (!VM_Version::supports_evex()) { 1969 return false; 1970 } 1971 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1972 return false; 1973 } 1974 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1975 return false; 1976 } 1977 break; 1978 case Op_VectorMaskCmp: 1979 if (vlen < 2 || size_in_bits < 32) { 1980 return false; 1981 } 1982 break; 1983 case Op_CompressM: 1984 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1985 return false; 1986 } 1987 break; 1988 case Op_CompressV: 1989 case Op_ExpandV: 1990 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1991 return false; 1992 } 1993 if (!is_LP64 && !VM_Version::supports_avx512vl() && size_in_bits < 512) { 1994 return false; 1995 } 1996 if (size_in_bits < 128 ) { 1997 return false; 1998 } 1999 case Op_VectorLongToMask: 2000 if (UseAVX < 1 || !is_LP64) { 2001 return false; 2002 } 2003 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 2004 return false; 2005 } 2006 break; 2007 case Op_SignumVD: 2008 case Op_SignumVF: 2009 if (UseAVX < 1) { 2010 return false; 2011 } 2012 break; 2013 case Op_PopCountVI: 2014 
case Op_PopCountVL: { 2015 if (!is_pop_count_instr_target(bt) && 2016 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 2017 return false; 2018 } 2019 } 2020 break; 2021 case Op_ReverseV: 2022 case Op_ReverseBytesV: 2023 if (UseAVX < 2) { 2024 return false; 2025 } 2026 break; 2027 case Op_CountTrailingZerosV: 2028 case Op_CountLeadingZerosV: 2029 if (UseAVX < 2) { 2030 return false; 2031 } 2032 break; 2033 } 2034 return true; // Per default match rules are supported. 2035 } 2036 2037 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 2038 // ADLC based match_rule_supported routine checks for the existence of pattern based 2039 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 2040 // of their non-masked counterpart with mask edge being the differentiator. 2041 // This routine does a strict check on the existence of masked operation patterns 2042 // by returning a default false value for all the other opcodes apart from the 2043 // ones whose masked instruction patterns are defined in this file. 2044 if (!match_rule_supported_vector(opcode, vlen, bt)) { 2045 return false; 2046 } 2047 2048 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 2049 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 2050 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 2051 return false; 2052 } 2053 switch(opcode) { 2054 // Unary masked operations 2055 case Op_AbsVB: 2056 case Op_AbsVS: 2057 if(!VM_Version::supports_avx512bw()) { 2058 return false; // Implementation limitation 2059 } 2060 case Op_AbsVI: 2061 case Op_AbsVL: 2062 return true; 2063 2064 // Ternary masked operations 2065 case Op_FmaVF: 2066 case Op_FmaVD: 2067 return true; 2068 2069 case Op_MacroLogicV: 2070 if(bt != T_INT && bt != T_LONG) { 2071 return false; 2072 } 2073 return true; 2074 2075 // Binary masked operations 2076 case Op_AddVB: 2077 case Op_AddVS: 2078 case Op_SubVB: 2079 case Op_SubVS: 2080 case Op_MulVS: 2081 case Op_LShiftVS: 2082 case Op_RShiftVS: 2083 case Op_URShiftVS: 2084 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2085 if (!VM_Version::supports_avx512bw()) { 2086 return false; // Implementation limitation 2087 } 2088 return true; 2089 2090 case Op_MulVL: 2091 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2092 if (!VM_Version::supports_avx512dq()) { 2093 return false; // Implementation limitation 2094 } 2095 return true; 2096 2097 case Op_AndV: 2098 case Op_OrV: 2099 case Op_XorV: 2100 case Op_RotateRightV: 2101 case Op_RotateLeftV: 2102 if (bt != T_INT && bt != T_LONG) { 2103 return false; // Implementation limitation 2104 } 2105 return true; 2106 2107 case Op_VectorLoadMask: 2108 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2109 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2110 return false; 2111 } 2112 return true; 2113 2114 case Op_AddVI: 2115 case Op_AddVL: 2116 case Op_AddVF: 2117 case Op_AddVD: 2118 case Op_SubVI: 2119 case Op_SubVL: 2120 case Op_SubVF: 2121 case Op_SubVD: 2122 case Op_MulVI: 2123 case Op_MulVF: 2124 case Op_MulVD: 2125 case Op_DivVF: 2126 case Op_DivVD: 2127 case Op_SqrtVF: 2128 case Op_SqrtVD: 2129 case Op_LShiftVI: 2130 case Op_LShiftVL: 2131 case Op_RShiftVI: 2132 case Op_RShiftVL: 2133 case Op_URShiftVI: 2134 case Op_URShiftVL: 2135 case Op_LoadVectorMasked: 2136 case Op_StoreVectorMasked: 2137 case Op_LoadVectorGatherMasked: 2138 case Op_StoreVectorScatterMasked: 2139 return true; 2140 2141 case Op_UMinV: 
2142 case Op_UMaxV: 2143 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 2144 return false; 2145 } // fallthrough 2146 case Op_MaxV: 2147 case Op_MinV: 2148 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2149 return false; // Implementation limitation 2150 } 2151 if (is_floating_point_type(bt)) { 2152 return false; // Implementation limitation 2153 } 2154 return true; 2155 case Op_SaturatingAddV: 2156 case Op_SaturatingSubV: 2157 if (!is_subword_type(bt)) { 2158 return false; 2159 } 2160 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) { 2161 return false; // Implementation limitation 2162 } 2163 return true; 2164 2165 case Op_VectorMaskCmp: 2166 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2167 return false; // Implementation limitation 2168 } 2169 return true; 2170 2171 case Op_VectorRearrange: 2172 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2173 return false; // Implementation limitation 2174 } 2175 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2176 return false; // Implementation limitation 2177 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2178 return false; // Implementation limitation 2179 } 2180 return true; 2181 2182 // Binary Logical operations 2183 case Op_AndVMask: 2184 case Op_OrVMask: 2185 case Op_XorVMask: 2186 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2187 return false; // Implementation limitation 2188 } 2189 return true; 2190 2191 case Op_PopCountVI: 2192 case Op_PopCountVL: 2193 if (!is_pop_count_instr_target(bt)) { 2194 return false; 2195 } 2196 return true; 2197 2198 case Op_MaskAll: 2199 return true; 2200 2201 case Op_CountLeadingZerosV: 2202 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2203 return true; 2204 } 2205 default: 2206 return false; 2207 } 2208 } 2209 2210 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2211 return false; 2212 } 2213 2214 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2215 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2216 bool legacy = (generic_opnd->opcode() == LEGVEC); 2217 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2218 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2219 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 
2220 return new legVecZOper(); 2221 } 2222 if (legacy) { 2223 switch (ideal_reg) { 2224 case Op_VecS: return new legVecSOper(); 2225 case Op_VecD: return new legVecDOper(); 2226 case Op_VecX: return new legVecXOper(); 2227 case Op_VecY: return new legVecYOper(); 2228 case Op_VecZ: return new legVecZOper(); 2229 } 2230 } else { 2231 switch (ideal_reg) { 2232 case Op_VecS: return new vecSOper(); 2233 case Op_VecD: return new vecDOper(); 2234 case Op_VecX: return new vecXOper(); 2235 case Op_VecY: return new vecYOper(); 2236 case Op_VecZ: return new vecZOper(); 2237 } 2238 } 2239 ShouldNotReachHere(); 2240 return nullptr; 2241 } 2242 2243 bool Matcher::is_reg2reg_move(MachNode* m) { 2244 switch (m->rule()) { 2245 case MoveVec2Leg_rule: 2246 case MoveLeg2Vec_rule: 2247 case MoveF2VL_rule: 2248 case MoveF2LEG_rule: 2249 case MoveVL2F_rule: 2250 case MoveLEG2F_rule: 2251 case MoveD2VL_rule: 2252 case MoveD2LEG_rule: 2253 case MoveVL2D_rule: 2254 case MoveLEG2D_rule: 2255 return true; 2256 default: 2257 return false; 2258 } 2259 } 2260 2261 bool Matcher::is_generic_vector(MachOper* opnd) { 2262 switch (opnd->opcode()) { 2263 case VEC: 2264 case LEGVEC: 2265 return true; 2266 default: 2267 return false; 2268 } 2269 } 2270 2271 //------------------------------------------------------------------------ 2272 2273 const RegMask* Matcher::predicate_reg_mask(void) { 2274 return &_VECTMASK_REG_mask; 2275 } 2276 2277 // Max vector size in bytes. 0 if not supported. 2278 int Matcher::vector_width_in_bytes(BasicType bt) { 2279 assert(is_java_primitive(bt), "only primitive type vectors"); 2280 if (UseSSE < 2) return 0; 2281 // SSE2 supports 128bit vectors for all types. 2282 // AVX2 supports 256bit vectors for all types. 2283 // AVX2/EVEX supports 512bit vectors for all types. 2284 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2285 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2286 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2287 size = (UseAVX > 2) ? 64 : 32; 2288 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2289 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2290 // Use flag to limit vector size. 2291 size = MIN2(size,(int)MaxVectorSize); 2292 // Minimum 2 values in vector (or 4 for bytes). 2293 switch (bt) { 2294 case T_DOUBLE: 2295 case T_LONG: 2296 if (size < 16) return 0; 2297 break; 2298 case T_FLOAT: 2299 case T_INT: 2300 if (size < 8) return 0; 2301 break; 2302 case T_BOOLEAN: 2303 if (size < 4) return 0; 2304 break; 2305 case T_CHAR: 2306 if (size < 4) return 0; 2307 break; 2308 case T_BYTE: 2309 if (size < 4) return 0; 2310 break; 2311 case T_SHORT: 2312 if (size < 4) return 0; 2313 break; 2314 default: 2315 ShouldNotReachHere(); 2316 } 2317 return size; 2318 } 2319 2320 // Limits on vector size (number of elements) loaded into vector. 2321 int Matcher::max_vector_size(const BasicType bt) { 2322 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2323 } 2324 int Matcher::min_vector_size(const BasicType bt) { 2325 int max_size = max_vector_size(bt); 2326 // Min size which can be loaded into vector is 4 bytes. 2327 int size = (type2aelembytes(bt) == 1) ? 
4 : 2; 2328 // Support for calling svml double64 vectors 2329 if (bt == T_DOUBLE) { 2330 size = 1; 2331 } 2332 return MIN2(size,max_size); 2333 } 2334 2335 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) { 2336 // Limit the max vector size for auto vectorization to 256 bits (32 bytes) 2337 // by default on Cascade Lake 2338 if (VM_Version::is_default_intel_cascade_lake()) { 2339 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt)); 2340 } 2341 return Matcher::max_vector_size(bt); 2342 } 2343 2344 int Matcher::scalable_vector_reg_size(const BasicType bt) { 2345 return -1; 2346 } 2347 2348 // Vector ideal reg corresponding to specified size in bytes 2349 uint Matcher::vector_ideal_reg(int size) { 2350 assert(MaxVectorSize >= size, ""); 2351 switch(size) { 2352 case 4: return Op_VecS; 2353 case 8: return Op_VecD; 2354 case 16: return Op_VecX; 2355 case 32: return Op_VecY; 2356 case 64: return Op_VecZ; 2357 } 2358 ShouldNotReachHere(); 2359 return 0; 2360 } 2361 2362 // Check for shift by small constant as well 2363 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2364 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2365 shift->in(2)->get_int() <= 3 && 2366 // Are there other uses besides address expressions? 2367 !matcher->is_visited(shift)) { 2368 address_visited.set(shift->_idx); // Flag as address_visited 2369 mstack.push(shift->in(2), Matcher::Visit); 2370 Node *conv = shift->in(1); 2371 #ifdef _LP64 2372 // Allow Matcher to match the rule which bypass 2373 // ConvI2L operation for an array index on LP64 2374 // if the index value is positive. 2375 if (conv->Opcode() == Op_ConvI2L && 2376 conv->as_Type()->type()->is_long()->_lo >= 0 && 2377 // Are there other uses besides address expressions? 2378 !matcher->is_visited(conv)) { 2379 address_visited.set(conv->_idx); // Flag as address_visited 2380 mstack.push(conv->in(1), Matcher::Pre_Visit); 2381 } else 2382 #endif 2383 mstack.push(conv, Matcher::Pre_Visit); 2384 return true; 2385 } 2386 return false; 2387 } 2388 2389 // This function identifies sub-graphs in which a 'load' node is 2390 // input to two different nodes, and such that it can be matched 2391 // with BMI instructions like blsi, blsr, etc. 2392 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2393 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2394 // refers to the same node. 2395 // 2396 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2397 // This is a temporary solution until we make DAGs expressible in ADL. 
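// The int flavors recognized by is_bmi_pattern() below are:
//   blsi  : (AndI (SubI ConI(0) LoadI*) LoadI*)    i.e.  (-x) & x
//   blsr  : (AndI (AddI LoadI* ConI(-1)) LoadI*)   i.e. (x-1) & x
//   blsmsk: (XorI (AddI LoadI* ConI(-1)) LoadI*)   i.e. (x-1) ^ x
// with the corresponding long variants built from LoadL/ConL.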
2398 template<typename ConType> 2399 class FusedPatternMatcher { 2400 Node* _op1_node; 2401 Node* _mop_node; 2402 int _con_op; 2403 2404 static int match_next(Node* n, int next_op, int next_op_idx) { 2405 if (n->in(1) == nullptr || n->in(2) == nullptr) { 2406 return -1; 2407 } 2408 2409 if (next_op_idx == -1) { // n is commutative, try rotations 2410 if (n->in(1)->Opcode() == next_op) { 2411 return 1; 2412 } else if (n->in(2)->Opcode() == next_op) { 2413 return 2; 2414 } 2415 } else { 2416 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2417 if (n->in(next_op_idx)->Opcode() == next_op) { 2418 return next_op_idx; 2419 } 2420 } 2421 return -1; 2422 } 2423 2424 public: 2425 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2426 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2427 2428 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2429 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2430 typename ConType::NativeType con_value) { 2431 if (_op1_node->Opcode() != op1) { 2432 return false; 2433 } 2434 if (_mop_node->outcnt() > 2) { 2435 return false; 2436 } 2437 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2438 if (op1_op2_idx == -1) { 2439 return false; 2440 } 2441 // Memory operation must be the other edge 2442 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2443 2444 // Check that the mop node is really what we want 2445 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2446 Node* op2_node = _op1_node->in(op1_op2_idx); 2447 if (op2_node->outcnt() > 1) { 2448 return false; 2449 } 2450 assert(op2_node->Opcode() == op2, "Should be"); 2451 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2452 if (op2_con_idx == -1) { 2453 return false; 2454 } 2455 // Memory operation must be the other edge 2456 int op2_mop_idx = (op2_con_idx & 1) + 1; 2457 // Check that the memory operation is the same node 2458 if (op2_node->in(op2_mop_idx) == _mop_node) { 2459 // Now check the constant 2460 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2461 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2462 return true; 2463 } 2464 } 2465 } 2466 return false; 2467 } 2468 }; 2469 2470 static bool is_bmi_pattern(Node* n, Node* m) { 2471 assert(UseBMI1Instructions, "sanity"); 2472 if (n != nullptr && m != nullptr) { 2473 if (m->Opcode() == Op_LoadI) { 2474 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2475 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2476 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2477 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2478 } else if (m->Opcode() == Op_LoadL) { 2479 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2480 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2481 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2482 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2483 } 2484 } 2485 return false; 2486 } 2487 2488 // Should the matcher clone input 'm' of node 'n'? 2489 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2490 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
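// Cloning the shared load lets it be subsumed as the memory operand of the matched BMI
// instruction rather than being forced into a register by the multiple uses.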
2491 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2492 mstack.push(m, Visit); 2493 return true; 2494 } 2495 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2496 mstack.push(m, Visit); // m = ShiftCntV 2497 return true; 2498 } 2499 if (is_encode_and_store_pattern(n, m)) { 2500 mstack.push(m, Visit); 2501 return true; 2502 } 2503 return false; 2504 } 2505 2506 // Should the Matcher clone shifts on addressing modes, expecting them 2507 // to be subsumed into complex addressing expressions or compute them 2508 // into registers? 2509 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2510 Node *off = m->in(AddPNode::Offset); 2511 if (off->is_Con()) { 2512 address_visited.test_set(m->_idx); // Flag as address_visited 2513 Node *adr = m->in(AddPNode::Address); 2514 2515 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset. 2516 // AtomicAdd is not an addressing expression. 2517 // Cheap to find it by looking for screwy base. 2518 if (adr->is_AddP() && 2519 !adr->in(AddPNode::Base)->is_top() && 2520 !adr->in(AddPNode::Offset)->is_Con() && 2521 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 2522 // Are there other uses besides address expressions? 2523 !is_visited(adr)) { 2524 address_visited.set(adr->_idx); // Flag as address_visited 2525 Node *shift = adr->in(AddPNode::Offset); 2526 if (!clone_shift(shift, this, mstack, address_visited)) { 2527 mstack.push(shift, Pre_Visit); 2528 } 2529 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2530 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2531 } else { 2532 mstack.push(adr, Pre_Visit); 2533 } 2534 2535 // Clone X+offset as it also folds into most addressing expressions 2536 mstack.push(off, Visit); 2537 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2538 return true; 2539 } else if (clone_shift(off, this, mstack, address_visited)) { 2540 address_visited.test_set(m->_idx); // Flag as address_visited 2541 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2542 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2543 return true; 2544 } 2545 return false; 2546 } 2547 2548 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2549 switch (bt) { 2550 case BoolTest::eq: 2551 return Assembler::eq; 2552 case BoolTest::ne: 2553 return Assembler::neq; 2554 case BoolTest::le: 2555 case BoolTest::ule: 2556 return Assembler::le; 2557 case BoolTest::ge: 2558 case BoolTest::uge: 2559 return Assembler::nlt; 2560 case BoolTest::lt: 2561 case BoolTest::ult: 2562 return Assembler::lt; 2563 case BoolTest::gt: 2564 case BoolTest::ugt: 2565 return Assembler::nle; 2566 default : ShouldNotReachHere(); return Assembler::_false; 2567 } 2568 } 2569 2570 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2571 switch (bt) { 2572 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2573 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
2574 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2575 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2576 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2577 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2578 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2579 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2580 } 2581 } 2582 2583 // Helper methods for MachSpillCopyNode::implementation(). 2584 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 2585 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2586 assert(ireg == Op_VecS || // 32bit vector 2587 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2588 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi), 2589 "no non-adjacent vector moves" ); 2590 if (masm) { 2591 switch (ireg) { 2592 case Op_VecS: // copy whole register 2593 case Op_VecD: 2594 case Op_VecX: 2595 #ifndef _LP64 2596 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2597 #else 2598 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2599 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2600 } else { 2601 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2602 } 2603 #endif 2604 break; 2605 case Op_VecY: 2606 #ifndef _LP64 2607 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2608 #else 2609 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2610 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2611 } else { 2612 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2613 } 2614 #endif 2615 break; 2616 case Op_VecZ: 2617 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2618 break; 2619 default: 2620 ShouldNotReachHere(); 2621 } 2622 #ifndef PRODUCT 2623 } else { 2624 switch (ireg) { 2625 case Op_VecS: 2626 case Op_VecD: 2627 case Op_VecX: 2628 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2629 break; 2630 case Op_VecY: 2631 case Op_VecZ: 2632 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2633 break; 2634 default: 2635 ShouldNotReachHere(); 2636 } 2637 #endif 2638 } 2639 } 2640 2641 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 2642 int stack_offset, int reg, uint ireg, outputStream* st) { 2643 if (masm) { 2644 if (is_load) { 2645 switch (ireg) { 2646 case Op_VecS: 2647 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2648 break; 2649 case Op_VecD: 2650 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2651 break; 2652 case Op_VecX: 2653 #ifndef _LP64 2654 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2655 #else 2656 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2657 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2658 } else { 2659 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2660 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2661 } 2662 
#endif 2663 break; 2664 case Op_VecY: 2665 #ifndef _LP64 2666 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2667 #else 2668 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2669 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2670 } else { 2671 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2672 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2673 } 2674 #endif 2675 break; 2676 case Op_VecZ: 2677 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2678 break; 2679 default: 2680 ShouldNotReachHere(); 2681 } 2682 } else { // store 2683 switch (ireg) { 2684 case Op_VecS: 2685 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2686 break; 2687 case Op_VecD: 2688 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2689 break; 2690 case Op_VecX: 2691 #ifndef _LP64 2692 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2693 #else 2694 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2695 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2696 } 2697 else { 2698 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2699 } 2700 #endif 2701 break; 2702 case Op_VecY: 2703 #ifndef _LP64 2704 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2705 #else 2706 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2707 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2708 } 2709 else { 2710 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2711 } 2712 #endif 2713 break; 2714 case Op_VecZ: 2715 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2716 break; 2717 default: 2718 ShouldNotReachHere(); 2719 } 2720 } 2721 #ifndef PRODUCT 2722 } else { 2723 if (is_load) { 2724 switch (ireg) { 2725 case Op_VecS: 2726 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2727 break; 2728 case Op_VecD: 2729 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2730 break; 2731 case Op_VecX: 2732 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2733 break; 2734 case Op_VecY: 2735 case Op_VecZ: 2736 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2737 break; 2738 default: 2739 ShouldNotReachHere(); 2740 } 2741 } else { // store 2742 switch (ireg) { 2743 case Op_VecS: 2744 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2745 break; 2746 case Op_VecD: 2747 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2748 break; 2749 case Op_VecX: 2750 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2751 break; 2752 case Op_VecY: 2753 case Op_VecZ: 2754 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2755 break; 2756 default: 2757 ShouldNotReachHere(); 2758 } 2759 } 2760 #endif 2761 } 2762 } 2763 2764 template <class T> 2765 static inline GrowableArray<jvalue>* vreplicate_imm(BasicType bt, T con, int len) { 2766 GrowableArray<jvalue>* val = new GrowableArray<jvalue>(len); 2767 jvalue ele; 2768 switch (bt) { 2769 
case T_BYTE: ele.b = con; break; 2770 case T_SHORT: ele.s = con; break; 2771 case T_INT: ele.i = con; break; 2772 case T_LONG: ele.j = con; break; 2773 case T_FLOAT: ele.f = con; break; 2774 case T_DOUBLE: ele.d = con; break; 2775 default: ShouldNotReachHere(); 2776 } 2777 for (int i = 0; i < len; i++) { 2778 val->append(ele); 2779 } 2780 return val; 2781 } 2782 2783 static inline jlong high_bit_set(BasicType bt) { 2784 switch (bt) { 2785 case T_BYTE: return 0x8080808080808080; 2786 case T_SHORT: return 0x8000800080008000; 2787 case T_INT: return 0x8000000080000000; 2788 case T_LONG: return 0x8000000000000000; 2789 default: 2790 ShouldNotReachHere(); 2791 return 0; 2792 } 2793 } 2794 2795 #ifndef PRODUCT 2796 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2797 st->print("nop \t# %d bytes pad for loops and calls", _count); 2798 } 2799 #endif 2800 2801 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const { 2802 __ nop(_count); 2803 } 2804 2805 uint MachNopNode::size(PhaseRegAlloc*) const { 2806 return _count; 2807 } 2808 2809 #ifndef PRODUCT 2810 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2811 st->print("# breakpoint"); 2812 } 2813 #endif 2814 2815 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const { 2816 __ int3(); 2817 } 2818 2819 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2820 return MachNode::size(ra_); 2821 } 2822 2823 %} 2824 2825 encode %{ 2826 2827 enc_class call_epilog %{ 2828 if (VerifyStackAtCalls) { 2829 // Check that stack depth is unchanged: find majik cookie on stack 2830 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2831 Label L; 2832 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2833 __ jccb(Assembler::equal, L); 2834 // Die if stack mismatch 2835 __ int3(); 2836 __ bind(L); 2837 } 2838 %} 2839 2840 %} 2841 2842 // Operands for bound floating pointer register arguments 2843 operand rxmm0() %{ 2844 constraint(ALLOC_IN_RC(xmm0_reg)); 2845 match(VecX); 2846 format%{%} 2847 interface(REG_INTER); 2848 %} 2849 2850 //----------OPERANDS----------------------------------------------------------- 2851 // Operand definitions must precede instruction definitions for correct parsing 2852 // in the ADLC because operands constitute user defined types which are used in 2853 // instruction definitions. 2854 2855 // Vectors 2856 2857 // Dummy generic vector class. Should be used for all vector operands. 2858 // Replaced with vec[SDXYZ] during post-selection pass. 2859 operand vec() %{ 2860 constraint(ALLOC_IN_RC(dynamic)); 2861 match(VecX); 2862 match(VecY); 2863 match(VecZ); 2864 match(VecS); 2865 match(VecD); 2866 2867 format %{ %} 2868 interface(REG_INTER); 2869 %} 2870 2871 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2872 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2873 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2874 // runtime code generation via reg_class_dynamic. 2875 operand legVec() %{ 2876 constraint(ALLOC_IN_RC(dynamic)); 2877 match(VecX); 2878 match(VecY); 2879 match(VecZ); 2880 match(VecS); 2881 match(VecD); 2882 2883 format %{ %} 2884 interface(REG_INTER); 2885 %} 2886 2887 // Replaces vec during post-selection cleanup. See above. 
2888 operand vecS() %{ 2889 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2890 match(VecS); 2891 2892 format %{ %} 2893 interface(REG_INTER); 2894 %} 2895 2896 // Replaces legVec during post-selection cleanup. See above. 2897 operand legVecS() %{ 2898 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2899 match(VecS); 2900 2901 format %{ %} 2902 interface(REG_INTER); 2903 %} 2904 2905 // Replaces vec during post-selection cleanup. See above. 2906 operand vecD() %{ 2907 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2908 match(VecD); 2909 2910 format %{ %} 2911 interface(REG_INTER); 2912 %} 2913 2914 // Replaces legVec during post-selection cleanup. See above. 2915 operand legVecD() %{ 2916 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2917 match(VecD); 2918 2919 format %{ %} 2920 interface(REG_INTER); 2921 %} 2922 2923 // Replaces vec during post-selection cleanup. See above. 2924 operand vecX() %{ 2925 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2926 match(VecX); 2927 2928 format %{ %} 2929 interface(REG_INTER); 2930 %} 2931 2932 // Replaces legVec during post-selection cleanup. See above. 2933 operand legVecX() %{ 2934 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2935 match(VecX); 2936 2937 format %{ %} 2938 interface(REG_INTER); 2939 %} 2940 2941 // Replaces vec during post-selection cleanup. See above. 2942 operand vecY() %{ 2943 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2944 match(VecY); 2945 2946 format %{ %} 2947 interface(REG_INTER); 2948 %} 2949 2950 // Replaces legVec during post-selection cleanup. See above. 2951 operand legVecY() %{ 2952 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2953 match(VecY); 2954 2955 format %{ %} 2956 interface(REG_INTER); 2957 %} 2958 2959 // Replaces vec during post-selection cleanup. See above. 2960 operand vecZ() %{ 2961 constraint(ALLOC_IN_RC(vectorz_reg)); 2962 match(VecZ); 2963 2964 format %{ %} 2965 interface(REG_INTER); 2966 %} 2967 2968 // Replaces legVec during post-selection cleanup. See above. 
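// Note: the *_legacy register classes are restricted to the lower XMM registers,
// since legacy (pre-EVEX) SSE/VEX encodings cannot address XMM16-XMM31; the
// non-legacy classes above may include the extended registers when EVEX is
// available.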
2969 operand legVecZ() %{ 2970 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2971 match(VecZ); 2972 2973 format %{ %} 2974 interface(REG_INTER); 2975 %} 2976 2977 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2978 2979 // ============================================================================ 2980 2981 instruct ShouldNotReachHere() %{ 2982 match(Halt); 2983 format %{ "stop\t# ShouldNotReachHere" %} 2984 ins_encode %{ 2985 if (is_reachable()) { 2986 __ stop(_halt_reason); 2987 } 2988 %} 2989 ins_pipe(pipe_slow); 2990 %} 2991 2992 // ============================================================================ 2993 2994 instruct addF_reg(regF dst, regF src) %{ 2995 predicate((UseSSE>=1) && (UseAVX == 0)); 2996 match(Set dst (AddF dst src)); 2997 2998 format %{ "addss $dst, $src" %} 2999 ins_cost(150); 3000 ins_encode %{ 3001 __ addss($dst$$XMMRegister, $src$$XMMRegister); 3002 %} 3003 ins_pipe(pipe_slow); 3004 %} 3005 3006 instruct addF_mem(regF dst, memory src) %{ 3007 predicate((UseSSE>=1) && (UseAVX == 0)); 3008 match(Set dst (AddF dst (LoadF src))); 3009 3010 format %{ "addss $dst, $src" %} 3011 ins_cost(150); 3012 ins_encode %{ 3013 __ addss($dst$$XMMRegister, $src$$Address); 3014 %} 3015 ins_pipe(pipe_slow); 3016 %} 3017 3018 instruct addF_imm(regF dst, immF con) %{ 3019 predicate((UseSSE>=1) && (UseAVX == 0)); 3020 match(Set dst (AddF dst con)); 3021 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3022 ins_cost(150); 3023 ins_encode %{ 3024 __ addss($dst$$XMMRegister, $constantaddress($con)); 3025 %} 3026 ins_pipe(pipe_slow); 3027 %} 3028 3029 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 3030 predicate(UseAVX > 0); 3031 match(Set dst (AddF src1 src2)); 3032 3033 format %{ "vaddss $dst, $src1, $src2" %} 3034 ins_cost(150); 3035 ins_encode %{ 3036 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3037 %} 3038 ins_pipe(pipe_slow); 3039 %} 3040 3041 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 3042 predicate(UseAVX > 0); 3043 match(Set dst (AddF src1 (LoadF src2))); 3044 3045 format %{ "vaddss $dst, $src1, $src2" %} 3046 ins_cost(150); 3047 ins_encode %{ 3048 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3049 %} 3050 ins_pipe(pipe_slow); 3051 %} 3052 3053 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 3054 predicate(UseAVX > 0); 3055 match(Set dst (AddF src con)); 3056 3057 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3058 ins_cost(150); 3059 ins_encode %{ 3060 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3061 %} 3062 ins_pipe(pipe_slow); 3063 %} 3064 3065 instruct addD_reg(regD dst, regD src) %{ 3066 predicate((UseSSE>=2) && (UseAVX == 0)); 3067 match(Set dst (AddD dst src)); 3068 3069 format %{ "addsd $dst, $src" %} 3070 ins_cost(150); 3071 ins_encode %{ 3072 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 3073 %} 3074 ins_pipe(pipe_slow); 3075 %} 3076 3077 instruct addD_mem(regD dst, memory src) %{ 3078 predicate((UseSSE>=2) && (UseAVX == 0)); 3079 match(Set dst (AddD dst (LoadD src))); 3080 3081 format %{ "addsd $dst, $src" %} 3082 ins_cost(150); 3083 ins_encode %{ 3084 __ addsd($dst$$XMMRegister, $src$$Address); 3085 %} 3086 ins_pipe(pipe_slow); 3087 %} 3088 3089 instruct addD_imm(regD dst, immD con) %{ 3090 predicate((UseSSE>=2) && (UseAVX == 0)); 3091 match(Set dst (AddD dst con)); 3092 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: 
double=$con" %} 3093 ins_cost(150); 3094 ins_encode %{ 3095 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3096 %} 3097 ins_pipe(pipe_slow); 3098 %} 3099 3100 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3101 predicate(UseAVX > 0); 3102 match(Set dst (AddD src1 src2)); 3103 3104 format %{ "vaddsd $dst, $src1, $src2" %} 3105 ins_cost(150); 3106 ins_encode %{ 3107 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3108 %} 3109 ins_pipe(pipe_slow); 3110 %} 3111 3112 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3113 predicate(UseAVX > 0); 3114 match(Set dst (AddD src1 (LoadD src2))); 3115 3116 format %{ "vaddsd $dst, $src1, $src2" %} 3117 ins_cost(150); 3118 ins_encode %{ 3119 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3120 %} 3121 ins_pipe(pipe_slow); 3122 %} 3123 3124 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3125 predicate(UseAVX > 0); 3126 match(Set dst (AddD src con)); 3127 3128 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3129 ins_cost(150); 3130 ins_encode %{ 3131 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3132 %} 3133 ins_pipe(pipe_slow); 3134 %} 3135 3136 instruct subF_reg(regF dst, regF src) %{ 3137 predicate((UseSSE>=1) && (UseAVX == 0)); 3138 match(Set dst (SubF dst src)); 3139 3140 format %{ "subss $dst, $src" %} 3141 ins_cost(150); 3142 ins_encode %{ 3143 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3144 %} 3145 ins_pipe(pipe_slow); 3146 %} 3147 3148 instruct subF_mem(regF dst, memory src) %{ 3149 predicate((UseSSE>=1) && (UseAVX == 0)); 3150 match(Set dst (SubF dst (LoadF src))); 3151 3152 format %{ "subss $dst, $src" %} 3153 ins_cost(150); 3154 ins_encode %{ 3155 __ subss($dst$$XMMRegister, $src$$Address); 3156 %} 3157 ins_pipe(pipe_slow); 3158 %} 3159 3160 instruct subF_imm(regF dst, immF con) %{ 3161 predicate((UseSSE>=1) && (UseAVX == 0)); 3162 match(Set dst (SubF dst con)); 3163 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3164 ins_cost(150); 3165 ins_encode %{ 3166 __ subss($dst$$XMMRegister, $constantaddress($con)); 3167 %} 3168 ins_pipe(pipe_slow); 3169 %} 3170 3171 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3172 predicate(UseAVX > 0); 3173 match(Set dst (SubF src1 src2)); 3174 3175 format %{ "vsubss $dst, $src1, $src2" %} 3176 ins_cost(150); 3177 ins_encode %{ 3178 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3179 %} 3180 ins_pipe(pipe_slow); 3181 %} 3182 3183 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3184 predicate(UseAVX > 0); 3185 match(Set dst (SubF src1 (LoadF src2))); 3186 3187 format %{ "vsubss $dst, $src1, $src2" %} 3188 ins_cost(150); 3189 ins_encode %{ 3190 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3191 %} 3192 ins_pipe(pipe_slow); 3193 %} 3194 3195 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3196 predicate(UseAVX > 0); 3197 match(Set dst (SubF src con)); 3198 3199 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3200 ins_cost(150); 3201 ins_encode %{ 3202 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3203 %} 3204 ins_pipe(pipe_slow); 3205 %} 3206 3207 instruct subD_reg(regD dst, regD src) %{ 3208 predicate((UseSSE>=2) && (UseAVX == 0)); 3209 match(Set dst (SubD dst src)); 3210 3211 format %{ "subsd $dst, $src" %} 3212 ins_cost(150); 3213 ins_encode %{ 3214 __ subsd($dst$$XMMRegister, 
$src$$XMMRegister); 3215 %} 3216 ins_pipe(pipe_slow); 3217 %} 3218 3219 instruct subD_mem(regD dst, memory src) %{ 3220 predicate((UseSSE>=2) && (UseAVX == 0)); 3221 match(Set dst (SubD dst (LoadD src))); 3222 3223 format %{ "subsd $dst, $src" %} 3224 ins_cost(150); 3225 ins_encode %{ 3226 __ subsd($dst$$XMMRegister, $src$$Address); 3227 %} 3228 ins_pipe(pipe_slow); 3229 %} 3230 3231 instruct subD_imm(regD dst, immD con) %{ 3232 predicate((UseSSE>=2) && (UseAVX == 0)); 3233 match(Set dst (SubD dst con)); 3234 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3235 ins_cost(150); 3236 ins_encode %{ 3237 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3238 %} 3239 ins_pipe(pipe_slow); 3240 %} 3241 3242 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3243 predicate(UseAVX > 0); 3244 match(Set dst (SubD src1 src2)); 3245 3246 format %{ "vsubsd $dst, $src1, $src2" %} 3247 ins_cost(150); 3248 ins_encode %{ 3249 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3250 %} 3251 ins_pipe(pipe_slow); 3252 %} 3253 3254 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3255 predicate(UseAVX > 0); 3256 match(Set dst (SubD src1 (LoadD src2))); 3257 3258 format %{ "vsubsd $dst, $src1, $src2" %} 3259 ins_cost(150); 3260 ins_encode %{ 3261 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3262 %} 3263 ins_pipe(pipe_slow); 3264 %} 3265 3266 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3267 predicate(UseAVX > 0); 3268 match(Set dst (SubD src con)); 3269 3270 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3271 ins_cost(150); 3272 ins_encode %{ 3273 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3274 %} 3275 ins_pipe(pipe_slow); 3276 %} 3277 3278 instruct mulF_reg(regF dst, regF src) %{ 3279 predicate((UseSSE>=1) && (UseAVX == 0)); 3280 match(Set dst (MulF dst src)); 3281 3282 format %{ "mulss $dst, $src" %} 3283 ins_cost(150); 3284 ins_encode %{ 3285 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3286 %} 3287 ins_pipe(pipe_slow); 3288 %} 3289 3290 instruct mulF_mem(regF dst, memory src) %{ 3291 predicate((UseSSE>=1) && (UseAVX == 0)); 3292 match(Set dst (MulF dst (LoadF src))); 3293 3294 format %{ "mulss $dst, $src" %} 3295 ins_cost(150); 3296 ins_encode %{ 3297 __ mulss($dst$$XMMRegister, $src$$Address); 3298 %} 3299 ins_pipe(pipe_slow); 3300 %} 3301 3302 instruct mulF_imm(regF dst, immF con) %{ 3303 predicate((UseSSE>=1) && (UseAVX == 0)); 3304 match(Set dst (MulF dst con)); 3305 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3306 ins_cost(150); 3307 ins_encode %{ 3308 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3309 %} 3310 ins_pipe(pipe_slow); 3311 %} 3312 3313 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3314 predicate(UseAVX > 0); 3315 match(Set dst (MulF src1 src2)); 3316 3317 format %{ "vmulss $dst, $src1, $src2" %} 3318 ins_cost(150); 3319 ins_encode %{ 3320 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3321 %} 3322 ins_pipe(pipe_slow); 3323 %} 3324 3325 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3326 predicate(UseAVX > 0); 3327 match(Set dst (MulF src1 (LoadF src2))); 3328 3329 format %{ "vmulss $dst, $src1, $src2" %} 3330 ins_cost(150); 3331 ins_encode %{ 3332 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3333 %} 3334 ins_pipe(pipe_slow); 3335 %} 3336 3337 instruct mulF_reg_imm(regF dst, regF src, immF con) 
%{ 3338 predicate(UseAVX > 0); 3339 match(Set dst (MulF src con)); 3340 3341 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3342 ins_cost(150); 3343 ins_encode %{ 3344 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3345 %} 3346 ins_pipe(pipe_slow); 3347 %} 3348 3349 instruct mulD_reg(regD dst, regD src) %{ 3350 predicate((UseSSE>=2) && (UseAVX == 0)); 3351 match(Set dst (MulD dst src)); 3352 3353 format %{ "mulsd $dst, $src" %} 3354 ins_cost(150); 3355 ins_encode %{ 3356 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3357 %} 3358 ins_pipe(pipe_slow); 3359 %} 3360 3361 instruct mulD_mem(regD dst, memory src) %{ 3362 predicate((UseSSE>=2) && (UseAVX == 0)); 3363 match(Set dst (MulD dst (LoadD src))); 3364 3365 format %{ "mulsd $dst, $src" %} 3366 ins_cost(150); 3367 ins_encode %{ 3368 __ mulsd($dst$$XMMRegister, $src$$Address); 3369 %} 3370 ins_pipe(pipe_slow); 3371 %} 3372 3373 instruct mulD_imm(regD dst, immD con) %{ 3374 predicate((UseSSE>=2) && (UseAVX == 0)); 3375 match(Set dst (MulD dst con)); 3376 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3377 ins_cost(150); 3378 ins_encode %{ 3379 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3380 %} 3381 ins_pipe(pipe_slow); 3382 %} 3383 3384 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3385 predicate(UseAVX > 0); 3386 match(Set dst (MulD src1 src2)); 3387 3388 format %{ "vmulsd $dst, $src1, $src2" %} 3389 ins_cost(150); 3390 ins_encode %{ 3391 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3392 %} 3393 ins_pipe(pipe_slow); 3394 %} 3395 3396 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3397 predicate(UseAVX > 0); 3398 match(Set dst (MulD src1 (LoadD src2))); 3399 3400 format %{ "vmulsd $dst, $src1, $src2" %} 3401 ins_cost(150); 3402 ins_encode %{ 3403 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3404 %} 3405 ins_pipe(pipe_slow); 3406 %} 3407 3408 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3409 predicate(UseAVX > 0); 3410 match(Set dst (MulD src con)); 3411 3412 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3413 ins_cost(150); 3414 ins_encode %{ 3415 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3416 %} 3417 ins_pipe(pipe_slow); 3418 %} 3419 3420 instruct divF_reg(regF dst, regF src) %{ 3421 predicate((UseSSE>=1) && (UseAVX == 0)); 3422 match(Set dst (DivF dst src)); 3423 3424 format %{ "divss $dst, $src" %} 3425 ins_cost(150); 3426 ins_encode %{ 3427 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3428 %} 3429 ins_pipe(pipe_slow); 3430 %} 3431 3432 instruct divF_mem(regF dst, memory src) %{ 3433 predicate((UseSSE>=1) && (UseAVX == 0)); 3434 match(Set dst (DivF dst (LoadF src))); 3435 3436 format %{ "divss $dst, $src" %} 3437 ins_cost(150); 3438 ins_encode %{ 3439 __ divss($dst$$XMMRegister, $src$$Address); 3440 %} 3441 ins_pipe(pipe_slow); 3442 %} 3443 3444 instruct divF_imm(regF dst, immF con) %{ 3445 predicate((UseSSE>=1) && (UseAVX == 0)); 3446 match(Set dst (DivF dst con)); 3447 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3448 ins_cost(150); 3449 ins_encode %{ 3450 __ divss($dst$$XMMRegister, $constantaddress($con)); 3451 %} 3452 ins_pipe(pipe_slow); 3453 %} 3454 3455 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3456 predicate(UseAVX > 0); 3457 match(Set dst (DivF src1 src2)); 3458 3459 format %{ "vdivss $dst, 
$src1, $src2" %} 3460 ins_cost(150); 3461 ins_encode %{ 3462 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3463 %} 3464 ins_pipe(pipe_slow); 3465 %} 3466 3467 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3468 predicate(UseAVX > 0); 3469 match(Set dst (DivF src1 (LoadF src2))); 3470 3471 format %{ "vdivss $dst, $src1, $src2" %} 3472 ins_cost(150); 3473 ins_encode %{ 3474 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3475 %} 3476 ins_pipe(pipe_slow); 3477 %} 3478 3479 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3480 predicate(UseAVX > 0); 3481 match(Set dst (DivF src con)); 3482 3483 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3484 ins_cost(150); 3485 ins_encode %{ 3486 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3487 %} 3488 ins_pipe(pipe_slow); 3489 %} 3490 3491 instruct divD_reg(regD dst, regD src) %{ 3492 predicate((UseSSE>=2) && (UseAVX == 0)); 3493 match(Set dst (DivD dst src)); 3494 3495 format %{ "divsd $dst, $src" %} 3496 ins_cost(150); 3497 ins_encode %{ 3498 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3499 %} 3500 ins_pipe(pipe_slow); 3501 %} 3502 3503 instruct divD_mem(regD dst, memory src) %{ 3504 predicate((UseSSE>=2) && (UseAVX == 0)); 3505 match(Set dst (DivD dst (LoadD src))); 3506 3507 format %{ "divsd $dst, $src" %} 3508 ins_cost(150); 3509 ins_encode %{ 3510 __ divsd($dst$$XMMRegister, $src$$Address); 3511 %} 3512 ins_pipe(pipe_slow); 3513 %} 3514 3515 instruct divD_imm(regD dst, immD con) %{ 3516 predicate((UseSSE>=2) && (UseAVX == 0)); 3517 match(Set dst (DivD dst con)); 3518 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3519 ins_cost(150); 3520 ins_encode %{ 3521 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3522 %} 3523 ins_pipe(pipe_slow); 3524 %} 3525 3526 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3527 predicate(UseAVX > 0); 3528 match(Set dst (DivD src1 src2)); 3529 3530 format %{ "vdivsd $dst, $src1, $src2" %} 3531 ins_cost(150); 3532 ins_encode %{ 3533 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3534 %} 3535 ins_pipe(pipe_slow); 3536 %} 3537 3538 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3539 predicate(UseAVX > 0); 3540 match(Set dst (DivD src1 (LoadD src2))); 3541 3542 format %{ "vdivsd $dst, $src1, $src2" %} 3543 ins_cost(150); 3544 ins_encode %{ 3545 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3546 %} 3547 ins_pipe(pipe_slow); 3548 %} 3549 3550 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3551 predicate(UseAVX > 0); 3552 match(Set dst (DivD src con)); 3553 3554 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3555 ins_cost(150); 3556 ins_encode %{ 3557 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3558 %} 3559 ins_pipe(pipe_slow); 3560 %} 3561 3562 instruct absF_reg(regF dst) %{ 3563 predicate((UseSSE>=1) && (UseAVX == 0)); 3564 match(Set dst (AbsF dst)); 3565 ins_cost(150); 3566 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3567 ins_encode %{ 3568 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3569 %} 3570 ins_pipe(pipe_slow); 3571 %} 3572 3573 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3574 predicate(UseAVX > 0); 3575 match(Set dst (AbsF src)); 3576 ins_cost(150); 3577 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3578 ins_encode 
%{ 3579 int vlen_enc = Assembler::AVX_128bit; 3580 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3581 ExternalAddress(float_signmask()), vlen_enc); 3582 %} 3583 ins_pipe(pipe_slow); 3584 %} 3585 3586 instruct absD_reg(regD dst) %{ 3587 predicate((UseSSE>=2) && (UseAVX == 0)); 3588 match(Set dst (AbsD dst)); 3589 ins_cost(150); 3590 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3591 "# abs double by sign masking" %} 3592 ins_encode %{ 3593 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3594 %} 3595 ins_pipe(pipe_slow); 3596 %} 3597 3598 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3599 predicate(UseAVX > 0); 3600 match(Set dst (AbsD src)); 3601 ins_cost(150); 3602 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3603 "# abs double by sign masking" %} 3604 ins_encode %{ 3605 int vlen_enc = Assembler::AVX_128bit; 3606 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3607 ExternalAddress(double_signmask()), vlen_enc); 3608 %} 3609 ins_pipe(pipe_slow); 3610 %} 3611 3612 instruct negF_reg(regF dst) %{ 3613 predicate((UseSSE>=1) && (UseAVX == 0)); 3614 match(Set dst (NegF dst)); 3615 ins_cost(150); 3616 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3617 ins_encode %{ 3618 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3619 %} 3620 ins_pipe(pipe_slow); 3621 %} 3622 3623 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3624 predicate(UseAVX > 0); 3625 match(Set dst (NegF src)); 3626 ins_cost(150); 3627 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3628 ins_encode %{ 3629 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3630 ExternalAddress(float_signflip())); 3631 %} 3632 ins_pipe(pipe_slow); 3633 %} 3634 3635 instruct negD_reg(regD dst) %{ 3636 predicate((UseSSE>=2) && (UseAVX == 0)); 3637 match(Set dst (NegD dst)); 3638 ins_cost(150); 3639 format %{ "xorpd $dst, [0x8000000000000000]\t" 3640 "# neg double by sign flipping" %} 3641 ins_encode %{ 3642 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3643 %} 3644 ins_pipe(pipe_slow); 3645 %} 3646 3647 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3648 predicate(UseAVX > 0); 3649 match(Set dst (NegD src)); 3650 ins_cost(150); 3651 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3652 "# neg double by sign flipping" %} 3653 ins_encode %{ 3654 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3655 ExternalAddress(double_signflip())); 3656 %} 3657 ins_pipe(pipe_slow); 3658 %} 3659 3660 // sqrtss instruction needs destination register to be pre initialized for best performance 3661 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3662 instruct sqrtF_reg(regF dst) %{ 3663 predicate(UseSSE>=1); 3664 match(Set dst (SqrtF dst)); 3665 format %{ "sqrtss $dst, $dst" %} 3666 ins_encode %{ 3667 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3668 %} 3669 ins_pipe(pipe_slow); 3670 %} 3671 3672 // sqrtsd instruction needs destination register to be pre initialized for best performance 3673 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3674 instruct sqrtD_reg(regD dst) %{ 3675 predicate(UseSSE>=2); 3676 match(Set dst (SqrtD dst)); 3677 format %{ "sqrtsd $dst, $dst" %} 3678 ins_encode %{ 3679 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3680 %} 3681 ins_pipe(pipe_slow); 3682 %} 3683 3684 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{ 3685 effect(TEMP tmp); 3686 match(Set dst (ConvF2HF src)); 3687 
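// $tmp is a scratch XMM used by the conversion helper (flt_to_flt16, called in the
// encode block below): the float in $src is narrowed to a 16-bit half-float and its
// bits are moved into the integer register $dst (roughly, a vcvtps2ph into $tmp
// followed by a move of the low word).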
ins_cost(125); 3688 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%} 3689 ins_encode %{ 3690 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister); 3691 %} 3692 ins_pipe( pipe_slow ); 3693 %} 3694 3695 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{ 3696 predicate((UseAVX > 2) && VM_Version::supports_avx512vl()); 3697 effect(TEMP ktmp, TEMP rtmp); 3698 match(Set mem (StoreC mem (ConvF2HF src))); 3699 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %} 3700 ins_encode %{ 3701 __ movl($rtmp$$Register, 0x1); 3702 __ kmovwl($ktmp$$KRegister, $rtmp$$Register); 3703 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 3704 %} 3705 ins_pipe( pipe_slow ); 3706 %} 3707 3708 instruct vconvF2HF(vec dst, vec src) %{ 3709 match(Set dst (VectorCastF2HF src)); 3710 format %{ "vector_conv_F2HF $dst $src" %} 3711 ins_encode %{ 3712 int vlen_enc = vector_length_encoding(this, $src); 3713 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc); 3714 %} 3715 ins_pipe( pipe_slow ); 3716 %} 3717 3718 instruct vconvF2HF_mem_reg(memory mem, vec src) %{ 3719 predicate(n->as_StoreVector()->memory_size() >= 16); 3720 match(Set mem (StoreVector mem (VectorCastF2HF src))); 3721 format %{ "vcvtps2ph $mem,$src" %} 3722 ins_encode %{ 3723 int vlen_enc = vector_length_encoding(this, $src); 3724 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc); 3725 %} 3726 ins_pipe( pipe_slow ); 3727 %} 3728 3729 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{ 3730 match(Set dst (ConvHF2F src)); 3731 format %{ "vcvtph2ps $dst,$src" %} 3732 ins_encode %{ 3733 __ flt16_to_flt($dst$$XMMRegister, $src$$Register); 3734 %} 3735 ins_pipe( pipe_slow ); 3736 %} 3737 3738 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{ 3739 match(Set dst (VectorCastHF2F (LoadVector mem))); 3740 format %{ "vcvtph2ps $dst,$mem" %} 3741 ins_encode %{ 3742 int vlen_enc = vector_length_encoding(this); 3743 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc); 3744 %} 3745 ins_pipe( pipe_slow ); 3746 %} 3747 3748 instruct vconvHF2F(vec dst, vec src) %{ 3749 match(Set dst (VectorCastHF2F src)); 3750 ins_cost(125); 3751 format %{ "vector_conv_HF2F $dst,$src" %} 3752 ins_encode %{ 3753 int vlen_enc = vector_length_encoding(this); 3754 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 3755 %} 3756 ins_pipe( pipe_slow ); 3757 %} 3758 3759 // ---------------------------------------- VectorReinterpret ------------------------------------ 3760 instruct reinterpret_mask(kReg dst) %{ 3761 predicate(n->bottom_type()->isa_vectmask() && 3762 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3763 match(Set dst (VectorReinterpret dst)); 3764 ins_cost(125); 3765 format %{ "vector_reinterpret $dst\t!" %} 3766 ins_encode %{ 3767 // empty 3768 %} 3769 ins_pipe( pipe_slow ); 3770 %} 3771 3772 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3773 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3774 n->bottom_type()->isa_vectmask() && 3775 n->in(1)->bottom_type()->isa_vectmask() && 3776 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3777 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3778 match(Set dst (VectorReinterpret src)); 3779 effect(TEMP xtmp); 3780 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" 
%} 3781 ins_encode %{ 3782 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3783 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3784 assert(src_sz == dst_sz , "src and dst size mismatch"); 3785 int vlen_enc = vector_length_encoding(src_sz); 3786 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3787 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3788 %} 3789 ins_pipe( pipe_slow ); 3790 %} 3791 3792 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3793 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3794 n->bottom_type()->isa_vectmask() && 3795 n->in(1)->bottom_type()->isa_vectmask() && 3796 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3797 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3798 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3799 match(Set dst (VectorReinterpret src)); 3800 effect(TEMP xtmp); 3801 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3802 ins_encode %{ 3803 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3804 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3805 assert(src_sz == dst_sz , "src and dst size mismatch"); 3806 int vlen_enc = vector_length_encoding(src_sz); 3807 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3808 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3809 %} 3810 ins_pipe( pipe_slow ); 3811 %} 3812 3813 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3814 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3815 n->bottom_type()->isa_vectmask() && 3816 n->in(1)->bottom_type()->isa_vectmask() && 3817 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3818 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3819 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3820 match(Set dst (VectorReinterpret src)); 3821 effect(TEMP xtmp); 3822 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %} 3823 ins_encode %{ 3824 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3825 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3826 assert(src_sz == dst_sz , "src and dst size mismatch"); 3827 int vlen_enc = vector_length_encoding(src_sz); 3828 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3829 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3830 %} 3831 ins_pipe( pipe_slow ); 3832 %} 3833 3834 instruct reinterpret(vec dst) %{ 3835 predicate(!n->bottom_type()->isa_vectmask() && 3836 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3837 match(Set dst (VectorReinterpret dst)); 3838 ins_cost(125); 3839 format %{ "vector_reinterpret $dst\t!" 
%} 3840 ins_encode %{ 3841 // empty 3842 %} 3843 ins_pipe( pipe_slow ); 3844 %} 3845 3846 instruct reinterpret_expand(vec dst, vec src) %{ 3847 predicate(UseAVX == 0 && 3848 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3849 match(Set dst (VectorReinterpret src)); 3850 ins_cost(125); 3851 effect(TEMP dst); 3852 format %{ "vector_reinterpret_expand $dst,$src" %} 3853 ins_encode %{ 3854 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3855 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3856 3857 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3858 if (src_vlen_in_bytes == 4) { 3859 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3860 } else { 3861 assert(src_vlen_in_bytes == 8, ""); 3862 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3863 } 3864 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3865 %} 3866 ins_pipe( pipe_slow ); 3867 %} 3868 3869 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3870 predicate(UseAVX > 0 && 3871 !n->bottom_type()->isa_vectmask() && 3872 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3873 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3874 match(Set dst (VectorReinterpret src)); 3875 ins_cost(125); 3876 format %{ "vector_reinterpret_expand $dst,$src" %} 3877 ins_encode %{ 3878 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3879 %} 3880 ins_pipe( pipe_slow ); 3881 %} 3882 3883 3884 instruct vreinterpret_expand(legVec dst, vec src) %{ 3885 predicate(UseAVX > 0 && 3886 !n->bottom_type()->isa_vectmask() && 3887 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3888 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3889 match(Set dst (VectorReinterpret src)); 3890 ins_cost(125); 3891 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3892 ins_encode %{ 3893 switch (Matcher::vector_length_in_bytes(this, $src)) { 3894 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3895 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3896 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3897 default: ShouldNotReachHere(); 3898 } 3899 %} 3900 ins_pipe( pipe_slow ); 3901 %} 3902 3903 instruct reinterpret_shrink(vec dst, legVec src) %{ 3904 predicate(!n->bottom_type()->isa_vectmask() && 3905 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3906 match(Set dst (VectorReinterpret src)); 3907 ins_cost(125); 3908 format %{ "vector_reinterpret_shrink $dst,$src\t!" 
%} 3909 ins_encode %{ 3910 switch (Matcher::vector_length_in_bytes(this)) { 3911 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3912 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3913 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3914 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3915 default: ShouldNotReachHere(); 3916 } 3917 %} 3918 ins_pipe( pipe_slow ); 3919 %} 3920 3921 // ---------------------------------------------------------------------------------------------------- 3922 3923 #ifdef _LP64 3924 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3925 match(Set dst (RoundDoubleMode src rmode)); 3926 format %{ "roundsd $dst,$src" %} 3927 ins_cost(150); 3928 ins_encode %{ 3929 assert(UseSSE >= 4, "required"); 3930 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) { 3931 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3932 } 3933 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3934 %} 3935 ins_pipe(pipe_slow); 3936 %} 3937 3938 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3939 match(Set dst (RoundDoubleMode con rmode)); 3940 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3941 ins_cost(150); 3942 ins_encode %{ 3943 assert(UseSSE >= 4, "required"); 3944 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3945 %} 3946 ins_pipe(pipe_slow); 3947 %} 3948 3949 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3950 predicate(Matcher::vector_length(n) < 8); 3951 match(Set dst (RoundDoubleModeV src rmode)); 3952 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3953 ins_encode %{ 3954 assert(UseAVX > 0, "required"); 3955 int vlen_enc = vector_length_encoding(this); 3956 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3957 %} 3958 ins_pipe( pipe_slow ); 3959 %} 3960 3961 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3962 predicate(Matcher::vector_length(n) == 8); 3963 match(Set dst (RoundDoubleModeV src rmode)); 3964 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3965 ins_encode %{ 3966 assert(UseAVX > 2, "required"); 3967 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3968 %} 3969 ins_pipe( pipe_slow ); 3970 %} 3971 3972 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3973 predicate(Matcher::vector_length(n) < 8); 3974 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3975 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3976 ins_encode %{ 3977 assert(UseAVX > 0, "required"); 3978 int vlen_enc = vector_length_encoding(this); 3979 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3980 %} 3981 ins_pipe( pipe_slow ); 3982 %} 3983 3984 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3985 predicate(Matcher::vector_length(n) == 8); 3986 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3987 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3988 ins_encode %{ 3989 assert(UseAVX > 2, "required"); 3990 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3991 %} 3992 ins_pipe( pipe_slow ); 3993 %} 3994 #endif // _LP64 3995 3996 instruct onspinwait() %{ 3997 match(OnSpinWait); 3998 ins_cost(200); 3999 4000 format %{ 4001 $$template 4002 $$emit$$"pause\t! 
membar_onspinwait" 4003 %} 4004 ins_encode %{ 4005 __ pause(); 4006 %} 4007 ins_pipe(pipe_slow); 4008 %} 4009 4010 // a * b + c 4011 instruct fmaD_reg(regD a, regD b, regD c) %{ 4012 match(Set c (FmaD c (Binary a b))); 4013 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 4014 ins_cost(150); 4015 ins_encode %{ 4016 assert(UseFMA, "Needs FMA instructions support."); 4017 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4018 %} 4019 ins_pipe( pipe_slow ); 4020 %} 4021 4022 // a * b + c 4023 instruct fmaF_reg(regF a, regF b, regF c) %{ 4024 match(Set c (FmaF c (Binary a b))); 4025 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 4026 ins_cost(150); 4027 ins_encode %{ 4028 assert(UseFMA, "Needs FMA instructions support."); 4029 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4030 %} 4031 ins_pipe( pipe_slow ); 4032 %} 4033 4034 // ====================VECTOR INSTRUCTIONS===================================== 4035 4036 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 4037 instruct MoveVec2Leg(legVec dst, vec src) %{ 4038 match(Set dst src); 4039 format %{ "" %} 4040 ins_encode %{ 4041 ShouldNotReachHere(); 4042 %} 4043 ins_pipe( fpu_reg_reg ); 4044 %} 4045 4046 instruct MoveLeg2Vec(vec dst, legVec src) %{ 4047 match(Set dst src); 4048 format %{ "" %} 4049 ins_encode %{ 4050 ShouldNotReachHere(); 4051 %} 4052 ins_pipe( fpu_reg_reg ); 4053 %} 4054 4055 // ============================================================================ 4056 4057 // Load vectors generic operand pattern 4058 instruct loadV(vec dst, memory mem) %{ 4059 match(Set dst (LoadVector mem)); 4060 ins_cost(125); 4061 format %{ "load_vector $dst,$mem" %} 4062 ins_encode %{ 4063 __ load_vector($dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 4064 %} 4065 ins_pipe( pipe_slow ); 4066 %} 4067 4068 // Store vectors generic operand pattern. 4069 instruct storeV(memory mem, vec src) %{ 4070 match(Set mem (StoreVector mem src)); 4071 ins_cost(145); 4072 format %{ "store_vector $mem,$src\n\t" %} 4073 ins_encode %{ 4074 switch (Matcher::vector_length_in_bytes(this, $src)) { 4075 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 4076 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 4077 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 4078 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 4079 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 4080 default: ShouldNotReachHere(); 4081 } 4082 %} 4083 ins_pipe( pipe_slow ); 4084 %} 4085 4086 // ---------------------------------------- Gather ------------------------------------ 4087 4088 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE 4089 4090 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 4091 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) && 4092 Matcher::vector_length_in_bytes(n) <= 32); 4093 match(Set dst (LoadVectorGather mem idx)); 4094 effect(TEMP dst, TEMP tmp, TEMP mask); 4095 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %} 4096 ins_encode %{ 4097 int vlen_enc = vector_length_encoding(this); 4098 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4099 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4100 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4101 __ lea($tmp$$Register, $mem$$Address); 4102 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4103 %} 4104 ins_pipe( pipe_slow ); 4105 %} 4106 4107 4108 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ 4109 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4110 !is_subword_type(Matcher::vector_element_basic_type(n))); 4111 match(Set dst (LoadVectorGather mem idx)); 4112 effect(TEMP dst, TEMP tmp, TEMP ktmp); 4113 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %} 4114 ins_encode %{ 4115 int vlen_enc = vector_length_encoding(this); 4116 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4117 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister); 4118 __ lea($tmp$$Register, $mem$$Address); 4119 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4120 %} 4121 ins_pipe( pipe_slow ); 4122 %} 4123 4124 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4125 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4126 !is_subword_type(Matcher::vector_element_basic_type(n))); 4127 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask))); 4128 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp); 4129 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %} 4130 ins_encode %{ 4131 assert(UseAVX > 2, "sanity"); 4132 int vlen_enc = vector_length_encoding(this); 4133 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4134 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4135 // Note: Since the gather instruction partially updates the opmask register used 4136 // for predication, the mask operand is first copied to a temporary. 4137 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4138 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4139 __ lea($tmp$$Register, $mem$$Address); 4140 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4141 %} 4142 ins_pipe( pipe_slow ); 4143 %} 4144 4145 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegI rtmp) %{ 4146 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4147 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4148 effect(TEMP tmp, TEMP rtmp); 4149 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t!
using $tmp and $rtmp as TEMP" %} 4150 ins_encode %{ 4151 int vlen_enc = vector_length_encoding(this); 4152 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4153 __ lea($tmp$$Register, $mem$$Address); 4154 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp$$Register, vlen_enc); 4155 %} 4156 ins_pipe( pipe_slow ); 4157 %} 4158 4159 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegP idx_base_temp, 4160 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4161 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4162 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4163 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4164 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4165 ins_encode %{ 4166 int vlen_enc = vector_length_encoding(this); 4167 int vector_len = Matcher::vector_length(this); 4168 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4169 __ lea($tmp$$Register, $mem$$Address); 4170 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4171 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, noreg, $xtmp1$$XMMRegister, 4172 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4173 %} 4174 ins_pipe( pipe_slow ); 4175 %} 4176 4177 instruct vgather_subwordLE8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegI rtmp, rFlagsReg cr) %{ 4178 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4179 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4180 effect(TEMP tmp, TEMP rtmp, KILL cr); 4181 format %{ "vector_gatherLE8_off $dst, $mem, $idx_base, $offset\t! using $tmp and $rtmp as TEMP" %} 4182 ins_encode %{ 4183 int vlen_enc = vector_length_encoding(this); 4184 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4185 __ lea($tmp$$Register, $mem$$Address); 4186 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, $rtmp$$Register, vlen_enc); 4187 %} 4188 ins_pipe( pipe_slow ); 4189 %} 4190 4191 4192 instruct vgather_subwordGT8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegP idx_base_temp, 4193 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4194 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4195 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4196 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4197 format %{ "vector_gatherGT8_off $dst, $mem, $idx_base, $offset\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4198 ins_encode %{ 4199 int vlen_enc = vector_length_encoding(this); 4200 int vector_len = Matcher::vector_length(this); 4201 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4202 __ lea($tmp$$Register, $mem$$Address); 4203 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4204 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, noreg, $xtmp1$$XMMRegister, 4205 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4206 %} 4207 ins_pipe( pipe_slow ); 4208 %} 4209 4210 4211 #ifdef _LP64 4212 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4213 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4214 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4215 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4216 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4217 ins_encode %{ 4218 int vlen_enc = vector_length_encoding(this); 4219 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4220 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4221 __ lea($tmp$$Register, $mem$$Address); 4222 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4223 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4224 %} 4225 ins_pipe( pipe_slow ); 4226 %} 4227 4228 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4229 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4230 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4231 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4232 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4233 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4234 ins_encode %{ 4235 int vlen_enc = vector_length_encoding(this); 4236 int vector_len = Matcher::vector_length(this); 4237 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4238 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4239 __ lea($tmp$$Register, $mem$$Address); 4240 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4241 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4242 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4243 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4244 %} 4245 ins_pipe( pipe_slow ); 4246 %} 4247 4248 instruct vgather_masked_subwordLE8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4249 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4250 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4251 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4252 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4253 ins_encode %{ 4254 int vlen_enc = vector_length_encoding(this); 4255 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4256 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4257 __ lea($tmp$$Register, $mem$$Address); 4258 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4259 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4260 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4261 %} 4262 ins_pipe( pipe_slow ); 4263 %} 4264 4265 instruct vgather_masked_subwordGT8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4266 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4267 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4268 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4269 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4270 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4271 ins_encode %{ 4272 int vlen_enc = vector_length_encoding(this); 4273 int vector_len = Matcher::vector_length(this); 4274 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4275 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4276 __ lea($tmp$$Register, $mem$$Address); 4277 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4278 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4279 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4280 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4281 %} 4282 ins_pipe( pipe_slow ); 4283 %} 4284 4285 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4286 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4287 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4288 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4289 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4290 ins_encode %{ 4291 int vlen_enc = vector_length_encoding(this); 4292 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4293 __ lea($tmp$$Register, $mem$$Address); 4294 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4295 if (elem_bt == T_SHORT) { 4296 __ movl($mask_idx$$Register, 0x55555555); 4297 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4298 } 4299 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4300 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4301 %} 4302 ins_pipe( pipe_slow ); 4303 %} 4304 4305 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4306 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4307 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4308 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4309 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4310 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4311 ins_encode %{ 4312 int vlen_enc = vector_length_encoding(this); 4313 int vector_len = Matcher::vector_length(this); 4314 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4315 __ lea($tmp$$Register, $mem$$Address); 4316 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4317 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4318 if (elem_bt == T_SHORT) { 4319 __ movl($mask_idx$$Register, 0x55555555); 4320 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4321 } 4322 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4323 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4324 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4325 %} 4326 ins_pipe( pipe_slow ); 4327 %} 4328 4329 instruct vgather_masked_subwordLE8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4330 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4331 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4332 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4333 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4334 ins_encode %{ 4335 int vlen_enc = vector_length_encoding(this); 4336 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4337 __ lea($tmp$$Register, $mem$$Address); 4338 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4339 if (elem_bt == T_SHORT) { 4340 __ movl($mask_idx$$Register, 0x55555555); 4341 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4342 } 4343 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4344 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4345 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4346 %} 4347 ins_pipe( pipe_slow ); 4348 %} 4349 4350 instruct vgather_masked_subwordGT8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4351 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4352 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4353 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4354 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4355 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4356 ins_encode %{ 4357 int vlen_enc = vector_length_encoding(this); 4358 int vector_len = Matcher::vector_length(this); 4359 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4360 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4361 __ lea($tmp$$Register, $mem$$Address); 4362 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4363 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4364 if (elem_bt == T_SHORT) { 4365 __ movl($mask_idx$$Register, 0x55555555); 4366 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4367 } 4368 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4369 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4370 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4371 %} 4372 ins_pipe( pipe_slow ); 4373 %} 4374 #endif 4375 4376 // ====================Scatter======================================= 4377 4378 // Scatter INT, LONG, FLOAT, DOUBLE 4379 4380 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{ 4381 predicate(UseAVX > 2); 4382 match(Set mem (StoreVectorScatter mem (Binary src idx))); 4383 effect(TEMP tmp, TEMP ktmp); 4384 format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %} 4385 ins_encode %{ 4386 int vlen_enc = vector_length_encoding(this, $src); 4387 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4388 4389 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4390 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4391 4392 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg); 4393 __ lea($tmp$$Register, $mem$$Address); 4394 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4395 %} 4396 ins_pipe( pipe_slow ); 4397 %} 4398 4399 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4400 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask)))); 4401 effect(TEMP tmp, TEMP ktmp); 4402 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %} 4403 ins_encode %{ 4404 int vlen_enc = vector_length_encoding(this, $src); 4405 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4406 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4407 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4408 // Note: Since the scatter instruction partially updates the opmask register used 4409 // for predication, the mask operand is first copied to a temporary.
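// (The hardware clears each opmask bit as the corresponding element is stored, so
// $mask itself must be preserved for any later uses; $ktmp is the copy the scatter
// is free to consume and clobber.)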
4410 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4411 __ lea($tmp$$Register, $mem$$Address); 4412 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4413 %} 4414 ins_pipe( pipe_slow ); 4415 %} 4416 4417 // ====================REPLICATE======================================= 4418 4419 // Replicate byte scalar to be vector 4420 instruct vReplB_reg(vec dst, rRegI src) %{ 4421 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 4422 match(Set dst (Replicate src)); 4423 format %{ "replicateB $dst,$src" %} 4424 ins_encode %{ 4425 uint vlen = Matcher::vector_length(this); 4426 if (UseAVX >= 2) { 4427 int vlen_enc = vector_length_encoding(this); 4428 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4429 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4430 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4431 } else { 4432 __ movdl($dst$$XMMRegister, $src$$Register); 4433 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4434 } 4435 } else { 4436 assert(UseAVX < 2, ""); 4437 __ movdl($dst$$XMMRegister, $src$$Register); 4438 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4439 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4440 if (vlen >= 16) { 4441 assert(vlen == 16, ""); 4442 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4443 } 4444 } 4445 %} 4446 ins_pipe( pipe_slow ); 4447 %} 4448 4449 instruct ReplB_mem(vec dst, memory mem) %{ 4450 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE); 4451 match(Set dst (Replicate (LoadB mem))); 4452 format %{ "replicateB $dst,$mem" %} 4453 ins_encode %{ 4454 int vlen_enc = vector_length_encoding(this); 4455 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4456 %} 4457 ins_pipe( pipe_slow ); 4458 %} 4459 4460 // ====================ReplicateS======================================= 4461 4462 instruct vReplS_reg(vec dst, rRegI src) %{ 4463 predicate(Matcher::vector_element_basic_type(n) == T_SHORT); 4464 match(Set dst (Replicate src)); 4465 format %{ "replicateS $dst,$src" %} 4466 ins_encode %{ 4467 uint vlen = Matcher::vector_length(this); 4468 int vlen_enc = vector_length_encoding(this); 4469 if (UseAVX >= 2) { 4470 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4471 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4472 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4473 } else { 4474 __ movdl($dst$$XMMRegister, $src$$Register); 4475 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4476 } 4477 } else { 4478 assert(UseAVX < 2, ""); 4479 __ movdl($dst$$XMMRegister, $src$$Register); 4480 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4481 if (vlen >= 8) { 4482 assert(vlen == 8, ""); 4483 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4484 } 4485 } 4486 %} 4487 ins_pipe( pipe_slow ); 4488 %} 4489 4490 instruct ReplS_mem(vec dst, memory mem) %{ 4491 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4492 match(Set dst (Replicate (LoadS mem))); 4493 format %{ "replicateS $dst,$mem" %} 4494 ins_encode %{ 4495 int vlen_enc = vector_length_encoding(this); 4496 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4497 %} 4498 ins_pipe( pipe_slow ); 4499 %} 4500 4501 // ====================ReplicateI======================================= 4502 4503 instruct ReplI_reg(vec dst, rRegI 
src) %{ 4504 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4505 match(Set dst (Replicate src)); 4506 format %{ "replicateI $dst,$src" %} 4507 ins_encode %{ 4508 uint vlen = Matcher::vector_length(this); 4509 int vlen_enc = vector_length_encoding(this); 4510 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4511 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4512 } else if (VM_Version::supports_avx2()) { 4513 __ movdl($dst$$XMMRegister, $src$$Register); 4514 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4515 } else { 4516 __ movdl($dst$$XMMRegister, $src$$Register); 4517 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4518 } 4519 %} 4520 ins_pipe( pipe_slow ); 4521 %} 4522 4523 instruct ReplI_mem(vec dst, memory mem) %{ 4524 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4525 match(Set dst (Replicate (LoadI mem))); 4526 format %{ "replicateI $dst,$mem" %} 4527 ins_encode %{ 4528 int vlen_enc = vector_length_encoding(this); 4529 if (VM_Version::supports_avx2()) { 4530 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4531 } else if (VM_Version::supports_avx()) { 4532 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4533 } else { 4534 __ movdl($dst$$XMMRegister, $mem$$Address); 4535 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4536 } 4537 %} 4538 ins_pipe( pipe_slow ); 4539 %} 4540 4541 instruct ReplI_imm(vec dst, immI con) %{ 4542 predicate(Matcher::is_non_long_integral_vector(n)); 4543 match(Set dst (Replicate con)); 4544 format %{ "replicateI $dst,$con" %} 4545 ins_encode %{ 4546 InternalAddress addr = $constantaddress(Matcher::vector_element_basic_type(this), 4547 vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4548 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 8) / 4549 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4550 BasicType bt = Matcher::vector_element_basic_type(this); 4551 int vlen = Matcher::vector_length_in_bytes(this); 4552 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4553 %} 4554 ins_pipe( pipe_slow ); 4555 %} 4556 4557 // Replicate scalar zero to be vector 4558 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4559 predicate(Matcher::is_non_long_integral_vector(n)); 4560 match(Set dst (Replicate zero)); 4561 format %{ "replicateI $dst,$zero" %} 4562 ins_encode %{ 4563 int vlen_enc = vector_length_encoding(this); 4564 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4565 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4566 } else { 4567 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4568 } 4569 %} 4570 ins_pipe( fpu_reg_reg ); 4571 %} 4572 4573 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4574 predicate(UseSSE >= 2 && Matcher::is_non_long_integral_vector(n)); 4575 match(Set dst (Replicate con)); 4576 format %{ "vallones $dst" %} 4577 ins_encode %{ 4578 int vector_len = vector_length_encoding(this); 4579 __ vallones($dst$$XMMRegister, vector_len); 4580 %} 4581 ins_pipe( pipe_slow ); 4582 %} 4583 4584 // ====================ReplicateL======================================= 4585 4586 #ifdef _LP64 4587 // Replicate long (8 byte) scalar to be vector 4588 instruct ReplL_reg(vec dst, rRegL src) %{ 4589 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4590 match(Set dst (Replicate src)); 4591 format %{ "replicateL $dst,$src" %} 4592 ins_encode %{ 4593 int vlen = Matcher::vector_length(this); 4594 int vlen_enc = vector_length_encoding(this); 4595 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4596 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4597 } else if (VM_Version::supports_avx2()) { 4598 __ movdq($dst$$XMMRegister, $src$$Register); 4599 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4600 } else { 4601 __ movdq($dst$$XMMRegister, $src$$Register); 4602 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4603 } 4604 %} 4605 ins_pipe( pipe_slow ); 4606 %} 4607 #else // _LP64 4608 // Replicate long (8 byte) scalar to be vector 4609 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 4610 predicate(Matcher::vector_length(n) <= 4 && Matcher::vector_element_basic_type(n) == T_LONG); 4611 match(Set dst (Replicate src)); 4612 effect(TEMP dst, USE src, TEMP tmp); 4613 format %{ "replicateL $dst,$src" %} 4614 ins_encode %{ 4615 uint vlen = Matcher::vector_length(this); 4616 if (vlen == 2) { 4617 __ movdl($dst$$XMMRegister, $src$$Register); 4618 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4619 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4620 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4621 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4622 int vlen_enc = Assembler::AVX_256bit; 4623 __ movdl($dst$$XMMRegister, $src$$Register); 4624 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4625 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4626 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4627 } else { 4628 __ movdl($dst$$XMMRegister, $src$$Register); 4629 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4630 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4631 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4632 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4633 
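// On 32-bit, the long value lives in a register pair: $src holds the low 32 bits and
// HIGH_FROM_LOW($src) the high 32 bits. The movdl/punpckldq pair above rebuilds the
// 64-bit value in the low lane, punpcklqdq duplicates it across the 128-bit register,
// and vinserti128_high copies that duplicate into the upper 128-bit lane for the
// longer vector lengths.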
} 4634 %} 4635 ins_pipe( pipe_slow ); 4636 %} 4637 4638 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 4639 predicate(Matcher::vector_length(n) == 8 && Matcher::vector_element_basic_type(n) == T_LONG); 4640 match(Set dst (Replicate src)); 4641 effect(TEMP dst, USE src, TEMP tmp); 4642 format %{ "replicateL $dst,$src" %} 4643 ins_encode %{ 4644 if (VM_Version::supports_avx512vl()) { 4645 __ movdl($dst$$XMMRegister, $src$$Register); 4646 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4647 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4648 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4649 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4650 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 4651 } else { 4652 int vlen_enc = Assembler::AVX_512bit; 4653 __ movdl($dst$$XMMRegister, $src$$Register); 4654 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4655 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4656 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4657 } 4658 %} 4659 ins_pipe( pipe_slow ); 4660 %} 4661 #endif // _LP64 4662 4663 instruct ReplL_mem(vec dst, memory mem) %{ 4664 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4665 match(Set dst (Replicate (LoadL mem))); 4666 format %{ "replicateL $dst,$mem" %} 4667 ins_encode %{ 4668 int vlen_enc = vector_length_encoding(this); 4669 if (VM_Version::supports_avx2()) { 4670 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4671 } else if (VM_Version::supports_sse3()) { 4672 __ movddup($dst$$XMMRegister, $mem$$Address); 4673 } else { 4674 __ movq($dst$$XMMRegister, $mem$$Address); 4675 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4676 } 4677 %} 4678 ins_pipe( pipe_slow ); 4679 %} 4680 4681 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 
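// Rough sketch of the mechanism (the exact load/broadcast instruction is chosen inside
// load_constant_vector()): vreplicate_imm() materializes the immediate as a constant-table
// entry (a single 8-byte element here), and load_constant_vector() then loads or broadcasts
// that entry into $dst according to the vector length in bytes.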
4682 instruct ReplL_imm(vec dst, immL con) %{ 4683 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4684 match(Set dst (Replicate con)); 4685 format %{ "replicateL $dst,$con" %} 4686 ins_encode %{ 4687 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, 1)); 4688 int vlen = Matcher::vector_length_in_bytes(this); 4689 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4690 %} 4691 ins_pipe( pipe_slow ); 4692 %} 4693 4694 instruct ReplL_zero(vec dst, immL0 zero) %{ 4695 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4696 match(Set dst (Replicate zero)); 4697 format %{ "replicateL $dst,$zero" %} 4698 ins_encode %{ 4699 int vlen_enc = vector_length_encoding(this); 4700 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4701 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4702 } else { 4703 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4704 } 4705 %} 4706 ins_pipe( fpu_reg_reg ); 4707 %} 4708 4709 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4710 predicate(UseSSE >= 2 && Matcher::vector_element_basic_type(n) == T_LONG); 4711 match(Set dst (Replicate con)); 4712 format %{ "vallones $dst" %} 4713 ins_encode %{ 4714 int vector_len = vector_length_encoding(this); 4715 __ vallones($dst$$XMMRegister, vector_len); 4716 %} 4717 ins_pipe( pipe_slow ); 4718 %} 4719 4720 // ====================ReplicateF======================================= 4721 4722 instruct vReplF_reg(vec dst, vlRegF src) %{ 4723 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4724 match(Set dst (Replicate src)); 4725 format %{ "replicateF $dst,$src" %} 4726 ins_encode %{ 4727 uint vlen = Matcher::vector_length(this); 4728 int vlen_enc = vector_length_encoding(this); 4729 if (vlen <= 4) { 4730 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4731 } else if (VM_Version::supports_avx2()) { 4732 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4733 } else { 4734 assert(vlen == 8, "sanity"); 4735 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4736 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4737 } 4738 %} 4739 ins_pipe( pipe_slow ); 4740 %} 4741 4742 instruct ReplF_reg(vec dst, vlRegF src) %{ 4743 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4744 match(Set dst (Replicate src)); 4745 format %{ "replicateF $dst,$src" %} 4746 ins_encode %{ 4747 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4748 %} 4749 ins_pipe( pipe_slow ); 4750 %} 4751 4752 instruct ReplF_mem(vec dst, memory mem) %{ 4753 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4754 match(Set dst (Replicate (LoadF mem))); 4755 format %{ "replicateF $dst,$mem" %} 4756 ins_encode %{ 4757 int vlen_enc = vector_length_encoding(this); 4758 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4759 %} 4760 ins_pipe( pipe_slow ); 4761 %} 4762 4763 // Replicate float scalar immediate to be vector by loading from const table. 4764 instruct ReplF_imm(vec dst, immF con) %{ 4765 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4766 match(Set dst (Replicate con)); 4767 format %{ "replicateF $dst,$con" %} 4768 ins_encode %{ 4769 InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant, 4770 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
1 : 2) : 2)); 4771 int vlen = Matcher::vector_length_in_bytes(this); 4772 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4773 %} 4774 ins_pipe( pipe_slow ); 4775 %} 4776 4777 instruct ReplF_zero(vec dst, immF0 zero) %{ 4778 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4779 match(Set dst (Replicate zero)); 4780 format %{ "replicateF $dst,$zero" %} 4781 ins_encode %{ 4782 int vlen_enc = vector_length_encoding(this); 4783 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4784 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4785 } else { 4786 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4787 } 4788 %} 4789 ins_pipe( fpu_reg_reg ); 4790 %} 4791 4792 // ====================ReplicateD======================================= 4793 4794 // Replicate double (8 bytes) scalar to be vector 4795 instruct vReplD_reg(vec dst, vlRegD src) %{ 4796 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4797 match(Set dst (Replicate src)); 4798 format %{ "replicateD $dst,$src" %} 4799 ins_encode %{ 4800 uint vlen = Matcher::vector_length(this); 4801 int vlen_enc = vector_length_encoding(this); 4802 if (vlen <= 2) { 4803 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4804 } else if (VM_Version::supports_avx2()) { 4805 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4806 } else { 4807 assert(vlen == 4, "sanity"); 4808 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4809 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4810 } 4811 %} 4812 ins_pipe( pipe_slow ); 4813 %} 4814 4815 instruct ReplD_reg(vec dst, vlRegD src) %{ 4816 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4817 match(Set dst (Replicate src)); 4818 format %{ "replicateD $dst,$src" %} 4819 ins_encode %{ 4820 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4821 %} 4822 ins_pipe( pipe_slow ); 4823 %} 4824 4825 instruct ReplD_mem(vec dst, memory mem) %{ 4826 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4827 match(Set dst (Replicate (LoadD mem))); 4828 format %{ "replicateD $dst,$mem" %} 4829 ins_encode %{ 4830 if (Matcher::vector_length(this) >= 4) { 4831 int vlen_enc = vector_length_encoding(this); 4832 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4833 } else { 4834 __ movddup($dst$$XMMRegister, $mem$$Address); 4835 } 4836 %} 4837 ins_pipe( pipe_slow ); 4838 %} 4839 4840 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 
4841 instruct ReplD_imm(vec dst, immD con) %{ 4842 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4843 match(Set dst (Replicate con)); 4844 format %{ "replicateD $dst,$con" %} 4845 ins_encode %{ 4846 InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, 1)); 4847 int vlen = Matcher::vector_length_in_bytes(this); 4848 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen); 4849 %} 4850 ins_pipe( pipe_slow ); 4851 %} 4852 4853 instruct ReplD_zero(vec dst, immD0 zero) %{ 4854 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4855 match(Set dst (Replicate zero)); 4856 format %{ "replicateD $dst,$zero" %} 4857 ins_encode %{ 4858 int vlen_enc = vector_length_encoding(this); 4859 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4860 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4861 } else { 4862 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4863 } 4864 %} 4865 ins_pipe( fpu_reg_reg ); 4866 %} 4867 4868 // ====================VECTOR INSERT======================================= 4869 4870 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4871 predicate(Matcher::vector_length_in_bytes(n) < 32); 4872 match(Set dst (VectorInsert (Binary dst val) idx)); 4873 format %{ "vector_insert $dst,$val,$idx" %} 4874 ins_encode %{ 4875 assert(UseSSE >= 4, "required"); 4876 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4877 4878 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4879 4880 assert(is_integral_type(elem_bt), ""); 4881 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4882 4883 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4884 %} 4885 ins_pipe( pipe_slow ); 4886 %} 4887 4888 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4889 predicate(Matcher::vector_length_in_bytes(n) == 32); 4890 match(Set dst (VectorInsert (Binary src val) idx)); 4891 effect(TEMP vtmp); 4892 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4893 ins_encode %{ 4894 int vlen_enc = Assembler::AVX_256bit; 4895 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4896 int elem_per_lane = 16/type2aelembytes(elem_bt); 4897 int log2epr = log2(elem_per_lane); 4898 4899 assert(is_integral_type(elem_bt), "sanity"); 4900 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4901 4902 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4903 uint y_idx = ($idx$$constant >> log2epr) & 1; 4904 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4905 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4906 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4907 %} 4908 ins_pipe( pipe_slow ); 4909 %} 4910 4911 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4912 predicate(Matcher::vector_length_in_bytes(n) == 64); 4913 match(Set dst (VectorInsert (Binary src val) idx)); 4914 effect(TEMP vtmp); 4915 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4916 ins_encode %{ 4917 assert(UseAVX > 2, "sanity"); 4918 4919 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4920 int elem_per_lane = 16/type2aelembytes(elem_bt); 4921 int log2epr = log2(elem_per_lane); 4922 4923 assert(is_integral_type(elem_bt), ""); 4924 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4925 4926 uint x_idx = $idx$$constant & 
right_n_bits(log2epr); 4927 uint y_idx = ($idx$$constant >> log2epr) & 3; 4928 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4929 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4930 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4931 %} 4932 ins_pipe( pipe_slow ); 4933 %} 4934 4935 #ifdef _LP64 4936 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4937 predicate(Matcher::vector_length(n) == 2); 4938 match(Set dst (VectorInsert (Binary dst val) idx)); 4939 format %{ "vector_insert $dst,$val,$idx" %} 4940 ins_encode %{ 4941 assert(UseSSE >= 4, "required"); 4942 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4943 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4944 4945 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4946 %} 4947 ins_pipe( pipe_slow ); 4948 %} 4949 4950 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4951 predicate(Matcher::vector_length(n) == 4); 4952 match(Set dst (VectorInsert (Binary src val) idx)); 4953 effect(TEMP vtmp); 4954 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4955 ins_encode %{ 4956 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4957 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4958 4959 uint x_idx = $idx$$constant & right_n_bits(1); 4960 uint y_idx = ($idx$$constant >> 1) & 1; 4961 int vlen_enc = Assembler::AVX_256bit; 4962 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4963 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4964 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4965 %} 4966 ins_pipe( pipe_slow ); 4967 %} 4968 4969 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 4970 predicate(Matcher::vector_length(n) == 8); 4971 match(Set dst (VectorInsert (Binary src val) idx)); 4972 effect(TEMP vtmp); 4973 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4974 ins_encode %{ 4975 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 4976 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4977 4978 uint x_idx = $idx$$constant & right_n_bits(1); 4979 uint y_idx = ($idx$$constant >> 1) & 3; 4980 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4981 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4982 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4983 %} 4984 ins_pipe( pipe_slow ); 4985 %} 4986 #endif 4987 4988 instruct insertF(vec dst, regF val, immU8 idx) %{ 4989 predicate(Matcher::vector_length(n) < 8); 4990 match(Set dst (VectorInsert (Binary dst val) idx)); 4991 format %{ "vector_insert $dst,$val,$idx" %} 4992 ins_encode %{ 4993 assert(UseSSE >= 4, "sanity"); 4994 4995 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4996 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4997 4998 uint x_idx = $idx$$constant & right_n_bits(2); 4999 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 5000 %} 5001 ins_pipe( pipe_slow ); 5002 %} 5003 5004 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 5005 predicate(Matcher::vector_length(n) >= 8); 5006 match(Set dst (VectorInsert (Binary src val) idx)); 5007 effect(TEMP vtmp); 5008 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 5009 
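// The flat element index is decomposed into a 128-bit lane number and a position within
// that lane: x_idx selects the float inside the lane (0-3) and y_idx selects the lane.
// For example, for a 512-bit vector (16 floats), idx == 11 gives x_idx = 11 & 3 = 3 and
// y_idx = (11 >> 2) & 3 = 2.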
ins_encode %{ 5010 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 5011 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5012 5013 int vlen = Matcher::vector_length(this); 5014 uint x_idx = $idx$$constant & right_n_bits(2); 5015 if (vlen == 8) { 5016 uint y_idx = ($idx$$constant >> 2) & 1; 5017 int vlen_enc = Assembler::AVX_256bit; 5018 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5019 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 5020 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5021 } else { 5022 assert(vlen == 16, "sanity"); 5023 uint y_idx = ($idx$$constant >> 2) & 3; 5024 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5025 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 5026 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5027 } 5028 %} 5029 ins_pipe( pipe_slow ); 5030 %} 5031 5032 #ifdef _LP64 5033 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 5034 predicate(Matcher::vector_length(n) == 2); 5035 match(Set dst (VectorInsert (Binary dst val) idx)); 5036 effect(TEMP tmp); 5037 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 5038 ins_encode %{ 5039 assert(UseSSE >= 4, "sanity"); 5040 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5041 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5042 5043 __ movq($tmp$$Register, $val$$XMMRegister); 5044 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 5045 %} 5046 ins_pipe( pipe_slow ); 5047 %} 5048 5049 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 5050 predicate(Matcher::vector_length(n) == 4); 5051 match(Set dst (VectorInsert (Binary src val) idx)); 5052 effect(TEMP vtmp, TEMP tmp); 5053 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 5054 ins_encode %{ 5055 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5056 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5057 5058 uint x_idx = $idx$$constant & right_n_bits(1); 5059 uint y_idx = ($idx$$constant >> 1) & 1; 5060 int vlen_enc = Assembler::AVX_256bit; 5061 __ movq($tmp$$Register, $val$$XMMRegister); 5062 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5063 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 5064 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5065 %} 5066 ins_pipe( pipe_slow ); 5067 %} 5068 5069 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 5070 predicate(Matcher::vector_length(n) == 8); 5071 match(Set dst (VectorInsert (Binary src val) idx)); 5072 effect(TEMP tmp, TEMP vtmp); 5073 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 5074 ins_encode %{ 5075 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5076 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5077 5078 uint x_idx = $idx$$constant & right_n_bits(1); 5079 uint y_idx = ($idx$$constant >> 1) & 3; 5080 __ movq($tmp$$Register, $val$$XMMRegister); 5081 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5082 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 5083 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5084 %} 5085 ins_pipe( pipe_slow 
); 5086 %} 5087 #endif 5088 5089 // ====================REDUCTION ARITHMETIC======================================= 5090 5091 // =======================Int Reduction========================================== 5092 5093 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5094 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2 5095 match(Set dst (AddReductionVI src1 src2)); 5096 match(Set dst (MulReductionVI src1 src2)); 5097 match(Set dst (AndReductionV src1 src2)); 5098 match(Set dst ( OrReductionV src1 src2)); 5099 match(Set dst (XorReductionV src1 src2)); 5100 match(Set dst (MinReductionV src1 src2)); 5101 match(Set dst (MaxReductionV src1 src2)); 5102 effect(TEMP vtmp1, TEMP vtmp2); 5103 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5104 ins_encode %{ 5105 int opcode = this->ideal_Opcode(); 5106 int vlen = Matcher::vector_length(this, $src2); 5107 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5108 %} 5109 ins_pipe( pipe_slow ); 5110 %} 5111 5112 // =======================Long Reduction========================================== 5113 5114 #ifdef _LP64 5115 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5116 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); 5117 match(Set dst (AddReductionVL src1 src2)); 5118 match(Set dst (MulReductionVL src1 src2)); 5119 match(Set dst (AndReductionV src1 src2)); 5120 match(Set dst ( OrReductionV src1 src2)); 5121 match(Set dst (XorReductionV src1 src2)); 5122 match(Set dst (MinReductionV src1 src2)); 5123 match(Set dst (MaxReductionV src1 src2)); 5124 effect(TEMP vtmp1, TEMP vtmp2); 5125 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5126 ins_encode %{ 5127 int opcode = this->ideal_Opcode(); 5128 int vlen = Matcher::vector_length(this, $src2); 5129 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5130 %} 5131 ins_pipe( pipe_slow ); 5132 %} 5133 5134 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 5135 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); 5136 match(Set dst (AddReductionVL src1 src2)); 5137 match(Set dst (MulReductionVL src1 src2)); 5138 match(Set dst (AndReductionV src1 src2)); 5139 match(Set dst ( OrReductionV src1 src2)); 5140 match(Set dst (XorReductionV src1 src2)); 5141 match(Set dst (MinReductionV src1 src2)); 5142 match(Set dst (MaxReductionV src1 src2)); 5143 effect(TEMP vtmp1, TEMP vtmp2); 5144 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5145 ins_encode %{ 5146 int opcode = this->ideal_Opcode(); 5147 int vlen = Matcher::vector_length(this, $src2); 5148 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5149 %} 5150 ins_pipe( pipe_slow ); 5151 %} 5152 #endif // _LP64 5153 5154 // =======================Float Reduction========================================== 5155 5156 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 5157 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src 5158 match(Set dst (AddReductionVF dst src)); 5159 match(Set dst (MulReductionVF dst src)); 5160 effect(TEMP dst, TEMP vtmp); 5161 format %{ 
"vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 5162 ins_encode %{ 5163 int opcode = this->ideal_Opcode(); 5164 int vlen = Matcher::vector_length(this, $src); 5165 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5166 %} 5167 ins_pipe( pipe_slow ); 5168 %} 5169 5170 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 5171 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5172 match(Set dst (AddReductionVF dst src)); 5173 match(Set dst (MulReductionVF dst src)); 5174 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5175 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5176 ins_encode %{ 5177 int opcode = this->ideal_Opcode(); 5178 int vlen = Matcher::vector_length(this, $src); 5179 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5180 %} 5181 ins_pipe( pipe_slow ); 5182 %} 5183 5184 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5185 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src 5186 match(Set dst (AddReductionVF dst src)); 5187 match(Set dst (MulReductionVF dst src)); 5188 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5189 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5190 ins_encode %{ 5191 int opcode = this->ideal_Opcode(); 5192 int vlen = Matcher::vector_length(this, $src); 5193 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5194 %} 5195 ins_pipe( pipe_slow ); 5196 %} 5197 5198 5199 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{ 5200 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5201 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5202 // src1 contains reduction identity 5203 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5204 match(Set dst (AddReductionVF src1 src2)); 5205 match(Set dst (MulReductionVF src1 src2)); 5206 effect(TEMP dst); 5207 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %} 5208 ins_encode %{ 5209 int opcode = this->ideal_Opcode(); 5210 int vlen = Matcher::vector_length(this, $src2); 5211 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5212 %} 5213 ins_pipe( pipe_slow ); 5214 %} 5215 5216 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{ 5217 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5218 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5219 // src1 contains reduction identity 5220 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5221 match(Set dst (AddReductionVF src1 src2)); 5222 match(Set dst (MulReductionVF src1 src2)); 5223 effect(TEMP dst, TEMP vtmp); 5224 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5225 ins_encode %{ 5226 int opcode = this->ideal_Opcode(); 5227 int vlen = Matcher::vector_length(this, $src2); 5228 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5229 %} 5230 ins_pipe( pipe_slow ); 5231 %} 5232 5233 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{ 5234 // Non-strictly ordered floating-point add/mul reduction for floats. 
This rule is 5235 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5236 // src1 contains reduction identity 5237 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5238 match(Set dst (AddReductionVF src1 src2)); 5239 match(Set dst (MulReductionVF src1 src2)); 5240 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5241 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5242 ins_encode %{ 5243 int opcode = this->ideal_Opcode(); 5244 int vlen = Matcher::vector_length(this, $src2); 5245 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5246 %} 5247 ins_pipe( pipe_slow ); 5248 %} 5249 5250 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5251 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5252 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5253 // src1 contains reduction identity 5254 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2 5255 match(Set dst (AddReductionVF src1 src2)); 5256 match(Set dst (MulReductionVF src1 src2)); 5257 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5258 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5259 ins_encode %{ 5260 int opcode = this->ideal_Opcode(); 5261 int vlen = Matcher::vector_length(this, $src2); 5262 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5263 %} 5264 ins_pipe( pipe_slow ); 5265 %} 5266 5267 // =======================Double Reduction========================================== 5268 5269 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 5270 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src 5271 match(Set dst (AddReductionVD dst src)); 5272 match(Set dst (MulReductionVD dst src)); 5273 effect(TEMP dst, TEMP vtmp); 5274 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 5275 ins_encode %{ 5276 int opcode = this->ideal_Opcode(); 5277 int vlen = Matcher::vector_length(this, $src); 5278 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5279 %} 5280 ins_pipe( pipe_slow ); 5281 %} 5282 5283 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 5284 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src 5285 match(Set dst (AddReductionVD dst src)); 5286 match(Set dst (MulReductionVD dst src)); 5287 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5288 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5289 ins_encode %{ 5290 int opcode = this->ideal_Opcode(); 5291 int vlen = Matcher::vector_length(this, $src); 5292 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5293 %} 5294 ins_pipe( pipe_slow ); 5295 %} 5296 5297 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5298 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5299 match(Set dst (AddReductionVD dst src)); 5300 match(Set dst (MulReductionVD dst src)); 5301 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5302 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5303 ins_encode %{ 5304 
int opcode = this->ideal_Opcode(); 5305 int vlen = Matcher::vector_length(this, $src); 5306 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5307 %} 5308 ins_pipe( pipe_slow ); 5309 %} 5310 5311 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{ 5312 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5313 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5314 // src1 contains reduction identity 5315 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5316 match(Set dst (AddReductionVD src1 src2)); 5317 match(Set dst (MulReductionVD src1 src2)); 5318 effect(TEMP dst); 5319 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %} 5320 ins_encode %{ 5321 int opcode = this->ideal_Opcode(); 5322 int vlen = Matcher::vector_length(this, $src2); 5323 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5324 %} 5325 ins_pipe( pipe_slow ); 5326 %} 5327 5328 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{ 5329 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5330 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5331 // src1 contains reduction identity 5332 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5333 match(Set dst (AddReductionVD src1 src2)); 5334 match(Set dst (MulReductionVD src1 src2)); 5335 effect(TEMP dst, TEMP vtmp); 5336 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5337 ins_encode %{ 5338 int opcode = this->ideal_Opcode(); 5339 int vlen = Matcher::vector_length(this, $src2); 5340 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5341 %} 5342 ins_pipe( pipe_slow ); 5343 %} 5344 5345 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5346 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5347 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
5348 // src1 contains reduction identity 5349 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5350 match(Set dst (AddReductionVD src1 src2)); 5351 match(Set dst (MulReductionVD src1 src2)); 5352 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5353 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5354 ins_encode %{ 5355 int opcode = this->ideal_Opcode(); 5356 int vlen = Matcher::vector_length(this, $src2); 5357 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5358 %} 5359 ins_pipe( pipe_slow ); 5360 %} 5361 5362 // =======================Byte Reduction========================================== 5363 5364 #ifdef _LP64 5365 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5366 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 5367 match(Set dst (AddReductionVI src1 src2)); 5368 match(Set dst (AndReductionV src1 src2)); 5369 match(Set dst ( OrReductionV src1 src2)); 5370 match(Set dst (XorReductionV src1 src2)); 5371 match(Set dst (MinReductionV src1 src2)); 5372 match(Set dst (MaxReductionV src1 src2)); 5373 effect(TEMP vtmp1, TEMP vtmp2); 5374 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5375 ins_encode %{ 5376 int opcode = this->ideal_Opcode(); 5377 int vlen = Matcher::vector_length(this, $src2); 5378 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5379 %} 5380 ins_pipe( pipe_slow ); 5381 %} 5382 5383 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5384 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 5385 match(Set dst (AddReductionVI src1 src2)); 5386 match(Set dst (AndReductionV src1 src2)); 5387 match(Set dst ( OrReductionV src1 src2)); 5388 match(Set dst (XorReductionV src1 src2)); 5389 match(Set dst (MinReductionV src1 src2)); 5390 match(Set dst (MaxReductionV src1 src2)); 5391 effect(TEMP vtmp1, TEMP vtmp2); 5392 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5393 ins_encode %{ 5394 int opcode = this->ideal_Opcode(); 5395 int vlen = Matcher::vector_length(this, $src2); 5396 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5397 %} 5398 ins_pipe( pipe_slow ); 5399 %} 5400 #endif 5401 5402 // =======================Short Reduction========================================== 5403 5404 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5405 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 5406 match(Set dst (AddReductionVI src1 src2)); 5407 match(Set dst (MulReductionVI src1 src2)); 5408 match(Set dst (AndReductionV src1 src2)); 5409 match(Set dst ( OrReductionV src1 src2)); 5410 match(Set dst (XorReductionV src1 src2)); 5411 match(Set dst (MinReductionV src1 src2)); 5412 match(Set dst (MaxReductionV src1 src2)); 5413 effect(TEMP vtmp1, TEMP vtmp2); 5414 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5415 ins_encode %{ 5416 int opcode = this->ideal_Opcode(); 5417 int vlen = Matcher::vector_length(this, $src2); 5418 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 
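// reduceS() combines the scalar input $src1 with the horizontal short reduction of $src2
// selected by `opcode` (add/mul/and/or/xor/min/max), using the two vector temps for the
// intermediate folding steps.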
5419 %} 5420 ins_pipe( pipe_slow ); 5421 %} 5422 5423 // =======================Mul Reduction========================================== 5424 5425 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5426 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5427 Matcher::vector_length(n->in(2)) <= 32); // src2 5428 match(Set dst (MulReductionVI src1 src2)); 5429 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5430 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5431 ins_encode %{ 5432 int opcode = this->ideal_Opcode(); 5433 int vlen = Matcher::vector_length(this, $src2); 5434 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5435 %} 5436 ins_pipe( pipe_slow ); 5437 %} 5438 5439 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5440 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5441 Matcher::vector_length(n->in(2)) == 64); // src2 5442 match(Set dst (MulReductionVI src1 src2)); 5443 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5444 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5445 ins_encode %{ 5446 int opcode = this->ideal_Opcode(); 5447 int vlen = Matcher::vector_length(this, $src2); 5448 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5449 %} 5450 ins_pipe( pipe_slow ); 5451 %} 5452 5453 //--------------------Min/Max Float Reduction -------------------- 5454 // Float Min Reduction 5455 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, 5456 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5457 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5458 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5459 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5460 Matcher::vector_length(n->in(2)) == 2); 5461 match(Set dst (MinReductionV src1 src2)); 5462 match(Set dst (MaxReductionV src1 src2)); 5463 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5464 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5465 ins_encode %{ 5466 assert(UseAVX > 0, "sanity"); 5467 5468 int opcode = this->ideal_Opcode(); 5469 int vlen = Matcher::vector_length(this, $src2); 5470 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5471 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5472 %} 5473 ins_pipe( pipe_slow ); 5474 %} 5475 5476 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5477 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5478 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5479 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5480 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5481 Matcher::vector_length(n->in(2)) >= 4); 5482 match(Set dst (MinReductionV src1 src2)); 5483 match(Set dst (MaxReductionV src1 src2)); 5484 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5485 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5486 ins_encode %{ 5487 assert(UseAVX > 0, "sanity"); 5488 5489 int opcode = 
this->ideal_Opcode(); 5490 int vlen = Matcher::vector_length(this, $src2); 5491 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5492 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5493 %} 5494 ins_pipe( pipe_slow ); 5495 %} 5496 5497 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, 5498 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5499 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5500 Matcher::vector_length(n->in(2)) == 2); 5501 match(Set dst (MinReductionV dst src)); 5502 match(Set dst (MaxReductionV dst src)); 5503 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5504 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5505 ins_encode %{ 5506 assert(UseAVX > 0, "sanity"); 5507 5508 int opcode = this->ideal_Opcode(); 5509 int vlen = Matcher::vector_length(this, $src); 5510 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5511 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5512 %} 5513 ins_pipe( pipe_slow ); 5514 %} 5515 5516 5517 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 5518 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5519 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5520 Matcher::vector_length(n->in(2)) >= 4); 5521 match(Set dst (MinReductionV dst src)); 5522 match(Set dst (MaxReductionV dst src)); 5523 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5524 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5525 ins_encode %{ 5526 assert(UseAVX > 0, "sanity"); 5527 5528 int opcode = this->ideal_Opcode(); 5529 int vlen = Matcher::vector_length(this, $src); 5530 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5531 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5532 %} 5533 ins_pipe( pipe_slow ); 5534 %} 5535 5536 5537 //--------------------Min Double Reduction -------------------- 5538 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 5539 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5540 rFlagsReg cr) %{ 5541 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5542 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5543 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5544 Matcher::vector_length(n->in(2)) == 2); 5545 match(Set dst (MinReductionV src1 src2)); 5546 match(Set dst (MaxReductionV src1 src2)); 5547 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5548 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5549 ins_encode %{ 5550 assert(UseAVX > 0, "sanity"); 5551 5552 int opcode = this->ideal_Opcode(); 5553 int vlen = Matcher::vector_length(this, $src2); 5554 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5555 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5556 %} 5557 ins_pipe( pipe_slow ); 5558 %} 5559 5560 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 5561 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5562 rFlagsReg cr) %{ 5563 
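// As with the float rules above, this form is only selected when src1 is the reduction
// identity (+Inf for min, -Inf for max, see the predicate below), so only src2 actually
// needs to be reduced.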
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5564 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5565 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5566 Matcher::vector_length(n->in(2)) >= 4); 5567 match(Set dst (MinReductionV src1 src2)); 5568 match(Set dst (MaxReductionV src1 src2)); 5569 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5570 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5571 ins_encode %{ 5572 assert(UseAVX > 0, "sanity"); 5573 5574 int opcode = this->ideal_Opcode(); 5575 int vlen = Matcher::vector_length(this, $src2); 5576 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5577 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5578 %} 5579 ins_pipe( pipe_slow ); 5580 %} 5581 5582 5583 instruct minmax_reduction2D_av(legRegD dst, legVec src, 5584 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5585 rFlagsReg cr) %{ 5586 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5587 Matcher::vector_length(n->in(2)) == 2); 5588 match(Set dst (MinReductionV dst src)); 5589 match(Set dst (MaxReductionV dst src)); 5590 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5591 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5592 ins_encode %{ 5593 assert(UseAVX > 0, "sanity"); 5594 5595 int opcode = this->ideal_Opcode(); 5596 int vlen = Matcher::vector_length(this, $src); 5597 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5598 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5599 %} 5600 ins_pipe( pipe_slow ); 5601 %} 5602 5603 instruct minmax_reductionD_av(legRegD dst, legVec src, 5604 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5605 rFlagsReg cr) %{ 5606 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5607 Matcher::vector_length(n->in(2)) >= 4); 5608 match(Set dst (MinReductionV dst src)); 5609 match(Set dst (MaxReductionV dst src)); 5610 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5611 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5612 ins_encode %{ 5613 assert(UseAVX > 0, "sanity"); 5614 5615 int opcode = this->ideal_Opcode(); 5616 int vlen = Matcher::vector_length(this, $src); 5617 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5618 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5619 %} 5620 ins_pipe( pipe_slow ); 5621 %} 5622 5623 // ====================VECTOR ARITHMETIC======================================= 5624 5625 // --------------------------------- ADD -------------------------------------- 5626 5627 // Bytes vector add 5628 instruct vaddB(vec dst, vec src) %{ 5629 predicate(UseAVX == 0); 5630 match(Set dst (AddVB dst src)); 5631 format %{ "paddb $dst,$src\t! add packedB" %} 5632 ins_encode %{ 5633 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5634 %} 5635 ins_pipe( pipe_slow ); 5636 %} 5637 5638 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5639 predicate(UseAVX > 0); 5640 match(Set dst (AddVB src1 src2)); 5641 format %{ "vpaddb $dst,$src1,$src2\t! 
add packedB" %} 5642 ins_encode %{ 5643 int vlen_enc = vector_length_encoding(this); 5644 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5645 %} 5646 ins_pipe( pipe_slow ); 5647 %} 5648 5649 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5650 predicate((UseAVX > 0) && 5651 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5652 match(Set dst (AddVB src (LoadVector mem))); 5653 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5654 ins_encode %{ 5655 int vlen_enc = vector_length_encoding(this); 5656 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5657 %} 5658 ins_pipe( pipe_slow ); 5659 %} 5660 5661 // Shorts/Chars vector add 5662 instruct vaddS(vec dst, vec src) %{ 5663 predicate(UseAVX == 0); 5664 match(Set dst (AddVS dst src)); 5665 format %{ "paddw $dst,$src\t! add packedS" %} 5666 ins_encode %{ 5667 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5668 %} 5669 ins_pipe( pipe_slow ); 5670 %} 5671 5672 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5673 predicate(UseAVX > 0); 5674 match(Set dst (AddVS src1 src2)); 5675 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5676 ins_encode %{ 5677 int vlen_enc = vector_length_encoding(this); 5678 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5679 %} 5680 ins_pipe( pipe_slow ); 5681 %} 5682 5683 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5684 predicate((UseAVX > 0) && 5685 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5686 match(Set dst (AddVS src (LoadVector mem))); 5687 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5688 ins_encode %{ 5689 int vlen_enc = vector_length_encoding(this); 5690 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5691 %} 5692 ins_pipe( pipe_slow ); 5693 %} 5694 5695 // Integers vector add 5696 instruct vaddI(vec dst, vec src) %{ 5697 predicate(UseAVX == 0); 5698 match(Set dst (AddVI dst src)); 5699 format %{ "paddd $dst,$src\t! add packedI" %} 5700 ins_encode %{ 5701 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5702 %} 5703 ins_pipe( pipe_slow ); 5704 %} 5705 5706 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5707 predicate(UseAVX > 0); 5708 match(Set dst (AddVI src1 src2)); 5709 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5710 ins_encode %{ 5711 int vlen_enc = vector_length_encoding(this); 5712 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5713 %} 5714 ins_pipe( pipe_slow ); 5715 %} 5716 5717 5718 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5719 predicate((UseAVX > 0) && 5720 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5721 match(Set dst (AddVI src (LoadVector mem))); 5722 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5723 ins_encode %{ 5724 int vlen_enc = vector_length_encoding(this); 5725 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5726 %} 5727 ins_pipe( pipe_slow ); 5728 %} 5729 5730 // Longs vector add 5731 instruct vaddL(vec dst, vec src) %{ 5732 predicate(UseAVX == 0); 5733 match(Set dst (AddVL dst src)); 5734 format %{ "paddq $dst,$src\t! add packedL" %} 5735 ins_encode %{ 5736 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5737 %} 5738 ins_pipe( pipe_slow ); 5739 %} 5740 5741 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5742 predicate(UseAVX > 0); 5743 match(Set dst (AddVL src1 src2)); 5744 format %{ "vpaddq $dst,$src1,$src2\t! 
add packedL" %} 5745 ins_encode %{ 5746 int vlen_enc = vector_length_encoding(this); 5747 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5748 %} 5749 ins_pipe( pipe_slow ); 5750 %} 5751 5752 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5753 predicate((UseAVX > 0) && 5754 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5755 match(Set dst (AddVL src (LoadVector mem))); 5756 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5757 ins_encode %{ 5758 int vlen_enc = vector_length_encoding(this); 5759 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5760 %} 5761 ins_pipe( pipe_slow ); 5762 %} 5763 5764 // Floats vector add 5765 instruct vaddF(vec dst, vec src) %{ 5766 predicate(UseAVX == 0); 5767 match(Set dst (AddVF dst src)); 5768 format %{ "addps $dst,$src\t! add packedF" %} 5769 ins_encode %{ 5770 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5771 %} 5772 ins_pipe( pipe_slow ); 5773 %} 5774 5775 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5776 predicate(UseAVX > 0); 5777 match(Set dst (AddVF src1 src2)); 5778 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5779 ins_encode %{ 5780 int vlen_enc = vector_length_encoding(this); 5781 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5782 %} 5783 ins_pipe( pipe_slow ); 5784 %} 5785 5786 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5787 predicate((UseAVX > 0) && 5788 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5789 match(Set dst (AddVF src (LoadVector mem))); 5790 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5791 ins_encode %{ 5792 int vlen_enc = vector_length_encoding(this); 5793 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5794 %} 5795 ins_pipe( pipe_slow ); 5796 %} 5797 5798 // Doubles vector add 5799 instruct vaddD(vec dst, vec src) %{ 5800 predicate(UseAVX == 0); 5801 match(Set dst (AddVD dst src)); 5802 format %{ "addpd $dst,$src\t! add packedD" %} 5803 ins_encode %{ 5804 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5805 %} 5806 ins_pipe( pipe_slow ); 5807 %} 5808 5809 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5810 predicate(UseAVX > 0); 5811 match(Set dst (AddVD src1 src2)); 5812 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5813 ins_encode %{ 5814 int vlen_enc = vector_length_encoding(this); 5815 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5816 %} 5817 ins_pipe( pipe_slow ); 5818 %} 5819 5820 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5821 predicate((UseAVX > 0) && 5822 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5823 match(Set dst (AddVD src (LoadVector mem))); 5824 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5825 ins_encode %{ 5826 int vlen_enc = vector_length_encoding(this); 5827 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5828 %} 5829 ins_pipe( pipe_slow ); 5830 %} 5831 5832 // --------------------------------- SUB -------------------------------------- 5833 5834 // Bytes vector sub 5835 instruct vsubB(vec dst, vec src) %{ 5836 predicate(UseAVX == 0); 5837 match(Set dst (SubVB dst src)); 5838 format %{ "psubb $dst,$src\t! sub packedB" %} 5839 ins_encode %{ 5840 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5841 %} 5842 ins_pipe( pipe_slow ); 5843 %} 5844 5845 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5846 predicate(UseAVX > 0); 5847 match(Set dst (SubVB src1 src2)); 5848 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packedB" %} 5849 ins_encode %{ 5850 int vlen_enc = vector_length_encoding(this); 5851 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5852 %} 5853 ins_pipe( pipe_slow ); 5854 %} 5855 5856 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5857 predicate((UseAVX > 0) && 5858 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5859 match(Set dst (SubVB src (LoadVector mem))); 5860 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5861 ins_encode %{ 5862 int vlen_enc = vector_length_encoding(this); 5863 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5864 %} 5865 ins_pipe( pipe_slow ); 5866 %} 5867 5868 // Shorts/Chars vector sub 5869 instruct vsubS(vec dst, vec src) %{ 5870 predicate(UseAVX == 0); 5871 match(Set dst (SubVS dst src)); 5872 format %{ "psubw $dst,$src\t! sub packedS" %} 5873 ins_encode %{ 5874 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5875 %} 5876 ins_pipe( pipe_slow ); 5877 %} 5878 5879 5880 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5881 predicate(UseAVX > 0); 5882 match(Set dst (SubVS src1 src2)); 5883 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5884 ins_encode %{ 5885 int vlen_enc = vector_length_encoding(this); 5886 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5887 %} 5888 ins_pipe( pipe_slow ); 5889 %} 5890 5891 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5892 predicate((UseAVX > 0) && 5893 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5894 match(Set dst (SubVS src (LoadVector mem))); 5895 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5896 ins_encode %{ 5897 int vlen_enc = vector_length_encoding(this); 5898 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5899 %} 5900 ins_pipe( pipe_slow ); 5901 %} 5902 5903 // Integers vector sub 5904 instruct vsubI(vec dst, vec src) %{ 5905 predicate(UseAVX == 0); 5906 match(Set dst (SubVI dst src)); 5907 format %{ "psubd $dst,$src\t! sub packedI" %} 5908 ins_encode %{ 5909 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5910 %} 5911 ins_pipe( pipe_slow ); 5912 %} 5913 5914 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5915 predicate(UseAVX > 0); 5916 match(Set dst (SubVI src1 src2)); 5917 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5918 ins_encode %{ 5919 int vlen_enc = vector_length_encoding(this); 5920 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5921 %} 5922 ins_pipe( pipe_slow ); 5923 %} 5924 5925 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5926 predicate((UseAVX > 0) && 5927 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5928 match(Set dst (SubVI src (LoadVector mem))); 5929 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5930 ins_encode %{ 5931 int vlen_enc = vector_length_encoding(this); 5932 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5933 %} 5934 ins_pipe( pipe_slow ); 5935 %} 5936 5937 // Longs vector sub 5938 instruct vsubL(vec dst, vec src) %{ 5939 predicate(UseAVX == 0); 5940 match(Set dst (SubVL dst src)); 5941 format %{ "psubq $dst,$src\t! sub packedL" %} 5942 ins_encode %{ 5943 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5944 %} 5945 ins_pipe( pipe_slow ); 5946 %} 5947 5948 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5949 predicate(UseAVX > 0); 5950 match(Set dst (SubVL src1 src2)); 5951 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packedL" %} 5952 ins_encode %{ 5953 int vlen_enc = vector_length_encoding(this); 5954 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5955 %} 5956 ins_pipe( pipe_slow ); 5957 %} 5958 5959 5960 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5961 predicate((UseAVX > 0) && 5962 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5963 match(Set dst (SubVL src (LoadVector mem))); 5964 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5965 ins_encode %{ 5966 int vlen_enc = vector_length_encoding(this); 5967 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5968 %} 5969 ins_pipe( pipe_slow ); 5970 %} 5971 5972 // Floats vector sub 5973 instruct vsubF(vec dst, vec src) %{ 5974 predicate(UseAVX == 0); 5975 match(Set dst (SubVF dst src)); 5976 format %{ "subps $dst,$src\t! sub packedF" %} 5977 ins_encode %{ 5978 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5979 %} 5980 ins_pipe( pipe_slow ); 5981 %} 5982 5983 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5984 predicate(UseAVX > 0); 5985 match(Set dst (SubVF src1 src2)); 5986 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5987 ins_encode %{ 5988 int vlen_enc = vector_length_encoding(this); 5989 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5990 %} 5991 ins_pipe( pipe_slow ); 5992 %} 5993 5994 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5995 predicate((UseAVX > 0) && 5996 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5997 match(Set dst (SubVF src (LoadVector mem))); 5998 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5999 ins_encode %{ 6000 int vlen_enc = vector_length_encoding(this); 6001 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6002 %} 6003 ins_pipe( pipe_slow ); 6004 %} 6005 6006 // Doubles vector sub 6007 instruct vsubD(vec dst, vec src) %{ 6008 predicate(UseAVX == 0); 6009 match(Set dst (SubVD dst src)); 6010 format %{ "subpd $dst,$src\t! sub packedD" %} 6011 ins_encode %{ 6012 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 6013 %} 6014 ins_pipe( pipe_slow ); 6015 %} 6016 6017 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 6018 predicate(UseAVX > 0); 6019 match(Set dst (SubVD src1 src2)); 6020 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 6021 ins_encode %{ 6022 int vlen_enc = vector_length_encoding(this); 6023 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6024 %} 6025 ins_pipe( pipe_slow ); 6026 %} 6027 6028 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 6029 predicate((UseAVX > 0) && 6030 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6031 match(Set dst (SubVD src (LoadVector mem))); 6032 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 6033 ins_encode %{ 6034 int vlen_enc = vector_length_encoding(this); 6035 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6036 %} 6037 ins_pipe( pipe_slow ); 6038 %} 6039 6040 // --------------------------------- MUL -------------------------------------- 6041 6042 // Byte vector mul 6043 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 6044 predicate(Matcher::vector_length_in_bytes(n) <= 8); 6045 match(Set dst (MulVB src1 src2)); 6046 effect(TEMP dst, TEMP xtmp); 6047 format %{ "mulVB $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 6048 ins_encode %{ 6049 assert(UseSSE > 3, "required"); 6050 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 6051 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 6052 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6053 __ psllw($dst$$XMMRegister, 8); 6054 __ psrlw($dst$$XMMRegister, 8); 6055 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6056 %} 6057 ins_pipe( pipe_slow ); 6058 %} 6059 6060 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 6061 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 6062 match(Set dst (MulVB src1 src2)); 6063 effect(TEMP dst, TEMP xtmp); 6064 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 6065 ins_encode %{ 6066 assert(UseSSE > 3, "required"); 6067 // Odd-index elements 6068 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 6069 __ psrlw($dst$$XMMRegister, 8); 6070 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 6071 __ psrlw($xtmp$$XMMRegister, 8); 6072 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6073 __ psllw($dst$$XMMRegister, 8); 6074 // Even-index elements 6075 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6076 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 6077 __ psllw($xtmp$$XMMRegister, 8); 6078 __ psrlw($xtmp$$XMMRegister, 8); 6079 // Combine 6080 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 6081 %} 6082 ins_pipe( pipe_slow ); 6083 %} 6084 6085 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6086 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 6087 match(Set dst (MulVB src1 src2)); 6088 effect(TEMP xtmp1, TEMP xtmp2); 6089 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 6090 ins_encode %{ 6091 int vlen_enc = vector_length_encoding(this); 6092 // Odd-index elements 6093 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 6094 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 6095 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6096 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 6097 // Even-index elements 6098 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6099 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6100 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6101 // Combine 6102 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6103 %} 6104 ins_pipe( pipe_slow ); 6105 %} 6106 6107 // Shorts/Chars vector mul 6108 instruct vmulS(vec dst, vec src) %{ 6109 predicate(UseAVX == 0); 6110 match(Set dst (MulVS dst src)); 6111 format %{ "pmullw $dst,$src\t! mul packedS" %} 6112 ins_encode %{ 6113 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6114 %} 6115 ins_pipe( pipe_slow ); 6116 %} 6117 6118 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 6119 predicate(UseAVX > 0); 6120 match(Set dst (MulVS src1 src2)); 6121 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 6122 ins_encode %{ 6123 int vlen_enc = vector_length_encoding(this); 6124 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6125 %} 6126 ins_pipe( pipe_slow ); 6127 %} 6128 6129 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 6130 predicate((UseAVX > 0) && 6131 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6132 match(Set dst (MulVS src (LoadVector mem))); 6133 format %{ "vpmullw $dst,$src,$mem\t! 
mul packedS" %} 6134 ins_encode %{ 6135 int vlen_enc = vector_length_encoding(this); 6136 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6137 %} 6138 ins_pipe( pipe_slow ); 6139 %} 6140 6141 // Integers vector mul 6142 instruct vmulI(vec dst, vec src) %{ 6143 predicate(UseAVX == 0); 6144 match(Set dst (MulVI dst src)); 6145 format %{ "pmulld $dst,$src\t! mul packedI" %} 6146 ins_encode %{ 6147 assert(UseSSE > 3, "required"); 6148 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6149 %} 6150 ins_pipe( pipe_slow ); 6151 %} 6152 6153 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 6154 predicate(UseAVX > 0); 6155 match(Set dst (MulVI src1 src2)); 6156 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 6157 ins_encode %{ 6158 int vlen_enc = vector_length_encoding(this); 6159 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6160 %} 6161 ins_pipe( pipe_slow ); 6162 %} 6163 6164 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 6165 predicate((UseAVX > 0) && 6166 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6167 match(Set dst (MulVI src (LoadVector mem))); 6168 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 6169 ins_encode %{ 6170 int vlen_enc = vector_length_encoding(this); 6171 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6172 %} 6173 ins_pipe( pipe_slow ); 6174 %} 6175 6176 // Longs vector mul 6177 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 6178 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6179 VM_Version::supports_avx512dq()) || 6180 VM_Version::supports_avx512vldq()); 6181 match(Set dst (MulVL src1 src2)); 6182 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %} 6183 ins_encode %{ 6184 assert(UseAVX > 2, "required"); 6185 int vlen_enc = vector_length_encoding(this); 6186 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6187 %} 6188 ins_pipe( pipe_slow ); 6189 %} 6190 6191 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 6192 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6193 VM_Version::supports_avx512dq()) || 6194 (Matcher::vector_length_in_bytes(n) > 8 && 6195 VM_Version::supports_avx512vldq())); 6196 match(Set dst (MulVL src (LoadVector mem))); 6197 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 6198 ins_encode %{ 6199 assert(UseAVX > 2, "required"); 6200 int vlen_enc = vector_length_encoding(this); 6201 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6202 %} 6203 ins_pipe( pipe_slow ); 6204 %} 6205 6206 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 6207 predicate(UseAVX == 0); 6208 match(Set dst (MulVL src1 src2)); 6209 effect(TEMP dst, TEMP xtmp); 6210 format %{ "mulVL $dst, $src1, $src2\t! 
6206 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 6207 predicate(UseAVX == 0); 6208 match(Set dst (MulVL src1 src2)); 6209 effect(TEMP dst, TEMP xtmp); 6210 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %} 6211 ins_encode %{ 6212 assert(VM_Version::supports_sse4_1(), "required"); 6213 // Get the lo-hi cross products; only their lower 32 bits are needed 6214 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1); 6215 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister); 6216 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1); 6217 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister); 6218 __ psllq($dst$$XMMRegister, 32); 6219 // Get the lo-lo products 6220 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6221 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister); 6222 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister); 6223 %} 6224 ins_pipe( pipe_slow ); 6225 %} 6226 6227 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6228 predicate(UseAVX > 0 && 6229 ((Matcher::vector_length_in_bytes(n) == 64 && 6230 !VM_Version::supports_avx512dq()) || 6231 (Matcher::vector_length_in_bytes(n) < 64 && 6232 !VM_Version::supports_avx512vldq()))); 6233 match(Set dst (MulVL src1 src2)); 6234 effect(TEMP xtmp1, TEMP xtmp2); 6235 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 6236 ins_encode %{ 6237 int vlen_enc = vector_length_encoding(this); 6238 // Get the lo-hi cross products; only their lower 32 bits are needed 6239 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc); 6240 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 6241 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc); 6242 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 6243 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc); 6244 // Get the lo-lo products 6245 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6246 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6247 %} 6248 ins_pipe( pipe_slow ); 6249 %} 6250 6251 // Floats vector mul 6252 instruct vmulF(vec dst, vec src) %{ 6253 predicate(UseAVX == 0); 6254 match(Set dst (MulVF dst src)); 6255 format %{ "mulps $dst,$src\t! mul packedF" %} 6256 ins_encode %{ 6257 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 6258 %} 6259 ins_pipe( pipe_slow ); 6260 %} 6261 6262 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 6263 predicate(UseAVX > 0); 6264 match(Set dst (MulVF src1 src2)); 6265 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %} 6266 ins_encode %{ 6267 int vlen_enc = vector_length_encoding(this); 6268 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6269 %} 6270 ins_pipe( pipe_slow ); 6271 %} 6272 6273 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 6274 predicate((UseAVX > 0) && 6275 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6276 match(Set dst (MulVF src (LoadVector mem))); 6277 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 6278 ins_encode %{ 6279 int vlen_enc = vector_length_encoding(this); 6280 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6281 %} 6282 ins_pipe( pipe_slow ); 6283 %} 6284 6285 // Doubles vector mul 6286 instruct vmulD(vec dst, vec src) %{ 6287 predicate(UseAVX == 0); 6288 match(Set dst (MulVD dst src)); 6289 format %{ "mulpd $dst,$src\t! mul packedD" %} 6290 ins_encode %{ 6291 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 6292 %} 6293 ins_pipe( pipe_slow ); 6294 %} 6295 6296 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 6297 predicate(UseAVX > 0); 6298 match(Set dst (MulVD src1 src2)); 6299 format %{ "vmulpd $dst,$src1,$src2\t!
mul packedD" %} 6300 ins_encode %{ 6301 int vlen_enc = vector_length_encoding(this); 6302 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6303 %} 6304 ins_pipe( pipe_slow ); 6305 %} 6306 6307 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 6308 predicate((UseAVX > 0) && 6309 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6310 match(Set dst (MulVD src (LoadVector mem))); 6311 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 6312 ins_encode %{ 6313 int vlen_enc = vector_length_encoding(this); 6314 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6315 %} 6316 ins_pipe( pipe_slow ); 6317 %} 6318 6319 // --------------------------------- DIV -------------------------------------- 6320 6321 // Floats vector div 6322 instruct vdivF(vec dst, vec src) %{ 6323 predicate(UseAVX == 0); 6324 match(Set dst (DivVF dst src)); 6325 format %{ "divps $dst,$src\t! div packedF" %} 6326 ins_encode %{ 6327 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6328 %} 6329 ins_pipe( pipe_slow ); 6330 %} 6331 6332 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 6333 predicate(UseAVX > 0); 6334 match(Set dst (DivVF src1 src2)); 6335 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 6336 ins_encode %{ 6337 int vlen_enc = vector_length_encoding(this); 6338 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6339 %} 6340 ins_pipe( pipe_slow ); 6341 %} 6342 6343 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6344 predicate((UseAVX > 0) && 6345 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6346 match(Set dst (DivVF src (LoadVector mem))); 6347 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6348 ins_encode %{ 6349 int vlen_enc = vector_length_encoding(this); 6350 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6351 %} 6352 ins_pipe( pipe_slow ); 6353 %} 6354 6355 // Doubles vector div 6356 instruct vdivD(vec dst, vec src) %{ 6357 predicate(UseAVX == 0); 6358 match(Set dst (DivVD dst src)); 6359 format %{ "divpd $dst,$src\t! div packedD" %} 6360 ins_encode %{ 6361 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6362 %} 6363 ins_pipe( pipe_slow ); 6364 %} 6365 6366 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6367 predicate(UseAVX > 0); 6368 match(Set dst (DivVD src1 src2)); 6369 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 6370 ins_encode %{ 6371 int vlen_enc = vector_length_encoding(this); 6372 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6373 %} 6374 ins_pipe( pipe_slow ); 6375 %} 6376 6377 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6378 predicate((UseAVX > 0) && 6379 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6380 match(Set dst (DivVD src (LoadVector mem))); 6381 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 6382 ins_encode %{ 6383 int vlen_enc = vector_length_encoding(this); 6384 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6385 %} 6386 ins_pipe( pipe_slow ); 6387 %} 6388 6389 // ------------------------------ MinMax --------------------------------------- 6390 6391 // Byte, Short, Int vector Min/Max 6392 instruct minmax_reg_sse(vec dst, vec src) %{ 6393 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6394 UseAVX == 0); 6395 match(Set dst (MinV dst src)); 6396 match(Set dst (MaxV dst src)); 6397 format %{ "vector_minmax $dst,$src\t! 
" %} 6398 ins_encode %{ 6399 assert(UseSSE >= 4, "required"); 6400 6401 int opcode = this->ideal_Opcode(); 6402 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6403 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6404 %} 6405 ins_pipe( pipe_slow ); 6406 %} 6407 6408 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6409 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6410 UseAVX > 0); 6411 match(Set dst (MinV src1 src2)); 6412 match(Set dst (MaxV src1 src2)); 6413 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 6414 ins_encode %{ 6415 int opcode = this->ideal_Opcode(); 6416 int vlen_enc = vector_length_encoding(this); 6417 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6418 6419 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6420 %} 6421 ins_pipe( pipe_slow ); 6422 %} 6423 6424 // Long vector Min/Max 6425 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6426 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6427 UseAVX == 0); 6428 match(Set dst (MinV dst src)); 6429 match(Set dst (MaxV src dst)); 6430 effect(TEMP dst, TEMP tmp); 6431 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6432 ins_encode %{ 6433 assert(UseSSE >= 4, "required"); 6434 6435 int opcode = this->ideal_Opcode(); 6436 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6437 assert(elem_bt == T_LONG, "sanity"); 6438 6439 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6440 %} 6441 ins_pipe( pipe_slow ); 6442 %} 6443 6444 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6445 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6446 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6447 match(Set dst (MinV src1 src2)); 6448 match(Set dst (MaxV src1 src2)); 6449 effect(TEMP dst); 6450 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %} 6451 ins_encode %{ 6452 int vlen_enc = vector_length_encoding(this); 6453 int opcode = this->ideal_Opcode(); 6454 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6455 assert(elem_bt == T_LONG, "sanity"); 6456 6457 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6458 %} 6459 ins_pipe( pipe_slow ); 6460 %} 6461 6462 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6463 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6464 Matcher::vector_element_basic_type(n) == T_LONG); 6465 match(Set dst (MinV src1 src2)); 6466 match(Set dst (MaxV src1 src2)); 6467 format %{ "vector_minmaxL $dst,$src1,src2\t! 
" %} 6468 ins_encode %{ 6469 assert(UseAVX > 2, "required"); 6470 6471 int vlen_enc = vector_length_encoding(this); 6472 int opcode = this->ideal_Opcode(); 6473 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6474 assert(elem_bt == T_LONG, "sanity"); 6475 6476 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6477 %} 6478 ins_pipe( pipe_slow ); 6479 %} 6480 6481 // Float/Double vector Min/Max 6482 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6483 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6484 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6485 UseAVX > 0); 6486 match(Set dst (MinV a b)); 6487 match(Set dst (MaxV a b)); 6488 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6489 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6490 ins_encode %{ 6491 assert(UseAVX > 0, "required"); 6492 6493 int opcode = this->ideal_Opcode(); 6494 int vlen_enc = vector_length_encoding(this); 6495 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6496 6497 __ vminmax_fp(opcode, elem_bt, 6498 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6499 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6500 %} 6501 ins_pipe( pipe_slow ); 6502 %} 6503 6504 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6505 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6506 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6507 match(Set dst (MinV a b)); 6508 match(Set dst (MaxV a b)); 6509 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6510 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6511 ins_encode %{ 6512 assert(UseAVX > 2, "required"); 6513 6514 int opcode = this->ideal_Opcode(); 6515 int vlen_enc = vector_length_encoding(this); 6516 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6517 6518 __ evminmax_fp(opcode, elem_bt, 6519 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6520 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6521 %} 6522 ins_pipe( pipe_slow ); 6523 %} 6524 6525 // ------------------------------ Unsigned vector Min/Max ---------------------- 6526 6527 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{ 6528 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6529 match(Set dst (UMinV a b)); 6530 match(Set dst (UMaxV a b)); 6531 format %{ "vector_uminmax $dst,$a,$b\t!" %} 6532 ins_encode %{ 6533 int opcode = this->ideal_Opcode(); 6534 int vlen_enc = vector_length_encoding(this); 6535 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6536 assert(is_integral_type(elem_bt), ""); 6537 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); 6538 %} 6539 ins_pipe( pipe_slow ); 6540 %} 6541 6542 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{ 6543 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6544 match(Set dst (UMinV a (LoadVector b))); 6545 match(Set dst (UMaxV a (LoadVector b))); 6546 format %{ "vector_uminmax $dst,$a,$b\t!" 
%} 6547 ins_encode %{ 6548 int opcode = this->ideal_Opcode(); 6549 int vlen_enc = vector_length_encoding(this); 6550 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6551 assert(is_integral_type(elem_bt), ""); 6552 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc); 6553 %} 6554 ins_pipe( pipe_slow ); 6555 %} 6556 6557 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{ 6558 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG); 6559 match(Set dst (UMinV a b)); 6560 match(Set dst (UMaxV a b)); 6561 effect(TEMP xtmp1, TEMP xtmp2); 6562 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %} 6563 ins_encode %{ 6564 int opcode = this->ideal_Opcode(); 6565 int vlen_enc = vector_length_encoding(this); 6566 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6567 %} 6568 ins_pipe( pipe_slow ); 6569 %} 6570 6571 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{ 6572 match(Set dst (UMinV (Binary dst src2) mask)); 6573 match(Set dst (UMaxV (Binary dst src2) mask)); 6574 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %} 6575 ins_encode %{ 6576 int vlen_enc = vector_length_encoding(this); 6577 BasicType bt = Matcher::vector_element_basic_type(this); 6578 int opc = this->ideal_Opcode(); 6579 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 6580 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 6581 %} 6582 ins_pipe( pipe_slow ); 6583 %} 6584 6585 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{ 6586 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask)); 6587 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask)); 6588 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %} 6589 ins_encode %{ 6590 int vlen_enc = vector_length_encoding(this); 6591 BasicType bt = Matcher::vector_element_basic_type(this); 6592 int opc = this->ideal_Opcode(); 6593 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 6594 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 6595 %} 6596 ins_pipe( pipe_slow ); 6597 %} 6598 6599 // --------------------------------- Signum/CopySign --------------------------- 6600 6601 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{ 6602 match(Set dst (SignumF dst (Binary zero one))); 6603 effect(KILL cr); 6604 format %{ "signumF $dst, $dst" %} 6605 ins_encode %{ 6606 int opcode = this->ideal_Opcode(); 6607 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6608 %} 6609 ins_pipe( pipe_slow ); 6610 %} 6611 6612 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{ 6613 match(Set dst (SignumD dst (Binary zero one))); 6614 effect(KILL cr); 6615 format %{ "signumD $dst, $dst" %} 6616 ins_encode %{ 6617 int opcode = this->ideal_Opcode(); 6618 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6619 %} 6620 ins_pipe( pipe_slow ); 6621 %} 6622 6623 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{ 6624 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 6625 match(Set dst (SignumVF src (Binary zero one))); 6626 match(Set dst (SignumVD src (Binary zero one))); 6627 effect(TEMP dst, TEMP xtmp1); 6628 format %{ "vector_signum_avx $dst, $src\t! 
using $xtmp1 as TEMP" %} 6629 ins_encode %{ 6630 int opcode = this->ideal_Opcode(); 6631 int vec_enc = vector_length_encoding(this); 6632 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6633 $xtmp1$$XMMRegister, vec_enc); 6634 %} 6635 ins_pipe( pipe_slow ); 6636 %} 6637 6638 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{ 6639 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 6640 match(Set dst (SignumVF src (Binary zero one))); 6641 match(Set dst (SignumVD src (Binary zero one))); 6642 effect(TEMP dst, TEMP ktmp1); 6643 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %} 6644 ins_encode %{ 6645 int opcode = this->ideal_Opcode(); 6646 int vec_enc = vector_length_encoding(this); 6647 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6648 $ktmp1$$KRegister, vec_enc); 6649 %} 6650 ins_pipe( pipe_slow ); 6651 %} 6652 6653 // --------------------------------------- 6654 // For copySign use 0xE4 as writemask for vpternlog 6655 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit 6656 // C (xmm2) is set to 0x7FFFFFFF 6657 // Wherever xmm2 is 0, we want to pick from B (sign) 6658 // Wherever xmm2 is 1, we want to pick from A (src) 6659 // 6660 // A B C Result 6661 // 0 0 0 0 6662 // 0 0 1 0 6663 // 0 1 0 1 6664 // 0 1 1 0 6665 // 1 0 0 0 6666 // 1 0 1 1 6667 // 1 1 0 1 6668 // 1 1 1 1 6669 // 6670 // Result going from high bit to low bit is 0x11100100 = 0xe4 6671 // --------------------------------------- 6672 6673 #ifdef _LP64 6674 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{ 6675 match(Set dst (CopySignF dst src)); 6676 effect(TEMP tmp1, TEMP tmp2); 6677 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6678 ins_encode %{ 6679 __ movl($tmp2$$Register, 0x7FFFFFFF); 6680 __ movdl($tmp1$$XMMRegister, $tmp2$$Register); 6681 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6682 %} 6683 ins_pipe( pipe_slow ); 6684 %} 6685 6686 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ 6687 match(Set dst (CopySignD dst (Binary src zero))); 6688 ins_cost(100); 6689 effect(TEMP tmp1, TEMP tmp2); 6690 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6691 ins_encode %{ 6692 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); 6693 __ movq($tmp1$$XMMRegister, $tmp2$$Register); 6694 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6695 %} 6696 ins_pipe( pipe_slow ); 6697 %} 6698 6699 #endif // _LP64 6700 6701 //----------------------------- CompressBits/ExpandBits ------------------------ 6702 6703 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6704 predicate(n->bottom_type()->isa_int()); 6705 match(Set dst (CompressBits src mask)); 6706 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6707 ins_encode %{ 6708 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 6709 %} 6710 ins_pipe( pipe_slow ); 6711 %} 6712 6713 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6714 predicate(n->bottom_type()->isa_int()); 6715 match(Set dst (ExpandBits src mask)); 6716 format %{ "pdepl $dst, $src, $mask\t! 
parallel bit deposit" %} 6717 ins_encode %{ 6718 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 6719 %} 6720 ins_pipe( pipe_slow ); 6721 %} 6722 6723 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6724 predicate(n->bottom_type()->isa_int()); 6725 match(Set dst (CompressBits src (LoadI mask))); 6726 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6727 ins_encode %{ 6728 __ pextl($dst$$Register, $src$$Register, $mask$$Address); 6729 %} 6730 ins_pipe( pipe_slow ); 6731 %} 6732 6733 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6734 predicate(n->bottom_type()->isa_int()); 6735 match(Set dst (ExpandBits src (LoadI mask))); 6736 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6737 ins_encode %{ 6738 __ pdepl($dst$$Register, $src$$Register, $mask$$Address); 6739 %} 6740 ins_pipe( pipe_slow ); 6741 %} 6742 6743 // --------------------------------- Sqrt -------------------------------------- 6744 6745 instruct vsqrtF_reg(vec dst, vec src) %{ 6746 match(Set dst (SqrtVF src)); 6747 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6748 ins_encode %{ 6749 assert(UseAVX > 0, "required"); 6750 int vlen_enc = vector_length_encoding(this); 6751 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6752 %} 6753 ins_pipe( pipe_slow ); 6754 %} 6755 6756 instruct vsqrtF_mem(vec dst, memory mem) %{ 6757 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6758 match(Set dst (SqrtVF (LoadVector mem))); 6759 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6760 ins_encode %{ 6761 assert(UseAVX > 0, "required"); 6762 int vlen_enc = vector_length_encoding(this); 6763 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6764 %} 6765 ins_pipe( pipe_slow ); 6766 %} 6767 6768 // Floating point vector sqrt 6769 instruct vsqrtD_reg(vec dst, vec src) %{ 6770 match(Set dst (SqrtVD src)); 6771 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 6772 ins_encode %{ 6773 assert(UseAVX > 0, "required"); 6774 int vlen_enc = vector_length_encoding(this); 6775 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6776 %} 6777 ins_pipe( pipe_slow ); 6778 %} 6779 6780 instruct vsqrtD_mem(vec dst, memory mem) %{ 6781 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6782 match(Set dst (SqrtVD (LoadVector mem))); 6783 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6784 ins_encode %{ 6785 assert(UseAVX > 0, "required"); 6786 int vlen_enc = vector_length_encoding(this); 6787 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6788 %} 6789 ins_pipe( pipe_slow ); 6790 %} 6791 6792 // ------------------------------ Shift --------------------------------------- 6793 6794 // Left and right shift count vectors are the same on x86 6795 // (only lowest bits of xmm reg are used for count). 6796 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6797 match(Set dst (LShiftCntV cnt)); 6798 match(Set dst (RShiftCntV cnt)); 6799 format %{ "movdl $dst,$cnt\t! 
load shift count" %} 6800 ins_encode %{ 6801 __ movdl($dst$$XMMRegister, $cnt$$Register); 6802 %} 6803 ins_pipe( pipe_slow ); 6804 %} 6805 6806 // Byte vector shift 6807 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6808 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6809 match(Set dst ( LShiftVB src shift)); 6810 match(Set dst ( RShiftVB src shift)); 6811 match(Set dst (URShiftVB src shift)); 6812 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6813 format %{"vector_byte_shift $dst,$src,$shift" %} 6814 ins_encode %{ 6815 assert(UseSSE > 3, "required"); 6816 int opcode = this->ideal_Opcode(); 6817 bool sign = (opcode != Op_URShiftVB); 6818 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6819 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6820 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6821 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6822 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6823 %} 6824 ins_pipe( pipe_slow ); 6825 %} 6826 6827 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6828 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6829 UseAVX <= 1); 6830 match(Set dst ( LShiftVB src shift)); 6831 match(Set dst ( RShiftVB src shift)); 6832 match(Set dst (URShiftVB src shift)); 6833 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6834 format %{"vector_byte_shift $dst,$src,$shift" %} 6835 ins_encode %{ 6836 assert(UseSSE > 3, "required"); 6837 int opcode = this->ideal_Opcode(); 6838 bool sign = (opcode != Op_URShiftVB); 6839 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6840 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6841 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6842 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6843 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6844 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6845 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6846 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6847 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6848 %} 6849 ins_pipe( pipe_slow ); 6850 %} 6851 6852 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6853 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6854 UseAVX > 1); 6855 match(Set dst ( LShiftVB src shift)); 6856 match(Set dst ( RShiftVB src shift)); 6857 match(Set dst (URShiftVB src shift)); 6858 effect(TEMP dst, TEMP tmp); 6859 format %{"vector_byte_shift $dst,$src,$shift" %} 6860 ins_encode %{ 6861 int opcode = this->ideal_Opcode(); 6862 bool sign = (opcode != Op_URShiftVB); 6863 int vlen_enc = Assembler::AVX_256bit; 6864 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6865 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6866 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6867 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6868 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6869 %} 6870 ins_pipe( pipe_slow ); 6871 %} 6872 6873 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6874 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6875 match(Set dst ( LShiftVB src shift)); 6876 match(Set dst ( RShiftVB src shift)); 6877 match(Set dst (URShiftVB src shift)); 6878 effect(TEMP 
dst, TEMP tmp); 6879 format %{"vector_byte_shift $dst,$src,$shift" %} 6880 ins_encode %{ 6881 assert(UseAVX > 1, "required"); 6882 int opcode = this->ideal_Opcode(); 6883 bool sign = (opcode != Op_URShiftVB); 6884 int vlen_enc = Assembler::AVX_256bit; 6885 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 6886 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6887 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6888 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6889 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6890 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6891 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6892 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6893 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6894 %} 6895 ins_pipe( pipe_slow ); 6896 %} 6897 6898 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6899 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift()); 6900 match(Set dst ( LShiftVB src shift)); 6901 match(Set dst (RShiftVB src shift)); 6902 match(Set dst (URShiftVB src shift)); 6903 effect(TEMP dst, TEMP tmp1, TEMP tmp2); 6904 format %{"vector_byte_shift $dst,$src,$shift" %} 6905 ins_encode %{ 6906 assert(UseAVX > 2, "required"); 6907 int opcode = this->ideal_Opcode(); 6908 bool sign = (opcode != Op_URShiftVB); 6909 int vlen_enc = Assembler::AVX_512bit; 6910 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 6911 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 6912 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6913 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6914 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6915 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6916 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6917 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6918 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6919 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 6920 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg); 6921 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6922 %} 6923 ins_pipe( pipe_slow ); 6924 %} 6925 6926 // Shorts vector logical right shift produces an incorrect Java result 6927 // for negative data because Java code converts the short value to an int with 6928 // sign extension before the shift (e.g. (short)-1 >>> 2 is 0x3FFFFFFF after the 6929 // int promotion, not 0x3FFF). Char vectors are fine since chars are unsigned. 6930 // Shorts/Chars vector shift 6931 instruct vshiftS(vec dst, vec src, vec shift) %{ 6932 predicate(!n->as_ShiftV()->is_var_shift()); 6933 match(Set dst ( LShiftVS src shift)); 6934 match(Set dst ( RShiftVS src shift)); 6935 match(Set dst (URShiftVS src shift)); 6936 effect(TEMP dst, USE src, USE shift); 6937 format %{ "vshiftw $dst,$src,$shift\t!
shift packedS" %} 6938 ins_encode %{ 6939 int opcode = this->ideal_Opcode(); 6940 if (UseAVX > 0) { 6941 int vlen_enc = vector_length_encoding(this); 6942 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6943 } else { 6944 int vlen = Matcher::vector_length(this); 6945 if (vlen == 2) { 6946 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6947 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6948 } else if (vlen == 4) { 6949 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6950 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6951 } else { 6952 assert (vlen == 8, "sanity"); 6953 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6954 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6955 } 6956 } 6957 %} 6958 ins_pipe( pipe_slow ); 6959 %} 6960 6961 // Integers vector left shift 6962 instruct vshiftI(vec dst, vec src, vec shift) %{ 6963 predicate(!n->as_ShiftV()->is_var_shift()); 6964 match(Set dst ( LShiftVI src shift)); 6965 match(Set dst ( RShiftVI src shift)); 6966 match(Set dst (URShiftVI src shift)); 6967 effect(TEMP dst, USE src, USE shift); 6968 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 6969 ins_encode %{ 6970 int opcode = this->ideal_Opcode(); 6971 if (UseAVX > 0) { 6972 int vlen_enc = vector_length_encoding(this); 6973 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6974 } else { 6975 int vlen = Matcher::vector_length(this); 6976 if (vlen == 2) { 6977 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6978 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6979 } else { 6980 assert(vlen == 4, "sanity"); 6981 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6982 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6983 } 6984 } 6985 %} 6986 ins_pipe( pipe_slow ); 6987 %} 6988 6989 // Integers vector left constant shift 6990 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ 6991 match(Set dst (LShiftVI src (LShiftCntV shift))); 6992 match(Set dst (RShiftVI src (RShiftCntV shift))); 6993 match(Set dst (URShiftVI src (RShiftCntV shift))); 6994 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %} 6995 ins_encode %{ 6996 int opcode = this->ideal_Opcode(); 6997 if (UseAVX > 0) { 6998 int vector_len = vector_length_encoding(this); 6999 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 7000 } else { 7001 int vlen = Matcher::vector_length(this); 7002 if (vlen == 2) { 7003 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 7004 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7005 } else { 7006 assert(vlen == 4, "sanity"); 7007 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7008 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7009 } 7010 } 7011 %} 7012 ins_pipe( pipe_slow ); 7013 %} 7014 7015 // Longs vector shift 7016 instruct vshiftL(vec dst, vec src, vec shift) %{ 7017 predicate(!n->as_ShiftV()->is_var_shift()); 7018 match(Set dst ( LShiftVL src shift)); 7019 match(Set dst (URShiftVL src shift)); 7020 effect(TEMP dst, USE src, USE shift); 7021 format %{ "vshiftq $dst,$src,$shift\t! 
shift packedL" %} 7022 ins_encode %{ 7023 int opcode = this->ideal_Opcode(); 7024 if (UseAVX > 0) { 7025 int vlen_enc = vector_length_encoding(this); 7026 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7027 } else { 7028 assert(Matcher::vector_length(this) == 2, ""); 7029 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7030 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 7031 } 7032 %} 7033 ins_pipe( pipe_slow ); 7034 %} 7035 7036 // Longs vector constant shift 7037 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 7038 match(Set dst (LShiftVL src (LShiftCntV shift))); 7039 match(Set dst (URShiftVL src (RShiftCntV shift))); 7040 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 7041 ins_encode %{ 7042 int opcode = this->ideal_Opcode(); 7043 if (UseAVX > 0) { 7044 int vector_len = vector_length_encoding(this); 7045 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 7046 } else { 7047 assert(Matcher::vector_length(this) == 2, ""); 7048 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7049 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7050 } 7051 %} 7052 ins_pipe( pipe_slow ); 7053 %} 7054 7055 // -------------------ArithmeticRightShift ----------------------------------- 7056 // Long vector arithmetic right shift 7057 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 7058 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 7059 match(Set dst (RShiftVL src shift)); 7060 effect(TEMP dst, TEMP tmp); 7061 format %{ "vshiftq $dst,$src,$shift" %} 7062 ins_encode %{ 7063 uint vlen = Matcher::vector_length(this); 7064 if (vlen == 2) { 7065 assert(UseSSE >= 2, "required"); 7066 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7067 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 7068 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 7069 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 7070 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 7071 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 7072 } else { 7073 assert(vlen == 4, "sanity"); 7074 assert(UseAVX > 1, "required"); 7075 int vlen_enc = Assembler::AVX_256bit; 7076 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7077 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 7078 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7079 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 7080 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 7081 } 7082 %} 7083 ins_pipe( pipe_slow ); 7084 %} 7085 7086 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 7087 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 7088 match(Set dst (RShiftVL src shift)); 7089 format %{ "vshiftq $dst,$src,$shift" %} 7090 ins_encode %{ 7091 int vlen_enc = vector_length_encoding(this); 7092 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7093 %} 7094 ins_pipe( pipe_slow ); 7095 %} 7096 7097 // ------------------- Variable Shift ----------------------------- 7098 // Byte variable shift 7099 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7100 predicate(Matcher::vector_length(n) <= 8 && 7101 n->as_ShiftV()->is_var_shift() && 7102 !VM_Version::supports_avx512bw()); 7103 match(Set dst ( LShiftVB src shift)); 7104 match(Set dst ( RShiftVB src shift)); 7105 match(Set dst (URShiftVB src shift)); 
7106 effect(TEMP dst, TEMP vtmp); 7107 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7108 ins_encode %{ 7109 assert(UseAVX >= 2, "required"); 7110 7111 int opcode = this->ideal_Opcode(); 7112 int vlen_enc = Assembler::AVX_128bit; 7113 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7114 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7115 %} 7116 ins_pipe( pipe_slow ); 7117 %} 7118 7119 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7120 predicate(Matcher::vector_length(n) == 16 && 7121 n->as_ShiftV()->is_var_shift() && 7122 !VM_Version::supports_avx512bw()); 7123 match(Set dst ( LShiftVB src shift)); 7124 match(Set dst ( RShiftVB src shift)); 7125 match(Set dst (URShiftVB src shift)); 7126 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7127 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7128 ins_encode %{ 7129 assert(UseAVX >= 2, "required"); 7130 7131 int opcode = this->ideal_Opcode(); 7132 int vlen_enc = Assembler::AVX_128bit; 7133 // Shift lower half and get word result in dst 7134 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7135 7136 // Shift upper half and get word result in vtmp1 7137 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7138 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7139 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7140 7141 // Merge and down convert the two word results to byte in dst 7142 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7143 %} 7144 ins_pipe( pipe_slow ); 7145 %} 7146 7147 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 7148 predicate(Matcher::vector_length(n) == 32 && 7149 n->as_ShiftV()->is_var_shift() && 7150 !VM_Version::supports_avx512bw()); 7151 match(Set dst ( LShiftVB src shift)); 7152 match(Set dst ( RShiftVB src shift)); 7153 match(Set dst (URShiftVB src shift)); 7154 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 7155 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 7156 ins_encode %{ 7157 assert(UseAVX >= 2, "required"); 7158 7159 int opcode = this->ideal_Opcode(); 7160 int vlen_enc = Assembler::AVX_128bit; 7161 // Process lower 128 bits and get result in dst 7162 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7163 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7164 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7165 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7166 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7167 7168 // Process higher 128 bits and get result in vtmp3 7169 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7170 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7171 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 7172 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 7173 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 7174 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 
$vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7175 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 7176 7177 // Merge the two results in dst 7178 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7179 %} 7180 ins_pipe( pipe_slow ); 7181 %} 7182 7183 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ 7184 predicate(Matcher::vector_length(n) <= 32 && 7185 n->as_ShiftV()->is_var_shift() && 7186 VM_Version::supports_avx512bw()); 7187 match(Set dst ( LShiftVB src shift)); 7188 match(Set dst ( RShiftVB src shift)); 7189 match(Set dst (URShiftVB src shift)); 7190 effect(TEMP dst, TEMP vtmp); 7191 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7192 ins_encode %{ 7193 assert(UseAVX > 2, "required"); 7194 7195 int opcode = this->ideal_Opcode(); 7196 int vlen_enc = vector_length_encoding(this); 7197 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7198 %} 7199 ins_pipe( pipe_slow ); 7200 %} 7201 7202 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7203 predicate(Matcher::vector_length(n) == 64 && 7204 n->as_ShiftV()->is_var_shift() && 7205 VM_Version::supports_avx512bw()); 7206 match(Set dst ( LShiftVB src shift)); 7207 match(Set dst ( RShiftVB src shift)); 7208 match(Set dst (URShiftVB src shift)); 7209 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7210 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7211 ins_encode %{ 7212 assert(UseAVX > 2, "required"); 7213 7214 int opcode = this->ideal_Opcode(); 7215 int vlen_enc = Assembler::AVX_256bit; 7216 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7217 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7218 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7219 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7220 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7221 %} 7222 ins_pipe( pipe_slow ); 7223 %} 7224 7225 // Short variable shift 7226 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7227 predicate(Matcher::vector_length(n) <= 8 && 7228 n->as_ShiftV()->is_var_shift() && 7229 !VM_Version::supports_avx512bw()); 7230 match(Set dst ( LShiftVS src shift)); 7231 match(Set dst ( RShiftVS src shift)); 7232 match(Set dst (URShiftVS src shift)); 7233 effect(TEMP dst, TEMP vtmp); 7234 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7235 ins_encode %{ 7236 assert(UseAVX >= 2, "required"); 7237 7238 int opcode = this->ideal_Opcode(); 7239 bool sign = (opcode != Op_URShiftVS); 7240 int vlen_enc = Assembler::AVX_256bit; 7241 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 7242 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 7243 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 7244 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7245 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 7246 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7247 %} 7248 ins_pipe( pipe_slow ); 7249 %} 7250 7251 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7252 predicate(Matcher::vector_length(n) == 16 && 
7253 n->as_ShiftV()->is_var_shift() && 7254 !VM_Version::supports_avx512bw()); 7255 match(Set dst ( LShiftVS src shift)); 7256 match(Set dst ( RShiftVS src shift)); 7257 match(Set dst (URShiftVS src shift)); 7258 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7259 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7260 ins_encode %{ 7261 assert(UseAVX >= 2, "required"); 7262 7263 int opcode = this->ideal_Opcode(); 7264 bool sign = (opcode != Op_URShiftVS); 7265 int vlen_enc = Assembler::AVX_256bit; 7266 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP 7267 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 7268 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7269 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7270 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7271 7272 // Shift upper half, with result in dst using vtmp1 as TEMP 7273 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 7274 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 7275 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7276 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7277 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7278 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7279 7280 // Merge lower and upper half result into dst 7281 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7282 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 7283 %} 7284 ins_pipe( pipe_slow ); 7285 %} 7286 7287 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 7288 predicate(n->as_ShiftV()->is_var_shift() && 7289 VM_Version::supports_avx512bw()); 7290 match(Set dst ( LShiftVS src shift)); 7291 match(Set dst ( RShiftVS src shift)); 7292 match(Set dst (URShiftVS src shift)); 7293 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 7294 ins_encode %{ 7295 assert(UseAVX > 2, "required"); 7296 7297 int opcode = this->ideal_Opcode(); 7298 int vlen_enc = vector_length_encoding(this); 7299 if (!VM_Version::supports_avx512vl()) { 7300 vlen_enc = Assembler::AVX_512bit; 7301 } 7302 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7303 %} 7304 ins_pipe( pipe_slow ); 7305 %} 7306 7307 //Integer variable shift 7308 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 7309 predicate(n->as_ShiftV()->is_var_shift()); 7310 match(Set dst ( LShiftVI src shift)); 7311 match(Set dst ( RShiftVI src shift)); 7312 match(Set dst (URShiftVI src shift)); 7313 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 7314 ins_encode %{ 7315 assert(UseAVX >= 2, "required"); 7316 7317 int opcode = this->ideal_Opcode(); 7318 int vlen_enc = vector_length_encoding(this); 7319 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7320 %} 7321 ins_pipe( pipe_slow ); 7322 %} 7323 7324 //Long variable shift 7325 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 7326 predicate(n->as_ShiftV()->is_var_shift()); 7327 match(Set dst ( LShiftVL src shift)); 7328 match(Set dst (URShiftVL src shift)); 7329 format %{ "vector_varshift_long $dst,$src,$shift\t!" 
%} 7330 ins_encode %{ 7331 assert(UseAVX >= 2, "required"); 7332 7333 int opcode = this->ideal_Opcode(); 7334 int vlen_enc = vector_length_encoding(this); 7335 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7336 %} 7337 ins_pipe( pipe_slow ); 7338 %} 7339 7340 //Long variable right shift arithmetic 7341 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 7342 predicate(Matcher::vector_length(n) <= 4 && 7343 n->as_ShiftV()->is_var_shift() && 7344 UseAVX == 2); 7345 match(Set dst (RShiftVL src shift)); 7346 effect(TEMP dst, TEMP vtmp); 7347 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %} 7348 ins_encode %{ 7349 int opcode = this->ideal_Opcode(); 7350 int vlen_enc = vector_length_encoding(this); 7351 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 7352 $vtmp$$XMMRegister); 7353 %} 7354 ins_pipe( pipe_slow ); 7355 %} 7356 7357 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 7358 predicate(n->as_ShiftV()->is_var_shift() && 7359 UseAVX > 2); 7360 match(Set dst (RShiftVL src shift)); 7361 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %} 7362 ins_encode %{ 7363 int opcode = this->ideal_Opcode(); 7364 int vlen_enc = vector_length_encoding(this); 7365 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7366 %} 7367 ins_pipe( pipe_slow ); 7368 %} 7369 7370 // --------------------------------- AND -------------------------------------- 7371 7372 instruct vand(vec dst, vec src) %{ 7373 predicate(UseAVX == 0); 7374 match(Set dst (AndV dst src)); 7375 format %{ "pand $dst,$src\t! and vectors" %} 7376 ins_encode %{ 7377 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7378 %} 7379 ins_pipe( pipe_slow ); 7380 %} 7381 7382 instruct vand_reg(vec dst, vec src1, vec src2) %{ 7383 predicate(UseAVX > 0); 7384 match(Set dst (AndV src1 src2)); 7385 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 7386 ins_encode %{ 7387 int vlen_enc = vector_length_encoding(this); 7388 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7389 %} 7390 ins_pipe( pipe_slow ); 7391 %} 7392 7393 instruct vand_mem(vec dst, vec src, memory mem) %{ 7394 predicate((UseAVX > 0) && 7395 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7396 match(Set dst (AndV src (LoadVector mem))); 7397 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 7398 ins_encode %{ 7399 int vlen_enc = vector_length_encoding(this); 7400 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7401 %} 7402 ins_pipe( pipe_slow ); 7403 %} 7404 7405 // --------------------------------- OR --------------------------------------- 7406 7407 instruct vor(vec dst, vec src) %{ 7408 predicate(UseAVX == 0); 7409 match(Set dst (OrV dst src)); 7410 format %{ "por $dst,$src\t! or vectors" %} 7411 ins_encode %{ 7412 __ por($dst$$XMMRegister, $src$$XMMRegister); 7413 %} 7414 ins_pipe( pipe_slow ); 7415 %} 7416 7417 instruct vor_reg(vec dst, vec src1, vec src2) %{ 7418 predicate(UseAVX > 0); 7419 match(Set dst (OrV src1 src2)); 7420 format %{ "vpor $dst,$src1,$src2\t! 
or vectors" %} 7421 ins_encode %{ 7422 int vlen_enc = vector_length_encoding(this); 7423 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7424 %} 7425 ins_pipe( pipe_slow ); 7426 %} 7427 7428 instruct vor_mem(vec dst, vec src, memory mem) %{ 7429 predicate((UseAVX > 0) && 7430 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7431 match(Set dst (OrV src (LoadVector mem))); 7432 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 7433 ins_encode %{ 7434 int vlen_enc = vector_length_encoding(this); 7435 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7436 %} 7437 ins_pipe( pipe_slow ); 7438 %} 7439 7440 // --------------------------------- XOR -------------------------------------- 7441 7442 instruct vxor(vec dst, vec src) %{ 7443 predicate(UseAVX == 0); 7444 match(Set dst (XorV dst src)); 7445 format %{ "pxor $dst,$src\t! xor vectors" %} 7446 ins_encode %{ 7447 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7448 %} 7449 ins_pipe( pipe_slow ); 7450 %} 7451 7452 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7453 predicate(UseAVX > 0); 7454 match(Set dst (XorV src1 src2)); 7455 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7456 ins_encode %{ 7457 int vlen_enc = vector_length_encoding(this); 7458 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7459 %} 7460 ins_pipe( pipe_slow ); 7461 %} 7462 7463 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7464 predicate((UseAVX > 0) && 7465 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7466 match(Set dst (XorV src (LoadVector mem))); 7467 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7468 ins_encode %{ 7469 int vlen_enc = vector_length_encoding(this); 7470 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7471 %} 7472 ins_pipe( pipe_slow ); 7473 %} 7474 7475 // --------------------------------- VectorCast -------------------------------------- 7476 7477 instruct vcastBtoX(vec dst, vec src) %{ 7478 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE); 7479 match(Set dst (VectorCastB2X src)); 7480 format %{ "vector_cast_b2x $dst,$src\t!" %} 7481 ins_encode %{ 7482 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7483 int vlen_enc = vector_length_encoding(this); 7484 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7485 %} 7486 ins_pipe( pipe_slow ); 7487 %} 7488 7489 instruct vcastBtoD(legVec dst, legVec src) %{ 7490 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE); 7491 match(Set dst (VectorCastB2X src)); 7492 format %{ "vector_cast_b2x $dst,$src\t!" 
%} 7493 ins_encode %{ 7494 int vlen_enc = vector_length_encoding(this); 7495 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7496 %} 7497 ins_pipe( pipe_slow ); 7498 %} 7499 7500 instruct castStoX(vec dst, vec src) %{ 7501 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7502 Matcher::vector_length(n->in(1)) <= 8 && // src 7503 Matcher::vector_element_basic_type(n) == T_BYTE); 7504 match(Set dst (VectorCastS2X src)); 7505 format %{ "vector_cast_s2x $dst,$src" %} 7506 ins_encode %{ 7507 assert(UseAVX > 0, "required"); 7508 7509 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7510 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7511 %} 7512 ins_pipe( pipe_slow ); 7513 %} 7514 7515 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7516 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7517 Matcher::vector_length(n->in(1)) == 16 && // src 7518 Matcher::vector_element_basic_type(n) == T_BYTE); 7519 effect(TEMP dst, TEMP vtmp); 7520 match(Set dst (VectorCastS2X src)); 7521 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %} 7522 ins_encode %{ 7523 assert(UseAVX > 0, "required"); 7524 7525 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7526 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7527 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7528 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7529 %} 7530 ins_pipe( pipe_slow ); 7531 %} 7532 7533 instruct vcastStoX_evex(vec dst, vec src) %{ 7534 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7535 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7536 match(Set dst (VectorCastS2X src)); 7537 format %{ "vector_cast_s2x $dst,$src\t!" %} 7538 ins_encode %{ 7539 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7540 int src_vlen_enc = vector_length_encoding(this, $src); 7541 int vlen_enc = vector_length_encoding(this); 7542 switch (to_elem_bt) { 7543 case T_BYTE: 7544 if (!VM_Version::supports_avx512vl()) { 7545 vlen_enc = Assembler::AVX_512bit; 7546 } 7547 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7548 break; 7549 case T_INT: 7550 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7551 break; 7552 case T_FLOAT: 7553 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7554 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7555 break; 7556 case T_LONG: 7557 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7558 break; 7559 case T_DOUBLE: { 7560 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? 
Assembler::AVX_256bit : Assembler::AVX_128bit; 7561 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7562 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7563 break; 7564 } 7565 default: 7566 ShouldNotReachHere(); 7567 } 7568 %} 7569 ins_pipe( pipe_slow ); 7570 %} 7571 7572 instruct castItoX(vec dst, vec src) %{ 7573 predicate(UseAVX <= 2 && 7574 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7575 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7576 match(Set dst (VectorCastI2X src)); 7577 format %{ "vector_cast_i2x $dst,$src" %} 7578 ins_encode %{ 7579 assert(UseAVX > 0, "required"); 7580 7581 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7582 int vlen_enc = vector_length_encoding(this, $src); 7583 7584 if (to_elem_bt == T_BYTE) { 7585 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7586 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7587 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7588 } else { 7589 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7590 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7591 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7592 } 7593 %} 7594 ins_pipe( pipe_slow ); 7595 %} 7596 7597 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7598 predicate(UseAVX <= 2 && 7599 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7600 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7601 match(Set dst (VectorCastI2X src)); 7602 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %} 7603 effect(TEMP dst, TEMP vtmp); 7604 ins_encode %{ 7605 assert(UseAVX > 0, "required"); 7606 7607 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7608 int vlen_enc = vector_length_encoding(this, $src); 7609 7610 if (to_elem_bt == T_BYTE) { 7611 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7612 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7613 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7614 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7615 } else { 7616 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7617 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7618 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7619 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7620 } 7621 %} 7622 ins_pipe( pipe_slow ); 7623 %} 7624 7625 instruct vcastItoX_evex(vec dst, vec src) %{ 7626 predicate(UseAVX > 2 || 7627 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7628 match(Set dst (VectorCastI2X src)); 7629 format %{ "vector_cast_i2x $dst,$src\t!" 
%} 7630 ins_encode %{ 7631 assert(UseAVX > 0, "required"); 7632 7633 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7634 int src_vlen_enc = vector_length_encoding(this, $src); 7635 int dst_vlen_enc = vector_length_encoding(this); 7636 switch (dst_elem_bt) { 7637 case T_BYTE: 7638 if (!VM_Version::supports_avx512vl()) { 7639 src_vlen_enc = Assembler::AVX_512bit; 7640 } 7641 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7642 break; 7643 case T_SHORT: 7644 if (!VM_Version::supports_avx512vl()) { 7645 src_vlen_enc = Assembler::AVX_512bit; 7646 } 7647 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7648 break; 7649 case T_FLOAT: 7650 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7651 break; 7652 case T_LONG: 7653 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7654 break; 7655 case T_DOUBLE: 7656 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7657 break; 7658 default: 7659 ShouldNotReachHere(); 7660 } 7661 %} 7662 ins_pipe( pipe_slow ); 7663 %} 7664 7665 instruct vcastLtoBS(vec dst, vec src) %{ 7666 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7667 UseAVX <= 2); 7668 match(Set dst (VectorCastL2X src)); 7669 format %{ "vector_cast_l2x $dst,$src" %} 7670 ins_encode %{ 7671 assert(UseAVX > 0, "required"); 7672 7673 int vlen = Matcher::vector_length_in_bytes(this, $src); 7674 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7675 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask()) 7676 : ExternalAddress(vector_int_to_short_mask()); 7677 if (vlen <= 16) { 7678 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7679 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7680 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7681 } else { 7682 assert(vlen <= 32, "required"); 7683 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7684 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7685 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7686 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7687 } 7688 if (to_elem_bt == T_BYTE) { 7689 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7690 } 7691 %} 7692 ins_pipe( pipe_slow ); 7693 %} 7694 7695 instruct vcastLtoX_evex(vec dst, vec src) %{ 7696 predicate(UseAVX > 2 || 7697 (Matcher::vector_element_basic_type(n) == T_INT || 7698 Matcher::vector_element_basic_type(n) == T_FLOAT || 7699 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7700 match(Set dst (VectorCastL2X src)); 7701 format %{ "vector_cast_l2x $dst,$src\t!" 
%} 7702 ins_encode %{ 7703 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7704 int vlen = Matcher::vector_length_in_bytes(this, $src); 7705 int vlen_enc = vector_length_encoding(this, $src); 7706 switch (to_elem_bt) { 7707 case T_BYTE: 7708 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7709 vlen_enc = Assembler::AVX_512bit; 7710 } 7711 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7712 break; 7713 case T_SHORT: 7714 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7715 vlen_enc = Assembler::AVX_512bit; 7716 } 7717 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7718 break; 7719 case T_INT: 7720 if (vlen == 8) { 7721 if ($dst$$XMMRegister != $src$$XMMRegister) { 7722 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7723 } 7724 } else if (vlen == 16) { 7725 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7726 } else if (vlen == 32) { 7727 if (UseAVX > 2) { 7728 if (!VM_Version::supports_avx512vl()) { 7729 vlen_enc = Assembler::AVX_512bit; 7730 } 7731 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7732 } else { 7733 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7734 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7735 } 7736 } else { // vlen == 64 7737 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7738 } 7739 break; 7740 case T_FLOAT: 7741 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7742 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7743 break; 7744 case T_DOUBLE: 7745 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7746 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7747 break; 7748 7749 default: assert(false, "%s", type2name(to_elem_bt)); 7750 } 7751 %} 7752 ins_pipe( pipe_slow ); 7753 %} 7754 7755 instruct vcastFtoD_reg(vec dst, vec src) %{ 7756 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7757 match(Set dst (VectorCastF2X src)); 7758 format %{ "vector_cast_f2d $dst,$src\t!" %} 7759 ins_encode %{ 7760 int vlen_enc = vector_length_encoding(this); 7761 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7762 %} 7763 ins_pipe( pipe_slow ); 7764 %} 7765 7766 7767 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7768 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7769 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4); 7770 match(Set dst (VectorCastF2X src)); 7771 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7772 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 7773 ins_encode %{ 7774 int vlen_enc = vector_length_encoding(this, $src); 7775 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7776 // JDK-8292878 removed the need for an explicit scratch register needed to load greater than 7777 // 32 bit addresses for register indirect addressing mode since stub constants 7778 // are part of code cache and there is a cap of 2G on ReservedCodeCacheSize currently. 7779 // However, targets are free to increase this limit, but having a large code cache size 7780 // greater than 2G looks unreasonable in practical scenario, on the hind side with given 7781 // cap we save a temporary register allocation which in limiting case can prevent 7782 // spilling in high register pressure blocks. 
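    // The shared helper below is expected to apply Java-style narrowing per lane
    // (NaN -> 0, out-of-range values saturated) rather than the raw cvttps2dq result,
    // which is the 0x80000000 integer indefinite value for NaN and out-of-range inputs;
    // the $xtmp registers and the vector_float_signflip() constant appear to be the
    // scratch material used for that post-conversion fixup.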
7783 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7784 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 7785 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7786 %} 7787 ins_pipe( pipe_slow ); 7788 %} 7789 7790 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7791 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7792 is_integral_type(Matcher::vector_element_basic_type(n))); 7793 match(Set dst (VectorCastF2X src)); 7794 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7795 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7796 ins_encode %{ 7797 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7798 if (to_elem_bt == T_LONG) { 7799 int vlen_enc = vector_length_encoding(this); 7800 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7801 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7802 ExternalAddress(vector_double_signflip()), noreg, vlen_enc); 7803 } else { 7804 int vlen_enc = vector_length_encoding(this, $src); 7805 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7806 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7807 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7808 } 7809 %} 7810 ins_pipe( pipe_slow ); 7811 %} 7812 7813 instruct vcastDtoF_reg(vec dst, vec src) %{ 7814 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 7815 match(Set dst (VectorCastD2X src)); 7816 format %{ "vector_cast_d2x $dst,$src\t!" %} 7817 ins_encode %{ 7818 int vlen_enc = vector_length_encoding(this, $src); 7819 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7820 %} 7821 ins_pipe( pipe_slow ); 7822 %} 7823 7824 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{ 7825 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7826 is_integral_type(Matcher::vector_element_basic_type(n))); 7827 match(Set dst (VectorCastD2X src)); 7828 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr); 7829 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %} 7830 ins_encode %{ 7831 int vlen_enc = vector_length_encoding(this, $src); 7832 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7833 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7834 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister, 7835 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7836 %} 7837 ins_pipe( pipe_slow ); 7838 %} 7839 7840 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7841 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7842 is_integral_type(Matcher::vector_element_basic_type(n))); 7843 match(Set dst (VectorCastD2X src)); 7844 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7845 format %{ "vector_cast_d2x $dst,$src\t! 
using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7846 ins_encode %{ 7847 int vlen_enc = vector_length_encoding(this, $src); 7848 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7849 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) : 7850 ExternalAddress(vector_float_signflip()); 7851 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7852 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc); 7853 %} 7854 ins_pipe( pipe_slow ); 7855 %} 7856 7857 instruct vucast(vec dst, vec src) %{ 7858 match(Set dst (VectorUCastB2X src)); 7859 match(Set dst (VectorUCastS2X src)); 7860 match(Set dst (VectorUCastI2X src)); 7861 format %{ "vector_ucast $dst,$src\t!" %} 7862 ins_encode %{ 7863 assert(UseAVX > 0, "required"); 7864 7865 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7866 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7867 int vlen_enc = vector_length_encoding(this); 7868 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7869 %} 7870 ins_pipe( pipe_slow ); 7871 %} 7872 7873 #ifdef _LP64 7874 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7875 predicate(!VM_Version::supports_avx512vl() && 7876 Matcher::vector_length_in_bytes(n) < 64 && 7877 Matcher::vector_element_basic_type(n) == T_INT); 7878 match(Set dst (RoundVF src)); 7879 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7880 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %} 7881 ins_encode %{ 7882 int vlen_enc = vector_length_encoding(this); 7883 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7884 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, 7885 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7886 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister); 7887 %} 7888 ins_pipe( pipe_slow ); 7889 %} 7890 7891 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7892 predicate((VM_Version::supports_avx512vl() || 7893 Matcher::vector_length_in_bytes(n) == 64) && 7894 Matcher::vector_element_basic_type(n) == T_INT); 7895 match(Set dst (RoundVF src)); 7896 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7897 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7898 ins_encode %{ 7899 int vlen_enc = vector_length_encoding(this); 7900 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 
0x3FBF : 0x3F80)); 7901 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, 7902 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7903 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7904 %} 7905 ins_pipe( pipe_slow ); 7906 %} 7907 7908 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7909 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7910 match(Set dst (RoundVD src)); 7911 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7912 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7913 ins_encode %{ 7914 int vlen_enc = vector_length_encoding(this); 7915 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7916 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, 7917 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc, 7918 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7919 %} 7920 ins_pipe( pipe_slow ); 7921 %} 7922 7923 #endif // _LP64 7924 7925 // --------------------------------- VectorMaskCmp -------------------------------------- 7926 7927 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7928 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7929 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 7930 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7931 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7932 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7933 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %} 7934 ins_encode %{ 7935 int vlen_enc = vector_length_encoding(this, $src1); 7936 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7937 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7938 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7939 } else { 7940 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7941 } 7942 %} 7943 ins_pipe( pipe_slow ); 7944 %} 7945 7946 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7947 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 7948 n->bottom_type()->isa_vectmask() == nullptr && 7949 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7950 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7951 effect(TEMP ktmp); 7952 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7953 ins_encode %{ 7954 int vlen_enc = Assembler::AVX_512bit; 7955 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7956 KRegister mask = k0; // The comparison itself is not being masked. 
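    // The EVEX compare writes a k-register rather than a vector, so the boolean vector
    // expected by the IR is rebuilt with a masked load of vector_all_bits_set(): lanes
    // selected by $ktmp become all ones and, since the merge flag passed below is false,
    // the remaining lanes are zeroed.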
7957 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7958 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7959 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7960 } else { 7961 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7962 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7963 } 7964 %} 7965 ins_pipe( pipe_slow ); 7966 %} 7967 7968 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 7969 predicate(n->bottom_type()->isa_vectmask() && 7970 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7971 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7972 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 7973 ins_encode %{ 7974 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 7975 int vlen_enc = vector_length_encoding(this, $src1); 7976 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7977 KRegister mask = k0; // The comparison itself is not being masked. 7978 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7979 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7980 } else { 7981 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7982 } 7983 %} 7984 ins_pipe( pipe_slow ); 7985 %} 7986 7987 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7988 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7989 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7990 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7991 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7992 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7993 (n->in(2)->get_int() == BoolTest::eq || 7994 n->in(2)->get_int() == BoolTest::lt || 7995 n->in(2)->get_int() == BoolTest::gt)); // cond 7996 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7997 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %} 7998 ins_encode %{ 7999 int vlen_enc = vector_length_encoding(this, $src1); 8000 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8001 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 8002 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc); 8003 %} 8004 ins_pipe( pipe_slow ); 8005 %} 8006 8007 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 8008 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8009 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 8010 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 8011 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8012 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 8013 (n->in(2)->get_int() == BoolTest::ne || 8014 n->in(2)->get_int() == BoolTest::le || 8015 n->in(2)->get_int() == BoolTest::ge)); // cond 8016 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8017 effect(TEMP dst, TEMP xtmp); 8018 format %{ "vector_compare $dst,$src1,$src2,$cond\t! 
using $xtmp as TEMP" %} 8019 ins_encode %{ 8020 int vlen_enc = vector_length_encoding(this, $src1); 8021 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8022 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 8023 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 8024 %} 8025 ins_pipe( pipe_slow ); 8026 %} 8027 8028 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 8029 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8030 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 8031 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 8032 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8033 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 8034 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8035 effect(TEMP dst, TEMP xtmp); 8036 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %} 8037 ins_encode %{ 8038 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1))); 8039 int vlen_enc = vector_length_encoding(this, $src1); 8040 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8041 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 8042 8043 if (vlen_enc == Assembler::AVX_128bit) { 8044 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 8045 } else { 8046 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 8047 } 8048 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8049 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8050 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 8051 %} 8052 ins_pipe( pipe_slow ); 8053 %} 8054 8055 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 8056 predicate((n->bottom_type()->isa_vectmask() == nullptr && 8057 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 8058 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 8059 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8060 effect(TEMP ktmp); 8061 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 8062 ins_encode %{ 8063 assert(UseAVX > 2, "required"); 8064 8065 int vlen_enc = vector_length_encoding(this, $src1); 8066 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8067 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 8068 KRegister mask = k0; // The comparison itself is not being masked. 
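    // Same materialization idea as the floating-point case above: the compare itself
    // targets $ktmp, with the signed/unsigned predicate form presumably selected by the
    // !is_unsigned argument, and the -1/0 vector result is then produced by a masked
    // load of vector_all_bits_set() with merge left false so unselected lanes are zeroed.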
8069 bool merge = false; 8070 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 8071 8072 switch (src1_elem_bt) { 8073 case T_INT: { 8074 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8075 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 8076 break; 8077 } 8078 case T_LONG: { 8079 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8080 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 8081 break; 8082 } 8083 default: assert(false, "%s", type2name(src1_elem_bt)); 8084 } 8085 %} 8086 ins_pipe( pipe_slow ); 8087 %} 8088 8089 8090 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{ 8091 predicate(n->bottom_type()->isa_vectmask() && 8092 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 8093 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8094 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %} 8095 ins_encode %{ 8096 assert(UseAVX > 2, "required"); 8097 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 8098 8099 int vlen_enc = vector_length_encoding(this, $src1); 8100 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8101 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 8102 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 8103 8104 // Comparison i 8105 switch (src1_elem_bt) { 8106 case T_BYTE: { 8107 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8108 break; 8109 } 8110 case T_SHORT: { 8111 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8112 break; 8113 } 8114 case T_INT: { 8115 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8116 break; 8117 } 8118 case T_LONG: { 8119 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8120 break; 8121 } 8122 default: assert(false, "%s", type2name(src1_elem_bt)); 8123 } 8124 %} 8125 ins_pipe( pipe_slow ); 8126 %} 8127 8128 // Extract 8129 8130 instruct extractI(rRegI dst, legVec src, immU8 idx) %{ 8131 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src 8132 match(Set dst (ExtractI src idx)); 8133 match(Set dst (ExtractS src idx)); 8134 #ifdef _LP64 8135 match(Set dst (ExtractB src idx)); 8136 #endif 8137 format %{ "extractI $dst,$src,$idx\t!" %} 8138 ins_encode %{ 8139 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8140 8141 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8142 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8143 %} 8144 ins_pipe( pipe_slow ); 8145 %} 8146 8147 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ 8148 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src 8149 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src 8150 match(Set dst (ExtractI src idx)); 8151 match(Set dst (ExtractS src idx)); 8152 #ifdef _LP64 8153 match(Set dst (ExtractB src idx)); 8154 #endif 8155 effect(TEMP vtmp); 8156 format %{ "vextractI $dst,$src,$idx\t! 
using $vtmp as TEMP" %} 8157 ins_encode %{ 8158 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8159 8160 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8161 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8162 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); 8163 %} 8164 ins_pipe( pipe_slow ); 8165 %} 8166 8167 #ifdef _LP64 8168 instruct extractL(rRegL dst, legVec src, immU8 idx) %{ 8169 predicate(Matcher::vector_length(n->in(1)) <= 2); // src 8170 match(Set dst (ExtractL src idx)); 8171 format %{ "extractL $dst,$src,$idx\t!" %} 8172 ins_encode %{ 8173 assert(UseSSE >= 4, "required"); 8174 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8175 8176 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8177 %} 8178 ins_pipe( pipe_slow ); 8179 %} 8180 8181 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ 8182 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8183 Matcher::vector_length(n->in(1)) == 8); // src 8184 match(Set dst (ExtractL src idx)); 8185 effect(TEMP vtmp); 8186 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %} 8187 ins_encode %{ 8188 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8189 8190 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8191 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 8192 %} 8193 ins_pipe( pipe_slow ); 8194 %} 8195 #endif 8196 8197 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8198 predicate(Matcher::vector_length(n->in(1)) <= 4); 8199 match(Set dst (ExtractF src idx)); 8200 effect(TEMP dst, TEMP vtmp); 8201 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8202 ins_encode %{ 8203 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8204 8205 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 8206 %} 8207 ins_pipe( pipe_slow ); 8208 %} 8209 8210 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8211 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 8212 Matcher::vector_length(n->in(1)/*src*/) == 16); 8213 match(Set dst (ExtractF src idx)); 8214 effect(TEMP vtmp); 8215 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8216 ins_encode %{ 8217 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8218 8219 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8220 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 8221 %} 8222 ins_pipe( pipe_slow ); 8223 %} 8224 8225 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 8226 predicate(Matcher::vector_length(n->in(1)) == 2); // src 8227 match(Set dst (ExtractD src idx)); 8228 format %{ "extractD $dst,$src,$idx\t!" %} 8229 ins_encode %{ 8230 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8231 8232 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8233 %} 8234 ins_pipe( pipe_slow ); 8235 %} 8236 8237 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 8238 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8239 Matcher::vector_length(n->in(1)) == 8); // src 8240 match(Set dst (ExtractD src idx)); 8241 effect(TEMP vtmp); 8242 format %{ "vextractD $dst,$src,$idx\t! 
using $vtmp as TEMP" %} 8243 ins_encode %{ 8244 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8245 8246 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8247 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 8248 %} 8249 ins_pipe( pipe_slow ); 8250 %} 8251 8252 // --------------------------------- Vector Blend -------------------------------------- 8253 8254 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 8255 predicate(UseAVX == 0); 8256 match(Set dst (VectorBlend (Binary dst src) mask)); 8257 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %} 8258 effect(TEMP tmp); 8259 ins_encode %{ 8260 assert(UseSSE >= 4, "required"); 8261 8262 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 8263 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 8264 } 8265 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 8266 %} 8267 ins_pipe( pipe_slow ); 8268 %} 8269 8270 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8271 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8272 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8273 Matcher::vector_length_in_bytes(n) <= 32 && 8274 is_integral_type(Matcher::vector_element_basic_type(n))); 8275 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8276 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8277 ins_encode %{ 8278 int vlen_enc = vector_length_encoding(this); 8279 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8280 %} 8281 ins_pipe( pipe_slow ); 8282 %} 8283 8284 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8285 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8286 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8287 Matcher::vector_length_in_bytes(n) <= 32 && 8288 !is_integral_type(Matcher::vector_element_basic_type(n))); 8289 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8290 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8291 ins_encode %{ 8292 int vlen_enc = vector_length_encoding(this); 8293 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8294 %} 8295 ins_pipe( pipe_slow ); 8296 %} 8297 8298 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{ 8299 predicate(UseAVX > 0 && EnableX86ECoreOpts && 8300 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8301 Matcher::vector_length_in_bytes(n) <= 32); 8302 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8303 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %} 8304 effect(TEMP vtmp, TEMP dst); 8305 ins_encode %{ 8306 int vlen_enc = vector_length_encoding(this); 8307 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8308 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8309 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8310 %} 8311 ins_pipe( pipe_slow ); 8312 %} 8313 8314 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 8315 predicate(Matcher::vector_length_in_bytes(n) == 64 && 8316 n->in(2)->bottom_type()->isa_vectmask() == nullptr); 8317 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8318 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using k2 as TEMP" %} 8319 effect(TEMP ktmp); 8320 ins_encode %{ 8321 int vlen_enc = Assembler::AVX_512bit; 8322 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8323 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 8324 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8325 %} 8326 ins_pipe( pipe_slow ); 8327 %} 8328 8329 8330 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 8331 predicate(n->in(2)->bottom_type()->isa_vectmask() && 8332 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 8333 VM_Version::supports_avx512bw())); 8334 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8335 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 8336 ins_encode %{ 8337 int vlen_enc = vector_length_encoding(this); 8338 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8339 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8340 %} 8341 ins_pipe( pipe_slow ); 8342 %} 8343 8344 // --------------------------------- ABS -------------------------------------- 8345 // a = |a| 8346 instruct vabsB_reg(vec dst, vec src) %{ 8347 match(Set dst (AbsVB src)); 8348 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 8349 ins_encode %{ 8350 uint vlen = Matcher::vector_length(this); 8351 if (vlen <= 16) { 8352 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8353 } else { 8354 int vlen_enc = vector_length_encoding(this); 8355 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8356 } 8357 %} 8358 ins_pipe( pipe_slow ); 8359 %} 8360 8361 instruct vabsS_reg(vec dst, vec src) %{ 8362 match(Set dst (AbsVS src)); 8363 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 8364 ins_encode %{ 8365 uint vlen = Matcher::vector_length(this); 8366 if (vlen <= 8) { 8367 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8368 } else { 8369 int vlen_enc = vector_length_encoding(this); 8370 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8371 } 8372 %} 8373 ins_pipe( pipe_slow ); 8374 %} 8375 8376 instruct vabsI_reg(vec dst, vec src) %{ 8377 match(Set dst (AbsVI src)); 8378 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 8379 ins_encode %{ 8380 uint vlen = Matcher::vector_length(this); 8381 if (vlen <= 4) { 8382 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8383 } else { 8384 int vlen_enc = vector_length_encoding(this); 8385 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8386 } 8387 %} 8388 ins_pipe( pipe_slow ); 8389 %} 8390 8391 instruct vabsL_reg(vec dst, vec src) %{ 8392 match(Set dst (AbsVL src)); 8393 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 8394 ins_encode %{ 8395 assert(UseAVX > 2, "required"); 8396 int vlen_enc = vector_length_encoding(this); 8397 if (!VM_Version::supports_avx512vl()) { 8398 vlen_enc = Assembler::AVX_512bit; 8399 } 8400 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8401 %} 8402 ins_pipe( pipe_slow ); 8403 %} 8404 8405 // --------------------------------- ABSNEG -------------------------------------- 8406 8407 instruct vabsnegF(vec dst, vec src) %{ 8408 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 8409 match(Set dst (AbsVF src)); 8410 match(Set dst (NegVF src)); 8411 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 8412 ins_cost(150); 8413 ins_encode %{ 8414 int opcode = 
this->ideal_Opcode(); 8415 int vlen = Matcher::vector_length(this); 8416 if (vlen == 2) { 8417 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8418 } else { 8419 assert(vlen == 8 || vlen == 16, "required"); 8420 int vlen_enc = vector_length_encoding(this); 8421 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8422 } 8423 %} 8424 ins_pipe( pipe_slow ); 8425 %} 8426 8427 instruct vabsneg4F(vec dst) %{ 8428 predicate(Matcher::vector_length(n) == 4); 8429 match(Set dst (AbsVF dst)); 8430 match(Set dst (NegVF dst)); 8431 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 8432 ins_cost(150); 8433 ins_encode %{ 8434 int opcode = this->ideal_Opcode(); 8435 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 8436 %} 8437 ins_pipe( pipe_slow ); 8438 %} 8439 8440 instruct vabsnegD(vec dst, vec src) %{ 8441 match(Set dst (AbsVD src)); 8442 match(Set dst (NegVD src)); 8443 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 8444 ins_encode %{ 8445 int opcode = this->ideal_Opcode(); 8446 uint vlen = Matcher::vector_length(this); 8447 if (vlen == 2) { 8448 assert(UseSSE >= 2, "required"); 8449 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8450 } else { 8451 int vlen_enc = vector_length_encoding(this); 8452 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8453 } 8454 %} 8455 ins_pipe( pipe_slow ); 8456 %} 8457 8458 //------------------------------------- VectorTest -------------------------------------------- 8459 8460 #ifdef _LP64 8461 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ 8462 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); 8463 match(Set cr (VectorTest src1 src2)); 8464 effect(TEMP vtmp); 8465 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %} 8466 ins_encode %{ 8467 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8468 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8469 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen); 8470 %} 8471 ins_pipe( pipe_slow ); 8472 %} 8473 8474 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{ 8475 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16); 8476 match(Set cr (VectorTest src1 src2)); 8477 format %{ "vptest_ge16 $src1, $src2\n\t" %} 8478 ins_encode %{ 8479 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8480 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8481 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen); 8482 %} 8483 ins_pipe( pipe_slow ); 8484 %} 8485 8486 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8487 predicate((Matcher::vector_length(n->in(1)) < 8 || 8488 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8489 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 8490 match(Set cr (VectorTest src1 src2)); 8491 effect(TEMP tmp); 8492 format %{ "ktest_alltrue_le8 $src1, $src2\t! 
using $tmp as TEMP" %} 8493 ins_encode %{ 8494 uint masklen = Matcher::vector_length(this, $src1); 8495 __ kmovwl($tmp$$Register, $src1$$KRegister); 8496 __ andl($tmp$$Register, (1 << masklen) - 1); 8497 __ cmpl($tmp$$Register, (1 << masklen) - 1); 8498 %} 8499 ins_pipe( pipe_slow ); 8500 %} 8501 8502 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8503 predicate((Matcher::vector_length(n->in(1)) < 8 || 8504 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8505 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 8506 match(Set cr (VectorTest src1 src2)); 8507 effect(TEMP tmp); 8508 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %} 8509 ins_encode %{ 8510 uint masklen = Matcher::vector_length(this, $src1); 8511 __ kmovwl($tmp$$Register, $src1$$KRegister); 8512 __ andl($tmp$$Register, (1 << masklen) - 1); 8513 %} 8514 ins_pipe( pipe_slow ); 8515 %} 8516 8517 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ 8518 predicate(Matcher::vector_length(n->in(1)) >= 16 || 8519 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq())); 8520 match(Set cr (VectorTest src1 src2)); 8521 format %{ "ktest_ge8 $src1, $src2\n\t" %} 8522 ins_encode %{ 8523 uint masklen = Matcher::vector_length(this, $src1); 8524 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister); 8525 %} 8526 ins_pipe( pipe_slow ); 8527 %} 8528 #endif 8529 8530 //------------------------------------- LoadMask -------------------------------------------- 8531 8532 instruct loadMask(legVec dst, legVec src) %{ 8533 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw()); 8534 match(Set dst (VectorLoadMask src)); 8535 effect(TEMP dst); 8536 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8537 ins_encode %{ 8538 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8539 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8540 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8541 %} 8542 ins_pipe( pipe_slow ); 8543 %} 8544 8545 instruct loadMask64(kReg dst, vec src, vec xtmp) %{ 8546 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8547 match(Set dst (VectorLoadMask src)); 8548 effect(TEMP xtmp); 8549 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} 8550 ins_encode %{ 8551 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8552 true, Assembler::AVX_512bit); 8553 %} 8554 ins_pipe( pipe_slow ); 8555 %} 8556 8557 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8558 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8559 match(Set dst (VectorLoadMask src)); 8560 effect(TEMP xtmp); 8561 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8562 ins_encode %{ 8563 int vlen_enc = vector_length_encoding(in(1)); 8564 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8565 false, vlen_enc); 8566 %} 8567 ins_pipe( pipe_slow ); 8568 %} 8569 8570 //------------------------------------- StoreMask -------------------------------------------- 8571 8572 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8573 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8574 match(Set dst (VectorStoreMask src size)); 8575 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8576 ins_encode %{ 8577 int vlen = Matcher::vector_length(this); 8578 if (vlen <= 16 && UseAVX <= 2) { 8579 assert(UseSSE >= 3, "required"); 8580 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8581 } else { 8582 assert(UseAVX > 0, "required"); 8583 int src_vlen_enc = vector_length_encoding(this, $src); 8584 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8585 } 8586 %} 8587 ins_pipe( pipe_slow ); 8588 %} 8589 8590 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8591 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8592 match(Set dst (VectorStoreMask src size)); 8593 effect(TEMP_DEF dst, TEMP xtmp); 8594 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8595 ins_encode %{ 8596 int vlen_enc = Assembler::AVX_128bit; 8597 int vlen = Matcher::vector_length(this); 8598 if (vlen <= 8) { 8599 assert(UseSSE >= 3, "required"); 8600 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8601 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8602 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8603 } else { 8604 assert(UseAVX > 0, "required"); 8605 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8606 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8607 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8608 } 8609 %} 8610 ins_pipe( pipe_slow ); 8611 %} 8612 8613 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8614 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8615 match(Set dst (VectorStoreMask src size)); 8616 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8617 effect(TEMP_DEF dst, TEMP xtmp); 8618 ins_encode %{ 8619 int vlen_enc = Assembler::AVX_128bit; 8620 int vlen = Matcher::vector_length(this); 8621 if (vlen <= 4) { 8622 assert(UseSSE >= 3, "required"); 8623 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8624 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8625 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8626 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8627 } else { 8628 assert(UseAVX > 0, "required"); 8629 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8630 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8631 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8632 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8633 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8634 } 8635 %} 8636 ins_pipe( pipe_slow ); 8637 %} 8638 8639 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8640 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8641 match(Set dst (VectorStoreMask src size)); 8642 effect(TEMP_DEF dst, TEMP xtmp); 8643 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8644 ins_encode %{ 8645 assert(UseSSE >= 3, "required"); 8646 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8647 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8648 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8649 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8650 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8651 %} 8652 ins_pipe( pipe_slow ); 8653 %} 8654 8655 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8656 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8657 match(Set dst (VectorStoreMask src size)); 8658 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %} 8659 effect(TEMP_DEF dst, TEMP vtmp); 8660 ins_encode %{ 8661 int vlen_enc = Assembler::AVX_128bit; 8662 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8663 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8664 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8665 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8666 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8667 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8668 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8669 %} 8670 ins_pipe( pipe_slow ); 8671 %} 8672 8673 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8674 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8675 match(Set dst (VectorStoreMask src size)); 8676 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8677 ins_encode %{ 8678 int src_vlen_enc = vector_length_encoding(this, $src); 8679 int dst_vlen_enc = vector_length_encoding(this); 8680 if (!VM_Version::supports_avx512vl()) { 8681 src_vlen_enc = Assembler::AVX_512bit; 8682 } 8683 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8684 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8685 %} 8686 ins_pipe( pipe_slow ); 8687 %} 8688 8689 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8690 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8691 match(Set dst (VectorStoreMask src size)); 8692 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8693 ins_encode %{ 8694 int src_vlen_enc = vector_length_encoding(this, $src); 8695 int dst_vlen_enc = vector_length_encoding(this); 8696 if (!VM_Version::supports_avx512vl()) { 8697 src_vlen_enc = Assembler::AVX_512bit; 8698 } 8699 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8700 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8701 %} 8702 ins_pipe( pipe_slow ); 8703 %} 8704 8705 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{ 8706 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8707 match(Set dst (VectorStoreMask mask size)); 8708 effect(TEMP_DEF dst); 8709 format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    int dst_vlen_enc = vector_length_encoding(this);
    __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType dst_bt = Matcher::vector_element_basic_type(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}

//-------------------------------- Load Iota Indices ----------------------------------

instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
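// PopulateIndex produces the vector {src1, src1 + 1, src1 + 2, ...} (the stride src2 is
// asserted to be 1 below): the scalar start value is broadcast into vtmp, the iota
// constant {0, 1, 2, ...} is loaded into dst, and the two are added element-wise.
// For example, src1 = 7 on a 4-lane int vector yields {7, 8, 9, 10}.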
using $vtmp as TEMP" %} 8784 ins_encode %{ 8785 assert($src2$$constant == 1, "required"); 8786 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8787 int vlen_enc = vector_length_encoding(this); 8788 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8789 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8790 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8791 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8792 %} 8793 ins_pipe( pipe_slow ); 8794 %} 8795 8796 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ 8797 match(Set dst (PopulateIndex src1 src2)); 8798 effect(TEMP dst, TEMP vtmp); 8799 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} 8800 ins_encode %{ 8801 assert($src2$$constant == 1, "required"); 8802 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8803 int vlen_enc = vector_length_encoding(this); 8804 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8805 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8806 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8807 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8808 %} 8809 ins_pipe( pipe_slow ); 8810 %} 8811 #endif 8812 //-------------------------------- Rearrange ---------------------------------- 8813 8814 // LoadShuffle/Rearrange for Byte 8815 8816 instruct loadShuffleB(vec dst) %{ 8817 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 8818 match(Set dst (VectorLoadShuffle dst)); 8819 format %{ "vector_load_shuffle $dst, $dst" %} 8820 ins_encode %{ 8821 // empty 8822 %} 8823 ins_pipe( pipe_slow ); 8824 %} 8825 8826 instruct rearrangeB(vec dst, vec shuffle) %{ 8827 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8828 Matcher::vector_length(n) < 32); 8829 match(Set dst (VectorRearrange dst shuffle)); 8830 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8831 ins_encode %{ 8832 assert(UseSSE >= 4, "required"); 8833 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8834 %} 8835 ins_pipe( pipe_slow ); 8836 %} 8837 8838 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8839 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8840 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8841 match(Set dst (VectorRearrange src shuffle)); 8842 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8843 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}


instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Short
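// Without AVX512BW there is no 16-bit permute, so a short shuffle mask has to be
// widened into a byte shuffle mask for pshufb/vpshufb: each short index i becomes
// the byte pair (2*i, 2*i + 1). The rule below computes 2*i, duplicates it into both
// bytes of the word, and then adds the constant behind vector_short_shufflemask()
// (assumed here to be the repeating byte pattern 0,1) so the pair selects the low
// and high byte of the source short. For example, the short shuffle {2, 0} becomes
// the byte shuffle {4, 5, 0, 1}.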
using $vtmp as TEMP" %} 8896 ins_encode %{ 8897 // Create a byte shuffle mask from short shuffle mask 8898 // only byte shuffle instruction available on these platforms 8899 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8900 if (UseAVX == 0) { 8901 assert(vlen_in_bytes <= 16, "required"); 8902 // Multiply each shuffle by two to get byte index 8903 __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister); 8904 __ psllw($vtmp$$XMMRegister, 1); 8905 8906 // Duplicate to create 2 copies of byte index 8907 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8908 __ psllw($dst$$XMMRegister, 8); 8909 __ por($dst$$XMMRegister, $vtmp$$XMMRegister); 8910 8911 // Add one to get alternate byte index 8912 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg); 8913 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8914 } else { 8915 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); 8916 int vlen_enc = vector_length_encoding(this); 8917 // Multiply each shuffle by two to get byte index 8918 __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8919 __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8920 8921 // Duplicate to create 2 copies of byte index 8922 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); 8923 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8924 8925 // Add one to get alternate byte index 8926 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg); 8927 } 8928 %} 8929 ins_pipe( pipe_slow ); 8930 %} 8931 8932 instruct rearrangeS(vec dst, vec shuffle) %{ 8933 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8934 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); 8935 match(Set dst (VectorRearrange dst shuffle)); 8936 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8937 ins_encode %{ 8938 assert(UseSSE >= 4, "required"); 8939 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8940 %} 8941 ins_pipe( pipe_slow ); 8942 %} 8943 8944 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8945 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8946 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 8947 match(Set dst (VectorRearrange src shuffle)); 8948 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8949 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8950 ins_encode %{ 8951 assert(UseAVX >= 2, "required"); 8952 // Swap src into vtmp1 8953 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8954 // Shuffle swapped src to get entries from other 128 bit lane 8955 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8956 // Shuffle original src to get entries from self 128 bit lane 8957 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8958 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8959 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8960 // Perform the blend 8961 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8962 %} 8963 ins_pipe( pipe_slow ); 8964 %} 8965 8966 instruct loadShuffleS_evex(vec dst, vec src) %{ 8967 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8968 VM_Version::supports_avx512bw()); 8969 match(Set dst (VectorLoadShuffle src)); 8970 format %{ "vector_load_shuffle $dst, $src" %} 8971 ins_encode %{ 8972 int vlen_enc = vector_length_encoding(this); 8973 if (!VM_Version::supports_avx512vl()) { 8974 vlen_enc = Assembler::AVX_512bit; 8975 } 8976 __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8977 %} 8978 ins_pipe( pipe_slow ); 8979 %} 8980 8981 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 8982 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8983 VM_Version::supports_avx512bw()); 8984 match(Set dst (VectorRearrange src shuffle)); 8985 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8986 ins_encode %{ 8987 int vlen_enc = vector_length_encoding(this); 8988 if (!VM_Version::supports_avx512vl()) { 8989 vlen_enc = Assembler::AVX_512bit; 8990 } 8991 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8992 %} 8993 ins_pipe( pipe_slow ); 8994 %} 8995 8996 // LoadShuffle/Rearrange for Integer and Float 8997 8998 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{ 8999 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9000 Matcher::vector_length(n) == 4 && UseAVX == 0); 9001 match(Set dst (VectorLoadShuffle src)); 9002 effect(TEMP dst, TEMP vtmp); 9003 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 9004 ins_encode %{ 9005 assert(UseSSE >= 4, "required"); 9006 9007 // Create a byte shuffle mask from int shuffle mask 9008 // only byte shuffle instruction available on these platforms 9009 9010 // Duplicate and multiply each shuffle by 4 9011 __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister); 9012 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 9013 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 9014 __ psllw($vtmp$$XMMRegister, 2); 9015 9016 // Duplicate again to create 4 copies of byte index 9017 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 9018 __ psllw($dst$$XMMRegister, 8); 9019 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 9020 9021 // Add 3,2,1,0 to get alternate byte index 9022 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg); 9023 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 9024 %} 9025 ins_pipe( pipe_slow ); 9026 %} 9027 9028 instruct rearrangeI(vec dst, vec shuffle) %{ 9029 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9030 UseAVX == 0); 9031 match(Set dst (VectorRearrange dst shuffle)); 9032 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 9033 ins_encode %{ 9034 assert(UseSSE >= 4, "required"); 9035 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 9036 %} 9037 ins_pipe( pipe_slow ); 9038 %} 9039 9040 instruct loadShuffleI_avx(vec dst, vec src) %{ 9041 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9042 UseAVX > 0); 9043 match(Set dst (VectorLoadShuffle src)); 9044 format %{ "vector_load_shuffle $dst, $src" %} 9045 ins_encode %{ 9046 int vlen_enc = vector_length_encoding(this); 9047 __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9048 %} 9049 ins_pipe( pipe_slow ); 9050 %} 9051 9052 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 9053 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9054 UseAVX > 0); 9055 match(Set dst (VectorRearrange src shuffle)); 9056 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9057 ins_encode %{ 9058 int vlen_enc = vector_length_encoding(this); 9059 BasicType bt = Matcher::vector_element_basic_type(this); 9060 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9061 %} 9062 ins_pipe( pipe_slow ); 9063 %} 9064 9065 // LoadShuffle/Rearrange for Long and Double 9066 9067 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{ 9068 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9069 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 9070 match(Set dst (VectorLoadShuffle src)); 9071 effect(TEMP dst, TEMP vtmp); 9072 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 9073 ins_encode %{ 9074 assert(UseAVX >= 2, "required"); 9075 9076 int vlen_enc = vector_length_encoding(this); 9077 // Create a double word shuffle mask from long shuffle mask 9078 // only double word shuffle instruction available on these platforms 9079 9080 // Multiply each shuffle by two to get double word index 9081 __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 9082 __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 9083 9084 // Duplicate each double word shuffle 9085 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 9086 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 9087 9088 // Add one to get alternate double word index 9089 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg); 9090 %} 9091 ins_pipe( pipe_slow ); 9092 %} 9093 9094 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 9095 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9096 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 9097 match(Set dst (VectorRearrange src shuffle)); 9098 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9099 ins_encode %{ 9100 assert(UseAVX >= 2, "required"); 9101 9102 int vlen_enc = vector_length_encoding(this); 9103 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9104 %} 9105 ins_pipe( pipe_slow ); 9106 %} 9107 9108 instruct loadShuffleL_evex(vec dst, vec src) %{ 9109 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9110 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 9111 match(Set dst (VectorLoadShuffle src)); 9112 format %{ "vector_load_shuffle $dst, $src" %} 9113 ins_encode %{ 9114 assert(UseAVX > 2, "required"); 9115 9116 int vlen_enc = vector_length_encoding(this); 9117 __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9118 %} 9119 ins_pipe( pipe_slow ); 9120 %} 9121 9122 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 9123 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9124 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 9125 match(Set dst (VectorRearrange src shuffle)); 9126 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9127 ins_encode %{ 9128 assert(UseAVX > 2, "required"); 9129 9130 int vlen_enc = vector_length_encoding(this); 9131 if (vlen_enc == Assembler::AVX_128bit) { 9132 vlen_enc = Assembler::AVX_256bit; 9133 } 9134 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9135 %} 9136 ins_pipe( pipe_slow ); 9137 %} 9138 9139 // --------------------------------- FMA -------------------------------------- 9140 // a * b + c 9141 9142 instruct vfmaF_reg(vec a, vec b, vec c) %{ 9143 match(Set c (FmaVF c (Binary a b))); 9144 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 9145 ins_cost(150); 9146 ins_encode %{ 9147 assert(UseFMA, "not enabled"); 9148 int vlen_enc = vector_length_encoding(this); 9149 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9150 %} 9151 ins_pipe( pipe_slow ); 9152 %} 9153 9154 instruct vfmaF_mem(vec a, memory b, vec c) %{ 9155 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9156 match(Set c (FmaVF c (Binary a (LoadVector b)))); 9157 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 9158 ins_cost(150); 9159 ins_encode %{ 9160 assert(UseFMA, "not 
enabled"); 9161 int vlen_enc = vector_length_encoding(this); 9162 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9163 %} 9164 ins_pipe( pipe_slow ); 9165 %} 9166 9167 instruct vfmaD_reg(vec a, vec b, vec c) %{ 9168 match(Set c (FmaVD c (Binary a b))); 9169 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9170 ins_cost(150); 9171 ins_encode %{ 9172 assert(UseFMA, "not enabled"); 9173 int vlen_enc = vector_length_encoding(this); 9174 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9175 %} 9176 ins_pipe( pipe_slow ); 9177 %} 9178 9179 instruct vfmaD_mem(vec a, memory b, vec c) %{ 9180 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9181 match(Set c (FmaVD c (Binary a (LoadVector b)))); 9182 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9183 ins_cost(150); 9184 ins_encode %{ 9185 assert(UseFMA, "not enabled"); 9186 int vlen_enc = vector_length_encoding(this); 9187 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9188 %} 9189 ins_pipe( pipe_slow ); 9190 %} 9191 9192 // --------------------------------- Vector Multiply Add -------------------------------------- 9193 9194 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 9195 predicate(UseAVX == 0); 9196 match(Set dst (MulAddVS2VI dst src1)); 9197 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 9198 ins_encode %{ 9199 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 9200 %} 9201 ins_pipe( pipe_slow ); 9202 %} 9203 9204 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 9205 predicate(UseAVX > 0); 9206 match(Set dst (MulAddVS2VI src1 src2)); 9207 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 9208 ins_encode %{ 9209 int vlen_enc = vector_length_encoding(this); 9210 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9211 %} 9212 ins_pipe( pipe_slow ); 9213 %} 9214 9215 // --------------------------------- Vector Multiply Add Add ---------------------------------- 9216 9217 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 9218 predicate(VM_Version::supports_avx512_vnni()); 9219 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 9220 format %{ "evpdpwssd $dst,$src1,$src2\t! 
muladdadd packedStoI" %} 9221 ins_encode %{ 9222 assert(UseAVX > 2, "required"); 9223 int vlen_enc = vector_length_encoding(this); 9224 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9225 %} 9226 ins_pipe( pipe_slow ); 9227 ins_cost(10); 9228 %} 9229 9230 // --------------------------------- PopCount -------------------------------------- 9231 9232 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 9233 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9234 match(Set dst (PopCountVI src)); 9235 match(Set dst (PopCountVL src)); 9236 format %{ "vector_popcount_integral $dst, $src" %} 9237 ins_encode %{ 9238 int opcode = this->ideal_Opcode(); 9239 int vlen_enc = vector_length_encoding(this, $src); 9240 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9241 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 9242 %} 9243 ins_pipe( pipe_slow ); 9244 %} 9245 9246 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9247 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9248 match(Set dst (PopCountVI src mask)); 9249 match(Set dst (PopCountVL src mask)); 9250 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 9251 ins_encode %{ 9252 int vlen_enc = vector_length_encoding(this, $src); 9253 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9254 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9255 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 9256 %} 9257 ins_pipe( pipe_slow ); 9258 %} 9259 9260 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 9261 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9262 match(Set dst (PopCountVI src)); 9263 match(Set dst (PopCountVL src)); 9264 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9265 format %{ "vector_popcount_integral $dst, $src\t! 
using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 9266 ins_encode %{ 9267 int opcode = this->ideal_Opcode(); 9268 int vlen_enc = vector_length_encoding(this, $src); 9269 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9270 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9271 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 9272 %} 9273 ins_pipe( pipe_slow ); 9274 %} 9275 9276 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 9277 9278 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 9279 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9280 Matcher::vector_length_in_bytes(n->in(1)))); 9281 match(Set dst (CountTrailingZerosV src)); 9282 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 9283 ins_cost(400); 9284 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 9285 ins_encode %{ 9286 int vlen_enc = vector_length_encoding(this, $src); 9287 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9288 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9289 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9290 %} 9291 ins_pipe( pipe_slow ); 9292 %} 9293 9294 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9295 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9296 VM_Version::supports_avx512cd() && 9297 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9298 match(Set dst (CountTrailingZerosV src)); 9299 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9300 ins_cost(400); 9301 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 9302 ins_encode %{ 9303 int vlen_enc = vector_length_encoding(this, $src); 9304 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9305 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9306 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9307 %} 9308 ins_pipe( pipe_slow ); 9309 %} 9310 9311 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{ 9312 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9313 match(Set dst (CountTrailingZerosV src)); 9314 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp); 9315 ins_cost(400); 9316 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %} 9317 ins_encode %{ 9318 int vlen_enc = vector_length_encoding(this, $src); 9319 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9320 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9321 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 9322 $ktmp$$KRegister, $rtmp$$Register, vlen_enc); 9323 %} 9324 ins_pipe( pipe_slow ); 9325 %} 9326 9327 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9328 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9329 match(Set dst (CountTrailingZerosV src)); 9330 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 
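  // Note: a common branch-free way to derive a per-lane trailing-zero count without a
  // dedicated instruction starts from the isolated lowest set bit (x & -x), e.g.
  // tzcnt(x) == popcnt((x & -x) - 1): x = 0b01100 gives (x & -x) = 0b00100, minus 1 =
  // 0b00011, popcnt = 2. This is only a sketch of the general idea; the exact AVX
  // fallback sequence is whatever vector_count_trailing_zeros_avx() emits below.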
9331 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9332 ins_encode %{ 9333 int vlen_enc = vector_length_encoding(this, $src); 9334 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9335 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9336 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9337 %} 9338 ins_pipe( pipe_slow ); 9339 %} 9340 9341 9342 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 9343 9344 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 9345 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 9346 effect(TEMP dst); 9347 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9348 ins_encode %{ 9349 int vector_len = vector_length_encoding(this); 9350 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 9351 %} 9352 ins_pipe( pipe_slow ); 9353 %} 9354 9355 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 9356 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 9357 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 9358 effect(TEMP dst); 9359 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9360 ins_encode %{ 9361 int vector_len = vector_length_encoding(this); 9362 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 9363 %} 9364 ins_pipe( pipe_slow ); 9365 %} 9366 9367 // --------------------------------- Rotation Operations ---------------------------------- 9368 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 9369 match(Set dst (RotateLeftV src shift)); 9370 match(Set dst (RotateRightV src shift)); 9371 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 9372 ins_encode %{ 9373 int opcode = this->ideal_Opcode(); 9374 int vector_len = vector_length_encoding(this); 9375 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9376 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 9377 %} 9378 ins_pipe( pipe_slow ); 9379 %} 9380 9381 instruct vprorate(vec dst, vec src, vec shift) %{ 9382 match(Set dst (RotateLeftV src shift)); 9383 match(Set dst (RotateRightV src shift)); 9384 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 9385 ins_encode %{ 9386 int opcode = this->ideal_Opcode(); 9387 int vector_len = vector_length_encoding(this); 9388 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9389 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9390 %} 9391 ins_pipe( pipe_slow ); 9392 %} 9393 9394 // ---------------------------------- Masked Operations ------------------------------------ 9395 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{ 9396 predicate(!n->in(3)->bottom_type()->isa_vectmask()); 9397 match(Set dst (LoadVectorMasked mem mask)); 9398 format %{ "vector_masked_load $dst, $mem, $mask \t! 
vector masked copy" %} 9399 ins_encode %{ 9400 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9401 int vlen_enc = vector_length_encoding(this); 9402 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc); 9403 %} 9404 ins_pipe( pipe_slow ); 9405 %} 9406 9407 9408 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{ 9409 predicate(n->in(3)->bottom_type()->isa_vectmask()); 9410 match(Set dst (LoadVectorMasked mem mask)); 9411 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 9412 ins_encode %{ 9413 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9414 int vector_len = vector_length_encoding(this); 9415 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len); 9416 %} 9417 ins_pipe( pipe_slow ); 9418 %} 9419 9420 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{ 9421 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9422 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9423 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9424 ins_encode %{ 9425 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9426 int vlen_enc = vector_length_encoding(src_node); 9427 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9428 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc); 9429 %} 9430 ins_pipe( pipe_slow ); 9431 %} 9432 9433 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{ 9434 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9435 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9436 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9437 ins_encode %{ 9438 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9439 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9440 int vlen_enc = vector_length_encoding(src_node); 9441 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc); 9442 %} 9443 ins_pipe( pipe_slow ); 9444 %} 9445 9446 #ifdef _LP64 9447 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{ 9448 match(Set addr (VerifyVectorAlignment addr mask)); 9449 effect(KILL cr); 9450 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %} 9451 ins_encode %{ 9452 Label Lskip; 9453 // check if masked bits of addr are zero 9454 __ testq($addr$$Register, $mask$$constant); 9455 __ jccb(Assembler::equal, Lskip); 9456 __ stop("verify_vector_alignment found a misaligned vector memory access"); 9457 __ bind(Lskip); 9458 %} 9459 ins_pipe(pipe_slow); 9460 %} 9461 9462 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 9463 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 9464 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 9465 format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
vector mask comparison" %} 9466 ins_encode %{ 9467 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 9468 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 9469 9470 Label DONE; 9471 int vlen_enc = vector_length_encoding(this, $src1); 9472 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 9473 9474 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 9475 __ mov64($dst$$Register, -1L); 9476 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 9477 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 9478 __ jccb(Assembler::carrySet, DONE); 9479 __ kmovql($dst$$Register, $ktmp1$$KRegister); 9480 __ notq($dst$$Register); 9481 __ tzcntq($dst$$Register, $dst$$Register); 9482 __ bind(DONE); 9483 %} 9484 ins_pipe( pipe_slow ); 9485 %} 9486 9487 9488 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{ 9489 match(Set dst (VectorMaskGen len)); 9490 effect(TEMP temp, KILL cr); 9491 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9492 ins_encode %{ 9493 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9494 %} 9495 ins_pipe( pipe_slow ); 9496 %} 9497 9498 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9499 match(Set dst (VectorMaskGen len)); 9500 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9501 effect(TEMP temp); 9502 ins_encode %{ 9503 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9504 __ kmovql($dst$$KRegister, $temp$$Register); 9505 %} 9506 ins_pipe( pipe_slow ); 9507 %} 9508 9509 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9510 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9511 match(Set dst (VectorMaskToLong mask)); 9512 effect(TEMP dst, KILL cr); 9513 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9514 ins_encode %{ 9515 int opcode = this->ideal_Opcode(); 9516 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9517 int mask_len = Matcher::vector_length(this, $mask); 9518 int mask_size = mask_len * type2aelembytes(mbt); 9519 int vlen_enc = vector_length_encoding(this, $mask); 9520 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9521 $dst$$Register, mask_len, mask_size, vlen_enc); 9522 %} 9523 ins_pipe( pipe_slow ); 9524 %} 9525 9526 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9527 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9528 match(Set dst (VectorMaskToLong mask)); 9529 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9530 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9531 ins_encode %{ 9532 int opcode = this->ideal_Opcode(); 9533 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9534 int mask_len = Matcher::vector_length(this, $mask); 9535 int vlen_enc = vector_length_encoding(this, $mask); 9536 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9537 $dst$$Register, mask_len, mbt, vlen_enc); 9538 %} 9539 ins_pipe( pipe_slow ); 9540 %} 9541 9542 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9543 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9544 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9545 format %{ "vector_tolong_avx $dst, $mask \t! 
using $xtmp as TEMP" %} 9546 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9547 ins_encode %{ 9548 int opcode = this->ideal_Opcode(); 9549 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9550 int mask_len = Matcher::vector_length(this, $mask); 9551 int vlen_enc = vector_length_encoding(this, $mask); 9552 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9553 $dst$$Register, mask_len, mbt, vlen_enc); 9554 %} 9555 ins_pipe( pipe_slow ); 9556 %} 9557 9558 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9559 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9560 match(Set dst (VectorMaskTrueCount mask)); 9561 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9562 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9563 ins_encode %{ 9564 int opcode = this->ideal_Opcode(); 9565 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9566 int mask_len = Matcher::vector_length(this, $mask); 9567 int mask_size = mask_len * type2aelembytes(mbt); 9568 int vlen_enc = vector_length_encoding(this, $mask); 9569 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9570 $tmp$$Register, mask_len, mask_size, vlen_enc); 9571 %} 9572 ins_pipe( pipe_slow ); 9573 %} 9574 9575 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9576 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9577 match(Set dst (VectorMaskTrueCount mask)); 9578 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9579 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9580 ins_encode %{ 9581 int opcode = this->ideal_Opcode(); 9582 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9583 int mask_len = Matcher::vector_length(this, $mask); 9584 int vlen_enc = vector_length_encoding(this, $mask); 9585 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9586 $tmp$$Register, mask_len, mbt, vlen_enc); 9587 %} 9588 ins_pipe( pipe_slow ); 9589 %} 9590 9591 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9592 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9593 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9594 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9595 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9596 ins_encode %{ 9597 int opcode = this->ideal_Opcode(); 9598 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9599 int mask_len = Matcher::vector_length(this, $mask); 9600 int vlen_enc = vector_length_encoding(this, $mask); 9601 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9602 $tmp$$Register, mask_len, mbt, vlen_enc); 9603 %} 9604 ins_pipe( pipe_slow ); 9605 %} 9606 9607 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9608 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9609 match(Set dst (VectorMaskFirstTrue mask)); 9610 match(Set dst (VectorMaskLastTrue mask)); 9611 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9612 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! 
using $tmp as TEMP" %} 9613 ins_encode %{ 9614 int opcode = this->ideal_Opcode(); 9615 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9616 int mask_len = Matcher::vector_length(this, $mask); 9617 int mask_size = mask_len * type2aelembytes(mbt); 9618 int vlen_enc = vector_length_encoding(this, $mask); 9619 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9620 $tmp$$Register, mask_len, mask_size, vlen_enc); 9621 %} 9622 ins_pipe( pipe_slow ); 9623 %} 9624 9625 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9626 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9627 match(Set dst (VectorMaskFirstTrue mask)); 9628 match(Set dst (VectorMaskLastTrue mask)); 9629 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9630 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9631 ins_encode %{ 9632 int opcode = this->ideal_Opcode(); 9633 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9634 int mask_len = Matcher::vector_length(this, $mask); 9635 int vlen_enc = vector_length_encoding(this, $mask); 9636 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9637 $tmp$$Register, mask_len, mbt, vlen_enc); 9638 %} 9639 ins_pipe( pipe_slow ); 9640 %} 9641 9642 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9643 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9644 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9645 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9646 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9647 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! 
using $tmp, $xtmp as TEMP" %} 9648 ins_encode %{ 9649 int opcode = this->ideal_Opcode(); 9650 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9651 int mask_len = Matcher::vector_length(this, $mask); 9652 int vlen_enc = vector_length_encoding(this, $mask); 9653 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9654 $tmp$$Register, mask_len, mbt, vlen_enc); 9655 %} 9656 ins_pipe( pipe_slow ); 9657 %} 9658 9659 // --------------------------------- Compress/Expand Operations --------------------------- 9660 #ifdef _LP64 9661 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{ 9662 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 9663 match(Set dst (CompressV src mask)); 9664 match(Set dst (ExpandV src mask)); 9665 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr); 9666 format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %} 9667 ins_encode %{ 9668 int opcode = this->ideal_Opcode(); 9669 int vlen_enc = vector_length_encoding(this); 9670 BasicType bt = Matcher::vector_element_basic_type(this); 9671 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register, 9672 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc); 9673 %} 9674 ins_pipe( pipe_slow ); 9675 %} 9676 #endif 9677 9678 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9679 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 9680 match(Set dst (CompressV src mask)); 9681 match(Set dst (ExpandV src mask)); 9682 format %{ "vector_compress_expand $dst, $src, $mask" %} 9683 ins_encode %{ 9684 int opcode = this->ideal_Opcode(); 9685 int vector_len = vector_length_encoding(this); 9686 BasicType bt = Matcher::vector_element_basic_type(this); 9687 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9688 %} 9689 ins_pipe( pipe_slow ); 9690 %} 9691 9692 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9693 match(Set dst (CompressM mask)); 9694 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9695 format %{ "mask_compress_evex $dst, $mask\t! 
using $rtmp1 and $rtmp2 as TEMP" %} 9696 ins_encode %{ 9697 assert(this->in(1)->bottom_type()->isa_vectmask(), ""); 9698 int mask_len = Matcher::vector_length(this); 9699 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len); 9700 %} 9701 ins_pipe( pipe_slow ); 9702 %} 9703 9704 #endif // _LP64 9705 9706 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ 9707 9708 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9709 predicate(!VM_Version::supports_gfni()); 9710 match(Set dst (ReverseV src)); 9711 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9712 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9713 ins_encode %{ 9714 int vec_enc = vector_length_encoding(this); 9715 BasicType bt = Matcher::vector_element_basic_type(this); 9716 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9717 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9718 %} 9719 ins_pipe( pipe_slow ); 9720 %} 9721 9722 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{ 9723 predicate(VM_Version::supports_gfni()); 9724 match(Set dst (ReverseV src)); 9725 effect(TEMP dst, TEMP xtmp); 9726 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %} 9727 ins_encode %{ 9728 int vec_enc = vector_length_encoding(this); 9729 BasicType bt = Matcher::vector_element_basic_type(this); 9730 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1)); 9731 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc, 9732 $xtmp$$XMMRegister); 9733 %} 9734 ins_pipe( pipe_slow ); 9735 %} 9736 9737 instruct vreverse_byte_reg(vec dst, vec src) %{ 9738 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); 9739 match(Set dst (ReverseBytesV src)); 9740 effect(TEMP dst); 9741 format %{ "vector_reverse_byte $dst, $src" %} 9742 ins_encode %{ 9743 int vec_enc = vector_length_encoding(this); 9744 BasicType bt = Matcher::vector_element_basic_type(this); 9745 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc); 9746 %} 9747 ins_pipe( pipe_slow ); 9748 %} 9749 9750 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9751 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64); 9752 match(Set dst (ReverseBytesV src)); 9753 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9754 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9755 ins_encode %{ 9756 int vec_enc = vector_length_encoding(this); 9757 BasicType bt = Matcher::vector_element_basic_type(this); 9758 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9759 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9760 %} 9761 ins_pipe( pipe_slow ); 9762 %} 9763 9764 // ---------------------------------- Vector Count Leading Zeros ----------------------------------- 9765 9766 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ 9767 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9768 Matcher::vector_length_in_bytes(n->in(1)))); 9769 match(Set dst (CountLeadingZerosV src)); 9770 format %{ "vector_count_leading_zeros $dst, $src" %} 9771 ins_encode %{ 9772 int vlen_enc = vector_length_encoding(this, $src); 9773 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9774 __ 
vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9775 xnoreg, xnoreg, k0, noreg, true, vlen_enc); 9776 %} 9777 ins_pipe( pipe_slow ); 9778 %} 9779 9780 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9781 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9782 Matcher::vector_length_in_bytes(n->in(1)))); 9783 match(Set dst (CountLeadingZerosV src mask)); 9784 format %{ "vector_count_leading_zeros $dst, $src, $mask" %} 9785 ins_encode %{ 9786 int vlen_enc = vector_length_encoding(this, $src); 9787 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9788 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9789 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, 9790 xnoreg, $mask$$KRegister, noreg, true, vlen_enc); 9791 %} 9792 ins_pipe( pipe_slow ); 9793 %} 9794 9795 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{ 9796 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9797 VM_Version::supports_avx512cd() && 9798 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9799 match(Set dst (CountLeadingZerosV src)); 9800 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 9801 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %} 9802 ins_encode %{ 9803 int vlen_enc = vector_length_encoding(this, $src); 9804 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9805 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9806 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc); 9807 %} 9808 ins_pipe( pipe_slow ); 9809 %} 9810 9811 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{ 9812 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9813 match(Set dst (CountLeadingZerosV src)); 9814 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 9815 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %} 9816 ins_encode %{ 9817 int vlen_enc = vector_length_encoding(this, $src); 9818 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9819 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9820 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister, 9821 $rtmp$$Register, true, vlen_enc); 9822 %} 9823 ins_pipe( pipe_slow ); 9824 %} 9825 9826 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{ 9827 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT && 9828 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9829 match(Set dst (CountLeadingZerosV src)); 9830 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 9831 format %{ "vector_count_leading_zeros $dst, $src\t! 
using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 9832 ins_encode %{ 9833 int vlen_enc = vector_length_encoding(this, $src); 9834 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9835 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9836 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc); 9837 %} 9838 ins_pipe( pipe_slow ); 9839 %} 9840 9841 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9842 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT && 9843 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9844 match(Set dst (CountLeadingZerosV src)); 9845 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9846 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9847 ins_encode %{ 9848 int vlen_enc = vector_length_encoding(this, $src); 9849 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9850 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9851 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9852 %} 9853 ins_pipe( pipe_slow ); 9854 %} 9855 9856 // ---------------------------------- Vector Masked Operations ------------------------------------ 9857 9858 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ 9859 match(Set dst (AddVB (Binary dst src2) mask)); 9860 match(Set dst (AddVS (Binary dst src2) mask)); 9861 match(Set dst (AddVI (Binary dst src2) mask)); 9862 match(Set dst (AddVL (Binary dst src2) mask)); 9863 match(Set dst (AddVF (Binary dst src2) mask)); 9864 match(Set dst (AddVD (Binary dst src2) mask)); 9865 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9866 ins_encode %{ 9867 int vlen_enc = vector_length_encoding(this); 9868 BasicType bt = Matcher::vector_element_basic_type(this); 9869 int opc = this->ideal_Opcode(); 9870 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9871 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9872 %} 9873 ins_pipe( pipe_slow ); 9874 %} 9875 9876 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ 9877 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); 9878 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); 9879 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); 9880 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); 9881 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); 9882 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); 9883 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9884 ins_encode %{ 9885 int vlen_enc = vector_length_encoding(this); 9886 BasicType bt = Matcher::vector_element_basic_type(this); 9887 int opc = this->ideal_Opcode(); 9888 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9889 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9890 %} 9891 ins_pipe( pipe_slow ); 9892 %} 9893 9894 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ 9895 match(Set dst (XorV (Binary dst src2) mask)); 9896 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! 
xor masked operation" %} 9897 ins_encode %{ 9898 int vlen_enc = vector_length_encoding(this); 9899 BasicType bt = Matcher::vector_element_basic_type(this); 9900 int opc = this->ideal_Opcode(); 9901 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9902 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9903 %} 9904 ins_pipe( pipe_slow ); 9905 %} 9906 9907 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9908 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9909 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9910 ins_encode %{ 9911 int vlen_enc = vector_length_encoding(this); 9912 BasicType bt = Matcher::vector_element_basic_type(this); 9913 int opc = this->ideal_Opcode(); 9914 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9915 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9916 %} 9917 ins_pipe( pipe_slow ); 9918 %} 9919 9920 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9921 match(Set dst (OrV (Binary dst src2) mask)); 9922 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9923 ins_encode %{ 9924 int vlen_enc = vector_length_encoding(this); 9925 BasicType bt = Matcher::vector_element_basic_type(this); 9926 int opc = this->ideal_Opcode(); 9927 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9928 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9929 %} 9930 ins_pipe( pipe_slow ); 9931 %} 9932 9933 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9934 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9935 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9936 ins_encode %{ 9937 int vlen_enc = vector_length_encoding(this); 9938 BasicType bt = Matcher::vector_element_basic_type(this); 9939 int opc = this->ideal_Opcode(); 9940 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9941 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9942 %} 9943 ins_pipe( pipe_slow ); 9944 %} 9945 9946 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9947 match(Set dst (AndV (Binary dst src2) mask)); 9948 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9949 ins_encode %{ 9950 int vlen_enc = vector_length_encoding(this); 9951 BasicType bt = Matcher::vector_element_basic_type(this); 9952 int opc = this->ideal_Opcode(); 9953 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9954 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9955 %} 9956 ins_pipe( pipe_slow ); 9957 %} 9958 9959 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 9960 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 9961 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9962 ins_encode %{ 9963 int vlen_enc = vector_length_encoding(this); 9964 BasicType bt = Matcher::vector_element_basic_type(this); 9965 int opc = this->ideal_Opcode(); 9966 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9967 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9968 %} 9969 ins_pipe( pipe_slow ); 9970 %} 9971 9972 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 9973 match(Set dst (SubVB (Binary dst src2) mask)); 9974 match(Set dst (SubVS (Binary dst src2) mask)); 9975 match(Set dst (SubVI (Binary dst src2) mask)); 9976 match(Set dst (SubVL (Binary dst src2) mask)); 9977 match(Set dst (SubVF (Binary dst src2) mask)); 9978 match(Set dst (SubVD (Binary dst src2) mask)); 9979 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! 
sub masked operation" %} 9980 ins_encode %{ 9981 int vlen_enc = vector_length_encoding(this); 9982 BasicType bt = Matcher::vector_element_basic_type(this); 9983 int opc = this->ideal_Opcode(); 9984 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9985 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9986 %} 9987 ins_pipe( pipe_slow ); 9988 %} 9989 9990 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{ 9991 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask)); 9992 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask)); 9993 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask)); 9994 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask)); 9995 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask)); 9996 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask)); 9997 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 9998 ins_encode %{ 9999 int vlen_enc = vector_length_encoding(this); 10000 BasicType bt = Matcher::vector_element_basic_type(this); 10001 int opc = this->ideal_Opcode(); 10002 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10003 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10004 %} 10005 ins_pipe( pipe_slow ); 10006 %} 10007 10008 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{ 10009 match(Set dst (MulVS (Binary dst src2) mask)); 10010 match(Set dst (MulVI (Binary dst src2) mask)); 10011 match(Set dst (MulVL (Binary dst src2) mask)); 10012 match(Set dst (MulVF (Binary dst src2) mask)); 10013 match(Set dst (MulVD (Binary dst src2) mask)); 10014 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 10015 ins_encode %{ 10016 int vlen_enc = vector_length_encoding(this); 10017 BasicType bt = Matcher::vector_element_basic_type(this); 10018 int opc = this->ideal_Opcode(); 10019 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10020 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10021 %} 10022 ins_pipe( pipe_slow ); 10023 %} 10024 10025 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{ 10026 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask)); 10027 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask)); 10028 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask)); 10029 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask)); 10030 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask)); 10031 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 10032 ins_encode %{ 10033 int vlen_enc = vector_length_encoding(this); 10034 BasicType bt = Matcher::vector_element_basic_type(this); 10035 int opc = this->ideal_Opcode(); 10036 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10037 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10038 %} 10039 ins_pipe( pipe_slow ); 10040 %} 10041 10042 instruct vsqrt_reg_masked(vec dst, kReg mask) %{ 10043 match(Set dst (SqrtVF dst mask)); 10044 match(Set dst (SqrtVD dst mask)); 10045 format %{ "vpsqrt_masked $dst, $mask\t! 
sqrt masked operation" %} 10046 ins_encode %{ 10047 int vlen_enc = vector_length_encoding(this); 10048 BasicType bt = Matcher::vector_element_basic_type(this); 10049 int opc = this->ideal_Opcode(); 10050 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10051 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 10052 %} 10053 ins_pipe( pipe_slow ); 10054 %} 10055 10056 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{ 10057 match(Set dst (DivVF (Binary dst src2) mask)); 10058 match(Set dst (DivVD (Binary dst src2) mask)); 10059 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 10060 ins_encode %{ 10061 int vlen_enc = vector_length_encoding(this); 10062 BasicType bt = Matcher::vector_element_basic_type(this); 10063 int opc = this->ideal_Opcode(); 10064 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10065 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10066 %} 10067 ins_pipe( pipe_slow ); 10068 %} 10069 10070 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{ 10071 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask)); 10072 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask)); 10073 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 10074 ins_encode %{ 10075 int vlen_enc = vector_length_encoding(this); 10076 BasicType bt = Matcher::vector_element_basic_type(this); 10077 int opc = this->ideal_Opcode(); 10078 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10079 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10080 %} 10081 ins_pipe( pipe_slow ); 10082 %} 10083 10084 10085 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10086 match(Set dst (RotateLeftV (Binary dst shift) mask)); 10087 match(Set dst (RotateRightV (Binary dst shift) mask)); 10088 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %} 10089 ins_encode %{ 10090 int vlen_enc = vector_length_encoding(this); 10091 BasicType bt = Matcher::vector_element_basic_type(this); 10092 int opc = this->ideal_Opcode(); 10093 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10094 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10095 %} 10096 ins_pipe( pipe_slow ); 10097 %} 10098 10099 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{ 10100 match(Set dst (RotateLeftV (Binary dst src2) mask)); 10101 match(Set dst (RotateRightV (Binary dst src2) mask)); 10102 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %} 10103 ins_encode %{ 10104 int vlen_enc = vector_length_encoding(this); 10105 BasicType bt = Matcher::vector_element_basic_type(this); 10106 int opc = this->ideal_Opcode(); 10107 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10108 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10109 %} 10110 ins_pipe( pipe_slow ); 10111 %} 10112 10113 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10114 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask)); 10115 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask)); 10116 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask)); 10117 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! 
lshift masked operation" %} 10118 ins_encode %{ 10119 int vlen_enc = vector_length_encoding(this); 10120 BasicType bt = Matcher::vector_element_basic_type(this); 10121 int opc = this->ideal_Opcode(); 10122 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10123 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10124 %} 10125 ins_pipe( pipe_slow ); 10126 %} 10127 10128 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10129 predicate(!n->as_ShiftV()->is_var_shift()); 10130 match(Set dst (LShiftVS (Binary dst src2) mask)); 10131 match(Set dst (LShiftVI (Binary dst src2) mask)); 10132 match(Set dst (LShiftVL (Binary dst src2) mask)); 10133 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 10134 ins_encode %{ 10135 int vlen_enc = vector_length_encoding(this); 10136 BasicType bt = Matcher::vector_element_basic_type(this); 10137 int opc = this->ideal_Opcode(); 10138 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10139 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10140 %} 10141 ins_pipe( pipe_slow ); 10142 %} 10143 10144 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10145 predicate(n->as_ShiftV()->is_var_shift()); 10146 match(Set dst (LShiftVS (Binary dst src2) mask)); 10147 match(Set dst (LShiftVI (Binary dst src2) mask)); 10148 match(Set dst (LShiftVL (Binary dst src2) mask)); 10149 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 10150 ins_encode %{ 10151 int vlen_enc = vector_length_encoding(this); 10152 BasicType bt = Matcher::vector_element_basic_type(this); 10153 int opc = this->ideal_Opcode(); 10154 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10155 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10156 %} 10157 ins_pipe( pipe_slow ); 10158 %} 10159 10160 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10161 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask)); 10162 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask)); 10163 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask)); 10164 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %} 10165 ins_encode %{ 10166 int vlen_enc = vector_length_encoding(this); 10167 BasicType bt = Matcher::vector_element_basic_type(this); 10168 int opc = this->ideal_Opcode(); 10169 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10170 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10171 %} 10172 ins_pipe( pipe_slow ); 10173 %} 10174 10175 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10176 predicate(!n->as_ShiftV()->is_var_shift()); 10177 match(Set dst (RShiftVS (Binary dst src2) mask)); 10178 match(Set dst (RShiftVI (Binary dst src2) mask)); 10179 match(Set dst (RShiftVL (Binary dst src2) mask)); 10180 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! 
rshift masked operation" %} 10181 ins_encode %{ 10182 int vlen_enc = vector_length_encoding(this); 10183 BasicType bt = Matcher::vector_element_basic_type(this); 10184 int opc = this->ideal_Opcode(); 10185 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10186 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10187 %} 10188 ins_pipe( pipe_slow ); 10189 %} 10190 10191 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10192 predicate(n->as_ShiftV()->is_var_shift()); 10193 match(Set dst (RShiftVS (Binary dst src2) mask)); 10194 match(Set dst (RShiftVI (Binary dst src2) mask)); 10195 match(Set dst (RShiftVL (Binary dst src2) mask)); 10196 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 10197 ins_encode %{ 10198 int vlen_enc = vector_length_encoding(this); 10199 BasicType bt = Matcher::vector_element_basic_type(this); 10200 int opc = this->ideal_Opcode(); 10201 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10202 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10203 %} 10204 ins_pipe( pipe_slow ); 10205 %} 10206 10207 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10208 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask)); 10209 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask)); 10210 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask)); 10211 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %} 10212 ins_encode %{ 10213 int vlen_enc = vector_length_encoding(this); 10214 BasicType bt = Matcher::vector_element_basic_type(this); 10215 int opc = this->ideal_Opcode(); 10216 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10217 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10218 %} 10219 ins_pipe( pipe_slow ); 10220 %} 10221 10222 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10223 predicate(!n->as_ShiftV()->is_var_shift()); 10224 match(Set dst (URShiftVS (Binary dst src2) mask)); 10225 match(Set dst (URShiftVI (Binary dst src2) mask)); 10226 match(Set dst (URShiftVL (Binary dst src2) mask)); 10227 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 10228 ins_encode %{ 10229 int vlen_enc = vector_length_encoding(this); 10230 BasicType bt = Matcher::vector_element_basic_type(this); 10231 int opc = this->ideal_Opcode(); 10232 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10233 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10234 %} 10235 ins_pipe( pipe_slow ); 10236 %} 10237 10238 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10239 predicate(n->as_ShiftV()->is_var_shift()); 10240 match(Set dst (URShiftVS (Binary dst src2) mask)); 10241 match(Set dst (URShiftVI (Binary dst src2) mask)); 10242 match(Set dst (URShiftVL (Binary dst src2) mask)); 10243 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 10244 ins_encode %{ 10245 int vlen_enc = vector_length_encoding(this); 10246 BasicType bt = Matcher::vector_element_basic_type(this); 10247 int opc = this->ideal_Opcode(); 10248 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10249 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10250 %} 10251 ins_pipe( pipe_slow ); 10252 %} 10253 10254 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{ 10255 match(Set dst (MaxV (Binary dst src2) mask)); 10256 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! 
max masked operation" %} 10257 ins_encode %{ 10258 int vlen_enc = vector_length_encoding(this); 10259 BasicType bt = Matcher::vector_element_basic_type(this); 10260 int opc = this->ideal_Opcode(); 10261 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10262 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10263 %} 10264 ins_pipe( pipe_slow ); 10265 %} 10266 10267 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{ 10268 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask)); 10269 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} 10270 ins_encode %{ 10271 int vlen_enc = vector_length_encoding(this); 10272 BasicType bt = Matcher::vector_element_basic_type(this); 10273 int opc = this->ideal_Opcode(); 10274 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10275 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10276 %} 10277 ins_pipe( pipe_slow ); 10278 %} 10279 10280 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{ 10281 match(Set dst (MinV (Binary dst src2) mask)); 10282 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 10283 ins_encode %{ 10284 int vlen_enc = vector_length_encoding(this); 10285 BasicType bt = Matcher::vector_element_basic_type(this); 10286 int opc = this->ideal_Opcode(); 10287 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10288 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10289 %} 10290 ins_pipe( pipe_slow ); 10291 %} 10292 10293 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{ 10294 match(Set dst (MinV (Binary dst (LoadVector src2)) mask)); 10295 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 10296 ins_encode %{ 10297 int vlen_enc = vector_length_encoding(this); 10298 BasicType bt = Matcher::vector_element_basic_type(this); 10299 int opc = this->ideal_Opcode(); 10300 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10301 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10302 %} 10303 ins_pipe( pipe_slow ); 10304 %} 10305 10306 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{ 10307 match(Set dst (VectorRearrange (Binary dst src2) mask)); 10308 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %} 10309 ins_encode %{ 10310 int vlen_enc = vector_length_encoding(this); 10311 BasicType bt = Matcher::vector_element_basic_type(this); 10312 int opc = this->ideal_Opcode(); 10313 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10314 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 10315 %} 10316 ins_pipe( pipe_slow ); 10317 %} 10318 10319 instruct vabs_masked(vec dst, kReg mask) %{ 10320 match(Set dst (AbsVB dst mask)); 10321 match(Set dst (AbsVS dst mask)); 10322 match(Set dst (AbsVI dst mask)); 10323 match(Set dst (AbsVL dst mask)); 10324 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %} 10325 ins_encode %{ 10326 int vlen_enc = vector_length_encoding(this); 10327 BasicType bt = Matcher::vector_element_basic_type(this); 10328 int opc = this->ideal_Opcode(); 10329 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10330 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 10331 %} 10332 ins_pipe( pipe_slow ); 10333 %} 10334 10335 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{ 10336 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask))); 10337 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask))); 10338 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! 
vfma masked operation" %} 10339 ins_encode %{ 10340 assert(UseFMA, "Needs FMA instructions support."); 10341 int vlen_enc = vector_length_encoding(this); 10342 BasicType bt = Matcher::vector_element_basic_type(this); 10343 int opc = this->ideal_Opcode(); 10344 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10345 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc); 10346 %} 10347 ins_pipe( pipe_slow ); 10348 %} 10349 10350 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{ 10351 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask))); 10352 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask))); 10353 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %} 10354 ins_encode %{ 10355 assert(UseFMA, "Needs FMA instructions support."); 10356 int vlen_enc = vector_length_encoding(this); 10357 BasicType bt = Matcher::vector_element_basic_type(this); 10358 int opc = this->ideal_Opcode(); 10359 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10360 $src2$$XMMRegister, $src3$$Address, true, vlen_enc); 10361 %} 10362 ins_pipe( pipe_slow ); 10363 %} 10364 10365 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{ 10366 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask))); 10367 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %} 10368 ins_encode %{ 10369 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 10370 int vlen_enc = vector_length_encoding(this, $src1); 10371 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 10372 10373 // Comparison i 10374 switch (src1_elem_bt) { 10375 case T_BYTE: { 10376 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10377 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10378 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10379 break; 10380 } 10381 case T_SHORT: { 10382 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10383 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10384 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10385 break; 10386 } 10387 case T_INT: { 10388 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10389 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10390 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10391 break; 10392 } 10393 case T_LONG: { 10394 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10395 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10396 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10397 break; 10398 } 10399 case T_FLOAT: { 10400 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 10401 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 10402 break; 10403 } 10404 case T_DOUBLE: { 10405 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 10406 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 10407 break; 10408 } 10409 default: assert(false, 
"%s", type2name(src1_elem_bt)); break; 10410 } 10411 %} 10412 ins_pipe( pipe_slow ); 10413 %} 10414 10415 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{ 10416 predicate(Matcher::vector_length(n) <= 32); 10417 match(Set dst (MaskAll src)); 10418 format %{ "mask_all_evexI_LE32 $dst, $src \t" %} 10419 ins_encode %{ 10420 int mask_len = Matcher::vector_length(this); 10421 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); 10422 %} 10423 ins_pipe( pipe_slow ); 10424 %} 10425 10426 #ifdef _LP64 10427 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ 10428 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); 10429 match(Set dst (XorVMask src (MaskAll cnt))); 10430 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp); 10431 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %} 10432 ins_encode %{ 10433 uint masklen = Matcher::vector_length(this); 10434 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register); 10435 %} 10436 ins_pipe( pipe_slow ); 10437 %} 10438 10439 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{ 10440 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) || 10441 (Matcher::vector_length(n) == 16) || 10442 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw())); 10443 match(Set dst (XorVMask src (MaskAll cnt))); 10444 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %} 10445 ins_encode %{ 10446 uint masklen = Matcher::vector_length(this); 10447 __ knot(masklen, $dst$$KRegister, $src$$KRegister); 10448 %} 10449 ins_pipe( pipe_slow ); 10450 %} 10451 10452 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{ 10453 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8); 10454 match(Set dst (VectorLongToMask src)); 10455 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp); 10456 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %} 10457 ins_encode %{ 10458 int mask_len = Matcher::vector_length(this); 10459 int vec_enc = vector_length_encoding(mask_len); 10460 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10461 $rtmp2$$Register, xnoreg, mask_len, vec_enc); 10462 %} 10463 ins_pipe( pipe_slow ); 10464 %} 10465 10466 10467 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{ 10468 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8); 10469 match(Set dst (VectorLongToMask src)); 10470 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr); 10471 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %} 10472 ins_encode %{ 10473 int mask_len = Matcher::vector_length(this); 10474 assert(mask_len <= 32, "invalid mask length"); 10475 int vec_enc = vector_length_encoding(mask_len); 10476 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10477 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc); 10478 %} 10479 ins_pipe( pipe_slow ); 10480 %} 10481 10482 instruct long_to_mask_evex(kReg dst, rRegL src) %{ 10483 predicate(n->bottom_type()->isa_vectmask()); 10484 match(Set dst (VectorLongToMask src)); 10485 format %{ "long_to_mask_evex $dst, $src\t!" 
%} 10486 ins_encode %{ 10487 __ kmov($dst$$KRegister, $src$$Register); 10488 %} 10489 ins_pipe( pipe_slow ); 10490 %} 10491 #endif 10492 10493 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{ 10494 match(Set dst (AndVMask src1 src2)); 10495 match(Set dst (OrVMask src1 src2)); 10496 match(Set dst (XorVMask src1 src2)); 10497 effect(TEMP kscratch); 10498 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %} 10499 ins_encode %{ 10500 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 10501 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 10502 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal"); 10503 uint masklen = Matcher::vector_length(this); 10504 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen; 10505 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister); 10506 %} 10507 ins_pipe( pipe_slow ); 10508 %} 10509 10510 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{ 10511 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 10512 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %} 10513 ins_encode %{ 10514 int vlen_enc = vector_length_encoding(this); 10515 BasicType bt = Matcher::vector_element_basic_type(this); 10516 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 10517 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc); 10518 %} 10519 ins_pipe( pipe_slow ); 10520 %} 10521 10522 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{ 10523 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 10524 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! 
vternlog masked operation" %} 10525 ins_encode %{ 10526 int vlen_enc = vector_length_encoding(this); 10527 BasicType bt = Matcher::vector_element_basic_type(this); 10528 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 10529 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc); 10530 %} 10531 ins_pipe( pipe_slow ); 10532 %} 10533 10534 instruct castMM(kReg dst) 10535 %{ 10536 match(Set dst (CastVV dst)); 10537 10538 size(0); 10539 format %{ "# castVV of $dst" %} 10540 ins_encode(/* empty encoding */); 10541 ins_cost(0); 10542 ins_pipe(empty); 10543 %} 10544 10545 instruct castVV(vec dst) 10546 %{ 10547 match(Set dst (CastVV dst)); 10548 10549 size(0); 10550 format %{ "# castVV of $dst" %} 10551 ins_encode(/* empty encoding */); 10552 ins_cost(0); 10553 ins_pipe(empty); 10554 %} 10555 10556 instruct castVVLeg(legVec dst) 10557 %{ 10558 match(Set dst (CastVV dst)); 10559 10560 size(0); 10561 format %{ "# castVV of $dst" %} 10562 ins_encode(/* empty encoding */); 10563 ins_cost(0); 10564 ins_pipe(empty); 10565 %} 10566 10567 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr) 10568 %{ 10569 match(Set dst (IsInfiniteF src)); 10570 effect(TEMP ktmp, KILL cr); 10571 format %{ "float_class_check $dst, $src" %} 10572 ins_encode %{ 10573 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18); 10574 __ kmovbl($dst$$Register, $ktmp$$KRegister); 10575 %} 10576 ins_pipe(pipe_slow); 10577 %} 10578 10579 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr) 10580 %{ 10581 match(Set dst (IsInfiniteD src)); 10582 effect(TEMP ktmp, KILL cr); 10583 format %{ "double_class_check $dst, $src" %} 10584 ins_encode %{ 10585 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18); 10586 __ kmovbl($dst$$Register, $ktmp$$KRegister); 10587 %} 10588 ins_pipe(pipe_slow); 10589 %} 10590 10591 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2) 10592 %{ 10593 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10594 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10595 match(Set dst (SaturatingAddV src1 src2)); 10596 match(Set dst (SaturatingSubV src1 src2)); 10597 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %} 10598 ins_encode %{ 10599 int vlen_enc = vector_length_encoding(this); 10600 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10601 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10602 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 10603 %} 10604 ins_pipe(pipe_slow); 10605 %} 10606 10607 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2) 10608 %{ 10609 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10610 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10611 match(Set dst (SaturatingAddV src1 src2)); 10612 match(Set dst (SaturatingSubV src1 src2)); 10613 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %} 10614 ins_encode %{ 10615 int vlen_enc = vector_length_encoding(this); 10616 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10617 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10618 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10619 %} 10620 ins_pipe(pipe_slow); 10621 %} 10622 10623 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2) 10624 %{ 10625 
predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10626 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() && 10627 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10628 match(Set dst (SaturatingAddV src1 src2)); 10629 match(Set dst (SaturatingSubV src1 src2)); 10630 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2); 10631 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 10632 ins_encode %{ 10633 int vlen_enc = vector_length_encoding(this); 10634 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10635 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10636 $src1$$XMMRegister, $src2$$XMMRegister, 10637 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 10638 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc); 10639 %} 10640 ins_pipe(pipe_slow); 10641 %} 10642 10643 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4) 10644 %{ 10645 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10646 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() && 10647 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10648 match(Set dst (SaturatingAddV src1 src2)); 10649 match(Set dst (SaturatingSubV src1 src2)); 10650 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4); 10651 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 10652 ins_encode %{ 10653 int vlen_enc = vector_length_encoding(this); 10654 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10655 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, 10656 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 10657 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc); 10658 %} 10659 ins_pipe(pipe_slow); 10660 %} 10661 10662 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp) 10663 %{ 10664 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10665 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10666 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10667 match(Set dst (SaturatingAddV src1 src2)); 10668 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp); 10669 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! 
using $xtmp1, $xtmp2 and $ktmp as TEMP" %} 10670 ins_encode %{ 10671 int vlen_enc = vector_length_encoding(this); 10672 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10673 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10674 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc); 10675 %} 10676 ins_pipe(pipe_slow); 10677 %} 10678 10679 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3) 10680 %{ 10681 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10682 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10683 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10684 match(Set dst (SaturatingAddV src1 src2)); 10685 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 10686 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 10687 ins_encode %{ 10688 int vlen_enc = vector_length_encoding(this); 10689 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10690 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10691 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc); 10692 %} 10693 ins_pipe(pipe_slow); 10694 %} 10695 10696 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp) 10697 %{ 10698 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10699 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10700 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10701 match(Set dst (SaturatingSubV src1 src2)); 10702 effect(TEMP ktmp); 10703 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %} 10704 ins_encode %{ 10705 int vlen_enc = vector_length_encoding(this); 10706 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10707 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, 10708 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc); 10709 %} 10710 ins_pipe(pipe_slow); 10711 %} 10712 10713 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) 10714 %{ 10715 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10716 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10717 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10718 match(Set dst (SaturatingSubV src1 src2)); 10719 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 10720 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! 
using $xtmp1 and $xtmp2 as TEMP" %} 10721 ins_encode %{ 10722 int vlen_enc = vector_length_encoding(this); 10723 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10724 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10725 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 10726 %} 10727 ins_pipe(pipe_slow); 10728 %} 10729 10730 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2) 10731 %{ 10732 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10733 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10734 match(Set dst (SaturatingAddV src1 (LoadVector src2))); 10735 match(Set dst (SaturatingSubV src1 (LoadVector src2))); 10736 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %} 10737 ins_encode %{ 10738 int vlen_enc = vector_length_encoding(this); 10739 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10740 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10741 $src1$$XMMRegister, $src2$$Address, false, vlen_enc); 10742 %} 10743 ins_pipe(pipe_slow); 10744 %} 10745 10746 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2) 10747 %{ 10748 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10749 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10750 match(Set dst (SaturatingAddV src1 (LoadVector src2))); 10751 match(Set dst (SaturatingSubV src1 (LoadVector src2))); 10752 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %} 10753 ins_encode %{ 10754 int vlen_enc = vector_length_encoding(this); 10755 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10756 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10757 $src1$$XMMRegister, $src2$$Address, true, vlen_enc); 10758 %} 10759 ins_pipe(pipe_slow); 10760 %} 10761 10762 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{ 10763 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10764 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10765 match(Set dst (SaturatingAddV (Binary dst src) mask)); 10766 match(Set dst (SaturatingSubV (Binary dst src) mask)); 10767 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %} 10768 ins_encode %{ 10769 int vlen_enc = vector_length_encoding(this); 10770 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10771 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10772 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc); 10773 %} 10774 ins_pipe( pipe_slow ); 10775 %} 10776 10777 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{ 10778 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10779 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10780 match(Set dst (SaturatingAddV (Binary dst src) mask)); 10781 match(Set dst (SaturatingSubV (Binary dst src) mask)); 10782 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %} 10783 ins_encode %{ 10784 int vlen_enc = vector_length_encoding(this); 10785 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10786 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10787 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc); 10788 %} 10789 ins_pipe( 
pipe_slow ); 10790 %} 10791 10792 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{ 10793 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10794 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10795 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask)); 10796 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask)); 10797 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %} 10798 ins_encode %{ 10799 int vlen_enc = vector_length_encoding(this); 10800 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10801 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10802 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc); 10803 %} 10804 ins_pipe( pipe_slow ); 10805 %} 10806 10807 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{ 10808 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10809 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10810 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask)); 10811 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask)); 10812 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %} 10813 ins_encode %{ 10814 int vlen_enc = vector_length_encoding(this); 10815 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10816 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10817 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc); 10818 %} 10819 ins_pipe( pipe_slow ); 10820 %} 10821 10822 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2) 10823 %{ 10824 match(Set index (SelectFromTwoVector (Binary index src1) src2)); 10825 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %} 10826 ins_encode %{ 10827 int vlen_enc = vector_length_encoding(this); 10828 BasicType bt = Matcher::vector_element_basic_type(this); 10829 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 10830 %} 10831 ins_pipe(pipe_slow); 10832 %}
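
// The masked arithmetic/logic rules above are matched for Vector API operations that
// carry a VectorMask, e.g. IntVector::lanewise(op, v, mask). Below is a minimal,
// illustrative Java sketch of such code (hypothetical demo class and array layout,
// not part of this file; assumes an AVX-512 target so the mask maps to a kReg):
//
//   import jdk.incubator.vector.*;
//
//   final class MaskedLanewiseDemo {
//     private static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;
//
//     // r[i] = m[i] ? a[i] + b[i] : a[i]; unset lanes keep the first operand's value,
//     // roughly corresponding to the merge-masking used by the rules above.
//     static void addMasked(int[] a, int[] b, boolean[] m, int[] r) {
//       int i = 0;
//       for (; i < SPECIES.loopBound(a.length); i += SPECIES.length()) {
//         VectorMask<Integer> mask = VectorMask.fromArray(SPECIES, m, i);
//         IntVector va = IntVector.fromArray(SPECIES, a, i);
//         IntVector vb = IntVector.fromArray(SPECIES, b, i);
//         va.lanewise(VectorOperators.ADD, vb, mask).intoArray(r, i);
//       }
//       for (; i < a.length; i++) {        // scalar tail loop
//         r[i] = m[i] ? a[i] + b[i] : a[i];
//       }
//     }
//   }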