//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers, 16 words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
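//
// For example, xmm0 is described below by XMM0 (word a, xmm0->as_VMReg())
// through XMM0p (word p, xmm0->as_VMReg()->next(15)); a Float occupies
// XMM0 alone, a Double the XMM0/XMM0b pair.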
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
                   XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

// flags allocation class should be last.
alloc_class chunk3(RFLAGS);


// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7,
                           XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15);

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7,
                         XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31);

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b,
                            XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b);

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b,
                          XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b);

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7,
                             XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15);

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7,
                           XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31);

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b,
                             XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b);
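
// The vectord_reg_evex class below extends vectord_reg_legacy above with
// XMM16-XMM31; the reg_class_dynamic declarations then select the evex or
// legacy variant at runtime via VM_Version::supports_evex() (or
// supports_avx512vlbwdq() for the _vlbwdq variants).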

// Class for all 64bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b,
                           XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b);

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d,
                             XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d);

// Class for all 128bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d,
                           XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d);

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
                             XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);

// Class for all 256bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
                           XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
                           XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

// Class for restricted 512bit vector registers
reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1076 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1077 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1078 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1079 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1080 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1081 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1082 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 1083 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1084 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1085 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1086 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1087 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1088 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1089 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1090 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1091 1092 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1093 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1094 1095 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1096 %} 1097 1098 1099 //----------SOURCE BLOCK------------------------------------------------------- 1100 // This is a block of C++ code which provides values, functions, and 1101 // definitions necessary in the rest of the architecture description 1102 1103 source_hpp %{ 1104 // Header information of the source block. 1105 // Method declarations/definitions which are used outside 1106 // the ad-scope can conveniently be defined here. 1107 // 1108 // To keep related declarations/definitions/uses close together, 1109 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1110 1111 #include "runtime/vm_version.hpp" 1112 1113 class NativeJump; 1114 1115 class CallStubImpl { 1116 1117 //-------------------------------------------------------------- 1118 //---< Used for optimization in Compile::shorten_branches >--- 1119 //-------------------------------------------------------------- 1120 1121 public: 1122 // Size of call trampoline stub. 
1123 static uint size_call_trampoline() { 1124 return 0; // no call trampolines on this platform 1125 } 1126 1127 // number of relocations needed by a call trampoline stub 1128 static uint reloc_call_trampoline() { 1129 return 0; // no call trampolines on this platform 1130 } 1131 }; 1132 1133 class HandlerImpl { 1134 1135 public: 1136 1137 static int emit_exception_handler(C2_MacroAssembler *masm); 1138 static int emit_deopt_handler(C2_MacroAssembler* masm); 1139 1140 static uint size_exception_handler() { 1141 // NativeCall instruction size is the same as NativeJump. 1142 // exception handler starts out as jump and can be patched to 1143 // a call be deoptimization. (4932387) 1144 // Note that this value is also credited (in output.cpp) to 1145 // the size of the code section. 1146 return NativeJump::instruction_size; 1147 } 1148 1149 static uint size_deopt_handler() { 1150 // three 5 byte instructions plus one move for unreachable address. 1151 return 15+3; 1152 } 1153 }; 1154 1155 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { 1156 switch(bytes) { 1157 case 4: // fall-through 1158 case 8: // fall-through 1159 case 16: return Assembler::AVX_128bit; 1160 case 32: return Assembler::AVX_256bit; 1161 case 64: return Assembler::AVX_512bit; 1162 1163 default: { 1164 ShouldNotReachHere(); 1165 return Assembler::AVX_NoVec; 1166 } 1167 } 1168 } 1169 1170 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) { 1171 return vector_length_encoding(Matcher::vector_length_in_bytes(n)); 1172 } 1173 1174 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) { 1175 uint def_idx = use->operand_index(opnd); 1176 Node* def = use->in(def_idx); 1177 return vector_length_encoding(def); 1178 } 1179 1180 static inline bool is_vector_popcount_predicate(BasicType bt) { 1181 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1182 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1183 } 1184 1185 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) { 1186 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() && 1187 (VM_Version::supports_avx512vl() || vlen_bytes == 64); 1188 } 1189 1190 class Node::PD { 1191 public: 1192 enum NodeFlags { 1193 Flag_intel_jcc_erratum = Node::_last_flag << 1, 1194 Flag_sets_carry_flag = Node::_last_flag << 2, 1195 Flag_sets_parity_flag = Node::_last_flag << 3, 1196 Flag_sets_zero_flag = Node::_last_flag << 4, 1197 Flag_sets_overflow_flag = Node::_last_flag << 5, 1198 Flag_sets_sign_flag = Node::_last_flag << 6, 1199 Flag_clears_carry_flag = Node::_last_flag << 7, 1200 Flag_clears_parity_flag = Node::_last_flag << 8, 1201 Flag_clears_zero_flag = Node::_last_flag << 9, 1202 Flag_clears_overflow_flag = Node::_last_flag << 10, 1203 Flag_clears_sign_flag = Node::_last_flag << 11, 1204 _last_flag = Flag_clears_sign_flag 1205 }; 1206 }; 1207 1208 %} // end source_hpp 1209 1210 source %{ 1211 1212 #include "opto/addnode.hpp" 1213 #include "c2_intelJccErratum_x86.hpp" 1214 1215 void PhaseOutput::pd_perform_mach_node_analysis() { 1216 if (VM_Version::has_intel_jcc_erratum()) { 1217 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1218 _buf_sizes._code += extra_padding; 1219 } 1220 } 1221 1222 int MachNode::pd_alignment_required() const { 1223 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1224 // Conservatively add worst case 
padding. We assume that relocInfo::addr_unit() is 1 on x86. 1225 return IntelJccErratum::largest_jcc_size() + 1; 1226 } else { 1227 return 1; 1228 } 1229 } 1230 1231 int MachNode::compute_padding(int current_offset) const { 1232 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1233 Compile* C = Compile::current(); 1234 PhaseOutput* output = C->output(); 1235 Block* block = output->block(); 1236 int index = output->index(); 1237 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1238 } else { 1239 return 0; 1240 } 1241 } 1242 1243 // Emit exception handler code. 1244 // Stuff framesize into a register and call a VM stub routine. 1245 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) { 1246 1247 // Note that the code buffer's insts_mark is always relative to insts. 1248 // That's why we must use the macroassembler to generate a handler. 1249 address base = __ start_a_stub(size_exception_handler()); 1250 if (base == nullptr) { 1251 ciEnv::current()->record_failure("CodeCache is full"); 1252 return 0; // CodeBuffer::expand failed 1253 } 1254 int offset = __ offset(); 1255 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1256 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1257 __ end_a_stub(); 1258 return offset; 1259 } 1260 1261 // Emit deopt handler code. 1262 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { 1263 1264 // Note that the code buffer's insts_mark is always relative to insts. 1265 // That's why we must use the macroassembler to generate a handler. 1266 address base = __ start_a_stub(size_deopt_handler()); 1267 if (base == nullptr) { 1268 ciEnv::current()->record_failure("CodeCache is full"); 1269 return 0; // CodeBuffer::expand failed 1270 } 1271 int offset = __ offset(); 1272 1273 address the_pc = (address) __ pc(); 1274 Label next; 1275 // push a "the_pc" on the stack without destroying any registers 1276 // as they all may be live. 1277 1278 // push address of "next" 1279 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1280 __ bind(next); 1281 // adjust it so it matches "the_pc" 1282 __ subptr(Address(rsp, 0), __ offset() - offset); 1283 1284 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1285 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1286 __ end_a_stub(); 1287 return offset; 1288 } 1289 1290 static Assembler::Width widthForType(BasicType bt) { 1291 if (bt == T_BYTE) { 1292 return Assembler::B; 1293 } else if (bt == T_SHORT) { 1294 return Assembler::W; 1295 } else if (bt == T_INT) { 1296 return Assembler::D; 1297 } else { 1298 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1299 return Assembler::Q; 1300 } 1301 } 1302 1303 //============================================================================= 1304 1305 // Float masks come from different places depending on platform. 
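// Illustrative sketch only (scalar C++, not emitted code): the sign-mask and
// sign-flip constants work by clearing or toggling the IEEE-754 sign bit, so
// AbsF/NegF and their packed forms reduce to a single andps/xorps against a
// memory-resident mask. A minimal stand-alone equivalent:
//
//   #include <cstdint>
//   #include <cstring>
//
//   float abs_via_signmask(float x) {          // andps xmm, [float_signmask]
//     uint32_t bits;
//     std::memcpy(&bits, &x, sizeof(bits));
//     bits &= 0x7fffffffu;                     // clear the sign bit
//     std::memcpy(&x, &bits, sizeof(x));
//     return x;
//   }
//
//   float neg_via_signflip(float x) {          // xorps xmm, [float_signflip]
//     uint32_t bits;
//     std::memcpy(&bits, &x, sizeof(bits));
//     bits ^= 0x80000000u;                     // toggle the sign bit
//     std::memcpy(&x, &bits, sizeof(x));
//     return x;
//   }
//
// The accessors below simply hand out the addresses of suitably aligned
// copies of such masks from StubRoutines::x86.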
1306 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1307 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1308 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1309 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1310 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1311 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1312 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1313 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1314 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1315 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1316 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1317 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1318 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1319 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1320 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1321 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1322 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1323 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1324 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1325 1326 //============================================================================= 1327 bool Matcher::match_rule_supported(int opcode) { 1328 if (!has_match_rule(opcode)) { 1329 return false; // no match rule present 1330 } 1331 switch (opcode) { 1332 case Op_AbsVL: 1333 case Op_StoreVectorScatter: 1334 if (UseAVX < 3) { 1335 return false; 1336 } 1337 break; 1338 case Op_PopCountI: 1339 case Op_PopCountL: 1340 if (!UsePopCountInstruction) { 1341 return false; 1342 } 1343 break; 1344 case Op_PopCountVI: 1345 if (UseAVX < 2) { 1346 return false; 1347 } 1348 break; 1349 case Op_CompressV: 1350 case Op_ExpandV: 1351 case Op_PopCountVL: 1352 if (UseAVX < 2) { 1353 return false; 1354 } 1355 break; 1356 case Op_MulVI: 1357 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1358 return false; 1359 } 1360 break; 1361 case Op_MulVL: 1362 if (UseSSE < 4) { // only with SSE4_1 or AVX 1363 return false; 1364 } 1365 break; 1366 case Op_MulReductionVL: 1367 if (VM_Version::supports_avx512dq() == false) { 1368 return false; 1369 } 1370 break; 1371 case Op_AbsVB: 1372 case Op_AbsVS: 1373 case Op_AbsVI: 1374 case Op_AddReductionVI: 1375 case Op_AndReductionV: 1376 case Op_OrReductionV: 1377 case Op_XorReductionV: 1378 if (UseSSE < 3) { // requires at least SSSE3 1379 return false; 1380 } 1381 break; 1382 case Op_MaxHF: 1383 case Op_MinHF: 1384 if (!VM_Version::supports_avx512vlbw()) { 1385 return false; 1386 } // fallthrough 1387 case Op_AddHF: 1388 case Op_DivHF: 1389 case Op_FmaHF: 1390 case Op_MulHF: 1391 case Op_ReinterpretS2HF: 1392 case Op_ReinterpretHF2S: 1393 case Op_SubHF: 1394 case Op_SqrtHF: 1395 if (!VM_Version::supports_avx512_fp16()) { 1396 return 
false; 1397 } 1398 break; 1399 case Op_VectorLoadShuffle: 1400 case Op_VectorRearrange: 1401 case Op_MulReductionVI: 1402 if (UseSSE < 4) { // requires at least SSE4 1403 return false; 1404 } 1405 break; 1406 case Op_IsInfiniteF: 1407 case Op_IsInfiniteD: 1408 if (!VM_Version::supports_avx512dq()) { 1409 return false; 1410 } 1411 break; 1412 case Op_SqrtVD: 1413 case Op_SqrtVF: 1414 case Op_VectorMaskCmp: 1415 case Op_VectorCastB2X: 1416 case Op_VectorCastS2X: 1417 case Op_VectorCastI2X: 1418 case Op_VectorCastL2X: 1419 case Op_VectorCastF2X: 1420 case Op_VectorCastD2X: 1421 case Op_VectorUCastB2X: 1422 case Op_VectorUCastS2X: 1423 case Op_VectorUCastI2X: 1424 case Op_VectorMaskCast: 1425 if (UseAVX < 1) { // enabled for AVX only 1426 return false; 1427 } 1428 break; 1429 case Op_PopulateIndex: 1430 if (UseAVX < 2) { 1431 return false; 1432 } 1433 break; 1434 case Op_RoundVF: 1435 if (UseAVX < 2) { // enabled for AVX2 only 1436 return false; 1437 } 1438 break; 1439 case Op_RoundVD: 1440 if (UseAVX < 3) { 1441 return false; // enabled for AVX3 only 1442 } 1443 break; 1444 case Op_CompareAndSwapL: 1445 case Op_CompareAndSwapP: 1446 break; 1447 case Op_StrIndexOf: 1448 if (!UseSSE42Intrinsics) { 1449 return false; 1450 } 1451 break; 1452 case Op_StrIndexOfChar: 1453 if (!UseSSE42Intrinsics) { 1454 return false; 1455 } 1456 break; 1457 case Op_OnSpinWait: 1458 if (VM_Version::supports_on_spin_wait() == false) { 1459 return false; 1460 } 1461 break; 1462 case Op_MulVB: 1463 case Op_LShiftVB: 1464 case Op_RShiftVB: 1465 case Op_URShiftVB: 1466 case Op_VectorInsert: 1467 case Op_VectorLoadMask: 1468 case Op_VectorStoreMask: 1469 case Op_VectorBlend: 1470 if (UseSSE < 4) { 1471 return false; 1472 } 1473 break; 1474 case Op_MaxD: 1475 case Op_MaxF: 1476 case Op_MinD: 1477 case Op_MinF: 1478 if (UseAVX < 1) { // enabled for AVX only 1479 return false; 1480 } 1481 break; 1482 case Op_CacheWB: 1483 case Op_CacheWBPreSync: 1484 case Op_CacheWBPostSync: 1485 if (!VM_Version::supports_data_cache_line_flush()) { 1486 return false; 1487 } 1488 break; 1489 case Op_ExtractB: 1490 case Op_ExtractL: 1491 case Op_ExtractI: 1492 case Op_RoundDoubleMode: 1493 if (UseSSE < 4) { 1494 return false; 1495 } 1496 break; 1497 case Op_RoundDoubleModeV: 1498 if (VM_Version::supports_avx() == false) { 1499 return false; // 128bit vroundpd is not available 1500 } 1501 break; 1502 case Op_LoadVectorGather: 1503 case Op_LoadVectorGatherMasked: 1504 if (UseAVX < 2) { 1505 return false; 1506 } 1507 break; 1508 case Op_FmaF: 1509 case Op_FmaD: 1510 case Op_FmaVD: 1511 case Op_FmaVF: 1512 if (!UseFMA) { 1513 return false; 1514 } 1515 break; 1516 case Op_MacroLogicV: 1517 if (UseAVX < 3 || !UseVectorMacroLogic) { 1518 return false; 1519 } 1520 break; 1521 1522 case Op_VectorCmpMasked: 1523 case Op_VectorMaskGen: 1524 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1525 return false; 1526 } 1527 break; 1528 case Op_VectorMaskFirstTrue: 1529 case Op_VectorMaskLastTrue: 1530 case Op_VectorMaskTrueCount: 1531 case Op_VectorMaskToLong: 1532 if (UseAVX < 1) { 1533 return false; 1534 } 1535 break; 1536 case Op_RoundF: 1537 case Op_RoundD: 1538 break; 1539 case Op_CopySignD: 1540 case Op_CopySignF: 1541 if (UseAVX < 3) { 1542 return false; 1543 } 1544 if (!VM_Version::supports_avx512vl()) { 1545 return false; 1546 } 1547 break; 1548 case Op_CompressBits: 1549 case Op_ExpandBits: 1550 if (!VM_Version::supports_bmi2()) { 1551 return false; 1552 } 1553 break; 1554 case Op_CompressM: 1555 if (!VM_Version::supports_avx512vl() || 
!VM_Version::supports_bmi2()) { 1556 return false; 1557 } 1558 break; 1559 case Op_ConvF2HF: 1560 case Op_ConvHF2F: 1561 if (!VM_Version::supports_float16()) { 1562 return false; 1563 } 1564 break; 1565 case Op_VectorCastF2HF: 1566 case Op_VectorCastHF2F: 1567 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1568 return false; 1569 } 1570 break; 1571 } 1572 return true; // Match rules are supported by default. 1573 } 1574 1575 //------------------------------------------------------------------------ 1576 1577 static inline bool is_pop_count_instr_target(BasicType bt) { 1578 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1579 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1580 } 1581 1582 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) { 1583 return match_rule_supported_vector(opcode, vlen, bt); 1584 } 1585 1586 // Identify extra cases that we might want to provide match rules for vector nodes and 1587 // other intrinsics guarded with vector length (vlen) and element type (bt). 1588 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1589 if (!match_rule_supported(opcode)) { 1590 return false; 1591 } 1592 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1593 // * SSE2 supports 128bit vectors for all types; 1594 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1595 // * AVX2 supports 256bit vectors for all types; 1596 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1597 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1598 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1599 // And MaxVectorSize is taken into account as well. 
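// Illustrative sketch only (hypothetical helper, not used by the matcher):
// the restrictions listed above can be read as a per-ISA, per-type size cap.
//
//   static int illustrative_max_vector_bits(BasicType bt) {
//     if (UseAVX > 2) {                                  // EVEX-capable
//       return (is_subword_type(bt) && !VM_Version::supports_avx512bw()) ? 256 : 512;
//     }
//     if (UseAVX == 2) return 256;                       // AVX2: 256 bits for all types
//     if (UseAVX == 1) {                                 // AVX1: 256 bits for FP only
//       return (bt == T_FLOAT || bt == T_DOUBLE) ? 256 : 128;
//     }
//     return 128;                                        // SSE2 baseline
//   }
//
// For example, T_SHORT with vlen == 32 (512 bits) needs AVX512BW, while
// T_INT with vlen == 8 (256 bits) needs AVX2 because AVX1 has no 256-bit
// integer operations. The authoritative version of this logic, including the
// MaxVectorSize clamp and the minimum-size check, is
// Matcher::vector_width_in_bytes() later in this file.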
1600 if (!vector_size_supported(bt, vlen)) { 1601 return false; 1602 } 1603 // Special cases which require vector length follow: 1604 // * implementation limitations 1605 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1606 // * 128bit vroundpd instruction is present only in AVX1 1607 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1608 switch (opcode) { 1609 case Op_MaxVHF: 1610 case Op_MinVHF: 1611 if (!VM_Version::supports_avx512bw()) { 1612 return false; 1613 } 1614 case Op_AddVHF: 1615 case Op_DivVHF: 1616 case Op_FmaVHF: 1617 case Op_MulVHF: 1618 case Op_SubVHF: 1619 case Op_SqrtVHF: 1620 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1621 return false; 1622 } 1623 if (!VM_Version::supports_avx512_fp16()) { 1624 return false; 1625 } 1626 break; 1627 case Op_AbsVF: 1628 case Op_NegVF: 1629 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1630 return false; // 512bit vandps and vxorps are not available 1631 } 1632 break; 1633 case Op_AbsVD: 1634 case Op_NegVD: 1635 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1636 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1637 } 1638 break; 1639 case Op_RotateRightV: 1640 case Op_RotateLeftV: 1641 if (bt != T_INT && bt != T_LONG) { 1642 return false; 1643 } // fallthrough 1644 case Op_MacroLogicV: 1645 if (!VM_Version::supports_evex() || 1646 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1647 return false; 1648 } 1649 break; 1650 case Op_ClearArray: 1651 case Op_VectorMaskGen: 1652 case Op_VectorCmpMasked: 1653 if (!VM_Version::supports_avx512bw()) { 1654 return false; 1655 } 1656 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1657 return false; 1658 } 1659 break; 1660 case Op_LoadVectorMasked: 1661 case Op_StoreVectorMasked: 1662 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1663 return false; 1664 } 1665 break; 1666 case Op_UMinV: 1667 case Op_UMaxV: 1668 if (UseAVX == 0) { 1669 return false; 1670 } 1671 break; 1672 case Op_MaxV: 1673 case Op_MinV: 1674 if (UseSSE < 4 && is_integral_type(bt)) { 1675 return false; 1676 } 1677 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1678 // Float/Double intrinsics are enabled for AVX family currently. 1679 if (UseAVX == 0) { 1680 return false; 1681 } 1682 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1683 return false; 1684 } 1685 } 1686 break; 1687 case Op_CallLeafVector: 1688 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1689 return false; 1690 } 1691 break; 1692 case Op_AddReductionVI: 1693 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1694 return false; 1695 } 1696 // fallthrough 1697 case Op_AndReductionV: 1698 case Op_OrReductionV: 1699 case Op_XorReductionV: 1700 if (is_subword_type(bt) && (UseSSE < 4)) { 1701 return false; 1702 } 1703 break; 1704 case Op_MinReductionV: 1705 case Op_MaxReductionV: 1706 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1707 return false; 1708 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1709 return false; 1710 } 1711 // Float/Double intrinsics enabled for AVX family. 
1712 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1713 return false; 1714 } 1715 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1716 return false; 1717 } 1718 break; 1719 case Op_VectorTest: 1720 if (UseSSE < 4) { 1721 return false; // Implementation limitation 1722 } else if (size_in_bits < 32) { 1723 return false; // Implementation limitation 1724 } 1725 break; 1726 case Op_VectorLoadShuffle: 1727 case Op_VectorRearrange: 1728 if(vlen == 2) { 1729 return false; // Implementation limitation due to how shuffle is loaded 1730 } else if (size_in_bits == 256 && UseAVX < 2) { 1731 return false; // Implementation limitation 1732 } 1733 break; 1734 case Op_VectorLoadMask: 1735 case Op_VectorMaskCast: 1736 if (size_in_bits == 256 && UseAVX < 2) { 1737 return false; // Implementation limitation 1738 } 1739 // fallthrough 1740 case Op_VectorStoreMask: 1741 if (vlen == 2) { 1742 return false; // Implementation limitation 1743 } 1744 break; 1745 case Op_PopulateIndex: 1746 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1747 return false; 1748 } 1749 break; 1750 case Op_VectorCastB2X: 1751 case Op_VectorCastS2X: 1752 case Op_VectorCastI2X: 1753 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1754 return false; 1755 } 1756 break; 1757 case Op_VectorCastL2X: 1758 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1759 return false; 1760 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1761 return false; 1762 } 1763 break; 1764 case Op_VectorCastF2X: { 1765 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1766 // happen after intermediate conversion to integer and special handling 1767 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 
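  // A concrete (illustrative) example of the two-step rule: in Java,
  // (byte)1e10f is evaluated as (byte)(int)1e10f; the float first saturates
  // to Integer.MAX_VALUE (0x7fffffff) and the low byte of that value, -1, is
  // the result. The vectorized cast mirrors this intermediate conversion to
  // int, and that special handling is what needs AVX2's vpcmpeqd for 256-bit
  // source vectors.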
1768 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1769 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1770 return false; 1771 } 1772 } 1773 // fallthrough 1774 case Op_VectorCastD2X: 1775 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1776 return false; 1777 } 1778 break; 1779 case Op_VectorCastF2HF: 1780 case Op_VectorCastHF2F: 1781 if (!VM_Version::supports_f16c() && 1782 ((!VM_Version::supports_evex() || 1783 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1784 return false; 1785 } 1786 break; 1787 case Op_RoundVD: 1788 if (!VM_Version::supports_avx512dq()) { 1789 return false; 1790 } 1791 break; 1792 case Op_MulReductionVI: 1793 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1794 return false; 1795 } 1796 break; 1797 case Op_LoadVectorGatherMasked: 1798 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1799 return false; 1800 } 1801 if (is_subword_type(bt) && 1802 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) || 1803 (size_in_bits < 64) || 1804 (bt == T_SHORT && !VM_Version::supports_bmi2()))) { 1805 return false; 1806 } 1807 break; 1808 case Op_StoreVectorScatterMasked: 1809 case Op_StoreVectorScatter: 1810 if (is_subword_type(bt)) { 1811 return false; 1812 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1813 return false; 1814 } 1815 // fallthrough 1816 case Op_LoadVectorGather: 1817 if (!is_subword_type(bt) && size_in_bits == 64) { 1818 return false; 1819 } 1820 if (is_subword_type(bt) && size_in_bits < 64) { 1821 return false; 1822 } 1823 break; 1824 case Op_SaturatingAddV: 1825 case Op_SaturatingSubV: 1826 if (UseAVX < 1) { 1827 return false; // Implementation limitation 1828 } 1829 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1830 return false; 1831 } 1832 break; 1833 case Op_SelectFromTwoVector: 1834 if (size_in_bits < 128 || (size_in_bits < 512 && !VM_Version::supports_avx512vl())) { 1835 return false; 1836 } 1837 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 1838 return false; 1839 } 1840 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 1841 return false; 1842 } 1843 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) { 1844 return false; 1845 } 1846 break; 1847 case Op_MaskAll: 1848 if (!VM_Version::supports_evex()) { 1849 return false; 1850 } 1851 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1852 return false; 1853 } 1854 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1855 return false; 1856 } 1857 break; 1858 case Op_VectorMaskCmp: 1859 if (vlen < 2 || size_in_bits < 32) { 1860 return false; 1861 } 1862 break; 1863 case Op_CompressM: 1864 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1865 return false; 1866 } 1867 break; 1868 case Op_CompressV: 1869 case Op_ExpandV: 1870 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1871 return false; 1872 } 1873 if (size_in_bits < 128 ) { 1874 return false; 1875 } 1876 case Op_VectorLongToMask: 1877 if (UseAVX < 1) { 1878 return false; 1879 } 1880 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1881 return false; 1882 } 1883 break; 1884 case Op_SignumVD: 1885 case Op_SignumVF: 1886 if (UseAVX < 1) { 1887 return false; 1888 } 1889 break; 1890 case Op_PopCountVI: 1891 case Op_PopCountVL: { 1892 if (!is_pop_count_instr_target(bt) && 1893 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 1894 
return false; 1895 } 1896 } 1897 break; 1898 case Op_ReverseV: 1899 case Op_ReverseBytesV: 1900 if (UseAVX < 2) { 1901 return false; 1902 } 1903 break; 1904 case Op_CountTrailingZerosV: 1905 case Op_CountLeadingZerosV: 1906 if (UseAVX < 2) { 1907 return false; 1908 } 1909 break; 1910 } 1911 return true; // Per default match rules are supported. 1912 } 1913 1914 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 1915 // ADLC based match_rule_supported routine checks for the existence of pattern based 1916 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 1917 // of their non-masked counterpart with mask edge being the differentiator. 1918 // This routine does a strict check on the existence of masked operation patterns 1919 // by returning a default false value for all the other opcodes apart from the 1920 // ones whose masked instruction patterns are defined in this file. 1921 if (!match_rule_supported_vector(opcode, vlen, bt)) { 1922 return false; 1923 } 1924 1925 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1926 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 1927 return false; 1928 } 1929 switch(opcode) { 1930 // Unary masked operations 1931 case Op_AbsVB: 1932 case Op_AbsVS: 1933 if(!VM_Version::supports_avx512bw()) { 1934 return false; // Implementation limitation 1935 } 1936 case Op_AbsVI: 1937 case Op_AbsVL: 1938 return true; 1939 1940 // Ternary masked operations 1941 case Op_FmaVF: 1942 case Op_FmaVD: 1943 return true; 1944 1945 case Op_MacroLogicV: 1946 if(bt != T_INT && bt != T_LONG) { 1947 return false; 1948 } 1949 return true; 1950 1951 // Binary masked operations 1952 case Op_AddVB: 1953 case Op_AddVS: 1954 case Op_SubVB: 1955 case Op_SubVS: 1956 case Op_MulVS: 1957 case Op_LShiftVS: 1958 case Op_RShiftVS: 1959 case Op_URShiftVS: 1960 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1961 if (!VM_Version::supports_avx512bw()) { 1962 return false; // Implementation limitation 1963 } 1964 return true; 1965 1966 case Op_MulVL: 1967 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1968 if (!VM_Version::supports_avx512dq()) { 1969 return false; // Implementation limitation 1970 } 1971 return true; 1972 1973 case Op_AndV: 1974 case Op_OrV: 1975 case Op_XorV: 1976 case Op_RotateRightV: 1977 case Op_RotateLeftV: 1978 if (bt != T_INT && bt != T_LONG) { 1979 return false; // Implementation limitation 1980 } 1981 return true; 1982 1983 case Op_VectorLoadMask: 1984 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1985 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 1986 return false; 1987 } 1988 return true; 1989 1990 case Op_AddVI: 1991 case Op_AddVL: 1992 case Op_AddVF: 1993 case Op_AddVD: 1994 case Op_SubVI: 1995 case Op_SubVL: 1996 case Op_SubVF: 1997 case Op_SubVD: 1998 case Op_MulVI: 1999 case Op_MulVF: 2000 case Op_MulVD: 2001 case Op_DivVF: 2002 case Op_DivVD: 2003 case Op_SqrtVF: 2004 case Op_SqrtVD: 2005 case Op_LShiftVI: 2006 case Op_LShiftVL: 2007 case Op_RShiftVI: 2008 case Op_RShiftVL: 2009 case Op_URShiftVI: 2010 case Op_URShiftVL: 2011 case Op_LoadVectorMasked: 2012 case Op_StoreVectorMasked: 2013 case Op_LoadVectorGatherMasked: 2014 case Op_StoreVectorScatterMasked: 2015 return true; 2016 2017 case Op_UMinV: 2018 case Op_UMaxV: 2019 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 2020 return false; 2021 } // fallthrough 2022 case Op_MaxV: 2023 case Op_MinV: 2024 if (is_subword_type(bt) 
&& !VM_Version::supports_avx512bw()) { 2025 return false; // Implementation limitation 2026 } 2027 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) { 2028 return false; // Implementation limitation 2029 } 2030 return true; 2031 case Op_SaturatingAddV: 2032 case Op_SaturatingSubV: 2033 if (!is_subword_type(bt)) { 2034 return false; 2035 } 2036 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) { 2037 return false; // Implementation limitation 2038 } 2039 return true; 2040 2041 case Op_VectorMaskCmp: 2042 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2043 return false; // Implementation limitation 2044 } 2045 return true; 2046 2047 case Op_VectorRearrange: 2048 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2049 return false; // Implementation limitation 2050 } 2051 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2052 return false; // Implementation limitation 2053 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2054 return false; // Implementation limitation 2055 } 2056 return true; 2057 2058 // Binary Logical operations 2059 case Op_AndVMask: 2060 case Op_OrVMask: 2061 case Op_XorVMask: 2062 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2063 return false; // Implementation limitation 2064 } 2065 return true; 2066 2067 case Op_PopCountVI: 2068 case Op_PopCountVL: 2069 if (!is_pop_count_instr_target(bt)) { 2070 return false; 2071 } 2072 return true; 2073 2074 case Op_MaskAll: 2075 return true; 2076 2077 case Op_CountLeadingZerosV: 2078 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2079 return true; 2080 } 2081 default: 2082 return false; 2083 } 2084 } 2085 2086 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2087 return false; 2088 } 2089 2090 // Return true if Vector::rearrange needs preparation of the shuffle argument 2091 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) { 2092 switch (elem_bt) { 2093 case T_BYTE: return false; 2094 case T_SHORT: return !VM_Version::supports_avx512bw(); 2095 case T_INT: return !VM_Version::supports_avx(); 2096 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl(); 2097 default: 2098 ShouldNotReachHere(); 2099 return false; 2100 } 2101 } 2102 2103 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2104 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2105 bool legacy = (generic_opnd->opcode() == LEGVEC); 2106 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2107 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2108 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 
2109 return new legVecZOper(); 2110 } 2111 if (legacy) { 2112 switch (ideal_reg) { 2113 case Op_VecS: return new legVecSOper(); 2114 case Op_VecD: return new legVecDOper(); 2115 case Op_VecX: return new legVecXOper(); 2116 case Op_VecY: return new legVecYOper(); 2117 case Op_VecZ: return new legVecZOper(); 2118 } 2119 } else { 2120 switch (ideal_reg) { 2121 case Op_VecS: return new vecSOper(); 2122 case Op_VecD: return new vecDOper(); 2123 case Op_VecX: return new vecXOper(); 2124 case Op_VecY: return new vecYOper(); 2125 case Op_VecZ: return new vecZOper(); 2126 } 2127 } 2128 ShouldNotReachHere(); 2129 return nullptr; 2130 } 2131 2132 bool Matcher::is_reg2reg_move(MachNode* m) { 2133 switch (m->rule()) { 2134 case MoveVec2Leg_rule: 2135 case MoveLeg2Vec_rule: 2136 case MoveF2VL_rule: 2137 case MoveF2LEG_rule: 2138 case MoveVL2F_rule: 2139 case MoveLEG2F_rule: 2140 case MoveD2VL_rule: 2141 case MoveD2LEG_rule: 2142 case MoveVL2D_rule: 2143 case MoveLEG2D_rule: 2144 return true; 2145 default: 2146 return false; 2147 } 2148 } 2149 2150 bool Matcher::is_generic_vector(MachOper* opnd) { 2151 switch (opnd->opcode()) { 2152 case VEC: 2153 case LEGVEC: 2154 return true; 2155 default: 2156 return false; 2157 } 2158 } 2159 2160 //------------------------------------------------------------------------ 2161 2162 const RegMask* Matcher::predicate_reg_mask(void) { 2163 return &_VECTMASK_REG_mask; 2164 } 2165 2166 // Max vector size in bytes. 0 if not supported. 2167 int Matcher::vector_width_in_bytes(BasicType bt) { 2168 assert(is_java_primitive(bt), "only primitive type vectors"); 2169 // SSE2 supports 128bit vectors for all types. 2170 // AVX2 supports 256bit vectors for all types. 2171 // AVX2/EVEX supports 512bit vectors for all types. 2172 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2173 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2174 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2175 size = (UseAVX > 2) ? 64 : 32; 2176 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2177 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2178 // Use flag to limit vector size. 2179 size = MIN2(size,(int)MaxVectorSize); 2180 // Minimum 2 values in vector (or 4 for bytes). 2181 switch (bt) { 2182 case T_DOUBLE: 2183 case T_LONG: 2184 if (size < 16) return 0; 2185 break; 2186 case T_FLOAT: 2187 case T_INT: 2188 if (size < 8) return 0; 2189 break; 2190 case T_BOOLEAN: 2191 if (size < 4) return 0; 2192 break; 2193 case T_CHAR: 2194 if (size < 4) return 0; 2195 break; 2196 case T_BYTE: 2197 if (size < 4) return 0; 2198 break; 2199 case T_SHORT: 2200 if (size < 4) return 0; 2201 break; 2202 default: 2203 ShouldNotReachHere(); 2204 } 2205 return size; 2206 } 2207 2208 // Limits on vector size (number of elements) loaded into vector. 2209 int Matcher::max_vector_size(const BasicType bt) { 2210 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2211 } 2212 int Matcher::min_vector_size(const BasicType bt) { 2213 int max_size = max_vector_size(bt); 2214 // Min size which can be loaded into vector is 4 bytes. 2215 int size = (type2aelembytes(bt) == 1) ? 
4 : 2; 2216 // Support for calling svml double64 vectors 2217 if (bt == T_DOUBLE) { 2218 size = 1; 2219 } 2220 return MIN2(size,max_size); 2221 } 2222 2223 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) { 2224 // Limit the max vector size for auto vectorization to 256 bits (32 bytes) 2225 // by default on Cascade Lake 2226 if (VM_Version::is_default_intel_cascade_lake()) { 2227 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt)); 2228 } 2229 return Matcher::max_vector_size(bt); 2230 } 2231 2232 int Matcher::scalable_vector_reg_size(const BasicType bt) { 2233 return -1; 2234 } 2235 2236 // Vector ideal reg corresponding to specified size in bytes 2237 uint Matcher::vector_ideal_reg(int size) { 2238 assert(MaxVectorSize >= size, ""); 2239 switch(size) { 2240 case 4: return Op_VecS; 2241 case 8: return Op_VecD; 2242 case 16: return Op_VecX; 2243 case 32: return Op_VecY; 2244 case 64: return Op_VecZ; 2245 } 2246 ShouldNotReachHere(); 2247 return 0; 2248 } 2249 2250 // Check for shift by small constant as well 2251 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2252 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2253 shift->in(2)->get_int() <= 3 && 2254 // Are there other uses besides address expressions? 2255 !matcher->is_visited(shift)) { 2256 address_visited.set(shift->_idx); // Flag as address_visited 2257 mstack.push(shift->in(2), Matcher::Visit); 2258 Node *conv = shift->in(1); 2259 // Allow Matcher to match the rule which bypass 2260 // ConvI2L operation for an array index on LP64 2261 // if the index value is positive. 2262 if (conv->Opcode() == Op_ConvI2L && 2263 conv->as_Type()->type()->is_long()->_lo >= 0 && 2264 // Are there other uses besides address expressions? 2265 !matcher->is_visited(conv)) { 2266 address_visited.set(conv->_idx); // Flag as address_visited 2267 mstack.push(conv->in(1), Matcher::Pre_Visit); 2268 } else { 2269 mstack.push(conv, Matcher::Pre_Visit); 2270 } 2271 return true; 2272 } 2273 return false; 2274 } 2275 2276 // This function identifies sub-graphs in which a 'load' node is 2277 // input to two different nodes, and such that it can be matched 2278 // with BMI instructions like blsi, blsr, etc. 2279 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2280 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2281 // refers to the same node. 2282 // 2283 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2284 // This is a temporary solution until we make DAGs expressible in ADL. 
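// To make the pattern concrete: for the blsi case above, is_bmi_pattern()
// (further down in this file) drives the matcher roughly like this for a
// 32-bit load 'm' feeding node 'n':
//
//   FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
//   // (AndI (SubI 0 LoadI*) LoadI*)   -> blsi:   isolate lowest set bit, x & -x
//   bool is_blsi   = bmii.match(Op_AndI, -1, Op_SubI,  1,  0);
//   // (AndI (AddI LoadI* -1) LoadI*)  -> blsr:   reset lowest set bit, x & (x - 1)
//   bool is_blsr   = bmii.match(Op_AndI, -1, Op_AddI, -1, -1);
//   // (XorI (AddI LoadI* -1) LoadI*)  -> blsmsk: mask up to lowest set bit, x ^ (x - 1)
//   bool is_blsmsk = bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
//
// The index arguments name which input edge must hold the next opcode or the
// constant (-1 means the node is commutative, so either input may match), and
// the trailing value is the required constant: 0 for the SubI form, -1 for
// the AddI forms.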
2285 template<typename ConType> 2286 class FusedPatternMatcher { 2287 Node* _op1_node; 2288 Node* _mop_node; 2289 int _con_op; 2290 2291 static int match_next(Node* n, int next_op, int next_op_idx) { 2292 if (n->in(1) == nullptr || n->in(2) == nullptr) { 2293 return -1; 2294 } 2295 2296 if (next_op_idx == -1) { // n is commutative, try rotations 2297 if (n->in(1)->Opcode() == next_op) { 2298 return 1; 2299 } else if (n->in(2)->Opcode() == next_op) { 2300 return 2; 2301 } 2302 } else { 2303 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2304 if (n->in(next_op_idx)->Opcode() == next_op) { 2305 return next_op_idx; 2306 } 2307 } 2308 return -1; 2309 } 2310 2311 public: 2312 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2313 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2314 2315 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2316 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2317 typename ConType::NativeType con_value) { 2318 if (_op1_node->Opcode() != op1) { 2319 return false; 2320 } 2321 if (_mop_node->outcnt() > 2) { 2322 return false; 2323 } 2324 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2325 if (op1_op2_idx == -1) { 2326 return false; 2327 } 2328 // Memory operation must be the other edge 2329 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2330 2331 // Check that the mop node is really what we want 2332 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2333 Node* op2_node = _op1_node->in(op1_op2_idx); 2334 if (op2_node->outcnt() > 1) { 2335 return false; 2336 } 2337 assert(op2_node->Opcode() == op2, "Should be"); 2338 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2339 if (op2_con_idx == -1) { 2340 return false; 2341 } 2342 // Memory operation must be the other edge 2343 int op2_mop_idx = (op2_con_idx & 1) + 1; 2344 // Check that the memory operation is the same node 2345 if (op2_node->in(op2_mop_idx) == _mop_node) { 2346 // Now check the constant 2347 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2348 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2349 return true; 2350 } 2351 } 2352 } 2353 return false; 2354 } 2355 }; 2356 2357 static bool is_bmi_pattern(Node* n, Node* m) { 2358 assert(UseBMI1Instructions, "sanity"); 2359 if (n != nullptr && m != nullptr) { 2360 if (m->Opcode() == Op_LoadI) { 2361 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2362 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2363 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2364 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2365 } else if (m->Opcode() == Op_LoadL) { 2366 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2367 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2368 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2369 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2370 } 2371 } 2372 return false; 2373 } 2374 2375 // Should the matcher clone input 'm' of node 'n'? 2376 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2377 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
2378 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2379 mstack.push(m, Visit); 2380 return true; 2381 } 2382 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2383 mstack.push(m, Visit); // m = ShiftCntV 2384 return true; 2385 } 2386 if (is_encode_and_store_pattern(n, m)) { 2387 mstack.push(m, Visit); 2388 return true; 2389 } 2390 return false; 2391 } 2392 2393 // Should the Matcher clone shifts on addressing modes, expecting them 2394 // to be subsumed into complex addressing expressions or compute them 2395 // into registers? 2396 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2397 Node *off = m->in(AddPNode::Offset); 2398 if (off->is_Con()) { 2399 address_visited.test_set(m->_idx); // Flag as address_visited 2400 Node *adr = m->in(AddPNode::Address); 2401 2402 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset. 2403 // AtomicAdd is not an addressing expression. 2404 // Cheap to find it by looking for screwy base. 2405 if (adr->is_AddP() && 2406 !adr->in(AddPNode::Base)->is_top() && 2407 !adr->in(AddPNode::Offset)->is_Con() && 2408 off->get_long() == (int) (off->get_long()) && // immL32 2409 // Are there other uses besides address expressions? 2410 !is_visited(adr)) { 2411 address_visited.set(adr->_idx); // Flag as address_visited 2412 Node *shift = adr->in(AddPNode::Offset); 2413 if (!clone_shift(shift, this, mstack, address_visited)) { 2414 mstack.push(shift, Pre_Visit); 2415 } 2416 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2417 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2418 } else { 2419 mstack.push(adr, Pre_Visit); 2420 } 2421 2422 // Clone X+offset as it also folds into most addressing expressions 2423 mstack.push(off, Visit); 2424 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2425 return true; 2426 } else if (clone_shift(off, this, mstack, address_visited)) { 2427 address_visited.test_set(m->_idx); // Flag as address_visited 2428 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2429 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2430 return true; 2431 } 2432 return false; 2433 } 2434 2435 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2436 switch (bt) { 2437 case BoolTest::eq: 2438 return Assembler::eq; 2439 case BoolTest::ne: 2440 return Assembler::neq; 2441 case BoolTest::le: 2442 case BoolTest::ule: 2443 return Assembler::le; 2444 case BoolTest::ge: 2445 case BoolTest::uge: 2446 return Assembler::nlt; 2447 case BoolTest::lt: 2448 case BoolTest::ult: 2449 return Assembler::lt; 2450 case BoolTest::gt: 2451 case BoolTest::ugt: 2452 return Assembler::nle; 2453 default : ShouldNotReachHere(); return Assembler::_false; 2454 } 2455 } 2456 2457 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2458 switch (bt) { 2459 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2460 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
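    // For example, a lane-wise 'x != y' where either lane is NaN must yield
    // true, and NEQ_UQ does exactly that because unordered lanes count as
    // "not equal"; the ordered predicates used for the other relations
    // (EQ_OQ, LT_OQ, ...) instead yield false for any lane involving NaN,
    // matching Java comparison semantics.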
2461 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2462 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2463 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2464 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2465 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2466 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2467 } 2468 } 2469 2470 // Helper methods for MachSpillCopyNode::implementation(). 2471 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 2472 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2473 assert(ireg == Op_VecS || // 32bit vector 2474 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2475 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi), 2476 "no non-adjacent vector moves" ); 2477 if (masm) { 2478 switch (ireg) { 2479 case Op_VecS: // copy whole register 2480 case Op_VecD: 2481 case Op_VecX: 2482 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2483 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2484 } else { 2485 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2486 } 2487 break; 2488 case Op_VecY: 2489 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2490 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2491 } else { 2492 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2493 } 2494 break; 2495 case Op_VecZ: 2496 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2497 break; 2498 default: 2499 ShouldNotReachHere(); 2500 } 2501 #ifndef PRODUCT 2502 } else { 2503 switch (ireg) { 2504 case Op_VecS: 2505 case Op_VecD: 2506 case Op_VecX: 2507 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2508 break; 2509 case Op_VecY: 2510 case Op_VecZ: 2511 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2512 break; 2513 default: 2514 ShouldNotReachHere(); 2515 } 2516 #endif 2517 } 2518 } 2519 2520 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 2521 int stack_offset, int reg, uint ireg, outputStream* st) { 2522 if (masm) { 2523 if (is_load) { 2524 switch (ireg) { 2525 case Op_VecS: 2526 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2527 break; 2528 case Op_VecD: 2529 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2530 break; 2531 case Op_VecX: 2532 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2533 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2534 } else { 2535 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2536 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2537 } 2538 break; 2539 case Op_VecY: 2540 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2541 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2542 } else { 2543 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2544 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), 
as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2545 } 2546 break; 2547 case Op_VecZ: 2548 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2549 break; 2550 default: 2551 ShouldNotReachHere(); 2552 } 2553 } else { // store 2554 switch (ireg) { 2555 case Op_VecS: 2556 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2557 break; 2558 case Op_VecD: 2559 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2560 break; 2561 case Op_VecX: 2562 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2563 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2564 } 2565 else { 2566 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2567 } 2568 break; 2569 case Op_VecY: 2570 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2571 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2572 } 2573 else { 2574 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2575 } 2576 break; 2577 case Op_VecZ: 2578 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2579 break; 2580 default: 2581 ShouldNotReachHere(); 2582 } 2583 } 2584 #ifndef PRODUCT 2585 } else { 2586 if (is_load) { 2587 switch (ireg) { 2588 case Op_VecS: 2589 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2590 break; 2591 case Op_VecD: 2592 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2593 break; 2594 case Op_VecX: 2595 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2596 break; 2597 case Op_VecY: 2598 case Op_VecZ: 2599 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2600 break; 2601 default: 2602 ShouldNotReachHere(); 2603 } 2604 } else { // store 2605 switch (ireg) { 2606 case Op_VecS: 2607 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2608 break; 2609 case Op_VecD: 2610 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2611 break; 2612 case Op_VecX: 2613 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2614 break; 2615 case Op_VecY: 2616 case Op_VecZ: 2617 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2618 break; 2619 default: 2620 ShouldNotReachHere(); 2621 } 2622 } 2623 #endif 2624 } 2625 } 2626 2627 template <class T> 2628 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) { 2629 int size = type2aelembytes(bt) * len; 2630 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0); 2631 for (int i = 0; i < len; i++) { 2632 int offset = i * type2aelembytes(bt); 2633 switch (bt) { 2634 case T_BYTE: val->at(i) = con; break; 2635 case T_SHORT: { 2636 jshort c = con; 2637 memcpy(val->adr_at(offset), &c, sizeof(jshort)); 2638 break; 2639 } 2640 case T_INT: { 2641 jint c = con; 2642 memcpy(val->adr_at(offset), &c, sizeof(jint)); 2643 break; 2644 } 2645 case T_LONG: { 2646 jlong c = con; 2647 memcpy(val->adr_at(offset), &c, sizeof(jlong)); 2648 break; 2649 } 2650 case T_FLOAT: { 2651 jfloat c = con; 2652 memcpy(val->adr_at(offset), &c, sizeof(jfloat)); 2653 break; 2654 } 2655 case T_DOUBLE: { 2656 jdouble c = con; 2657 memcpy(val->adr_at(offset), &c, sizeof(jdouble)); 2658 break; 2659 } 2660 default: assert(false, "%s", type2name(bt)); 2661 } 2662 } 2663 
return val; 2664 } 2665 2666 static inline jlong high_bit_set(BasicType bt) { 2667 switch (bt) { 2668 case T_BYTE: return 0x8080808080808080; 2669 case T_SHORT: return 0x8000800080008000; 2670 case T_INT: return 0x8000000080000000; 2671 case T_LONG: return 0x8000000000000000; 2672 default: 2673 ShouldNotReachHere(); 2674 return 0; 2675 } 2676 } 2677 2678 #ifndef PRODUCT 2679 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2680 st->print("nop \t# %d bytes pad for loops and calls", _count); 2681 } 2682 #endif 2683 2684 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const { 2685 __ nop(_count); 2686 } 2687 2688 uint MachNopNode::size(PhaseRegAlloc*) const { 2689 return _count; 2690 } 2691 2692 #ifndef PRODUCT 2693 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2694 st->print("# breakpoint"); 2695 } 2696 #endif 2697 2698 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const { 2699 __ int3(); 2700 } 2701 2702 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2703 return MachNode::size(ra_); 2704 } 2705 2706 %} 2707 2708 encode %{ 2709 2710 enc_class call_epilog %{ 2711 if (VerifyStackAtCalls) { 2712 // Check that stack depth is unchanged: find majik cookie on stack 2713 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2714 Label L; 2715 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2716 __ jccb(Assembler::equal, L); 2717 // Die if stack mismatch 2718 __ int3(); 2719 __ bind(L); 2720 } 2721 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) { 2722 // The last return value is not set by the callee but used to pass the null marker to compiled code. 2723 // Search for the corresponding projection, get the register and emit code that initialized it. 2724 uint con = (tf()->range_cc()->cnt() - 1); 2725 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { 2726 ProjNode* proj = fast_out(i)->as_Proj(); 2727 if (proj->_con == con) { 2728 // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized) 2729 OptoReg::Name optoReg = ra_->get_reg_first(proj); 2730 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP)); 2731 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1; 2732 __ testq(rax, rax); 2733 __ setb(Assembler::notZero, toReg); 2734 __ movzbl(toReg, toReg); 2735 if (reg->is_stack()) { 2736 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size; 2737 __ movq(Address(rsp, st_off), toReg); 2738 } 2739 break; 2740 } 2741 } 2742 if (return_value_is_used()) { 2743 // An inline type is returned as fields in multiple registers. 2744 // Rax either contains an oop if the inline type is buffered or a pointer 2745 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax 2746 // if the lowest bit is set to allow C2 to use the oop after null checking. 
2747 // rax &= (rax & 1) - 1 2748 __ movptr(rscratch1, rax); 2749 __ andptr(rscratch1, 0x1); 2750 __ subptr(rscratch1, 0x1); 2751 __ andptr(rax, rscratch1); 2752 } 2753 } 2754 %} 2755 2756 %} 2757 2758 // Operands for bound floating pointer register arguments 2759 operand rxmm0() %{ 2760 constraint(ALLOC_IN_RC(xmm0_reg)); 2761 match(VecX); 2762 format%{%} 2763 interface(REG_INTER); 2764 %} 2765 2766 //----------OPERANDS----------------------------------------------------------- 2767 // Operand definitions must precede instruction definitions for correct parsing 2768 // in the ADLC because operands constitute user defined types which are used in 2769 // instruction definitions. 2770 2771 // Vectors 2772 2773 // Dummy generic vector class. Should be used for all vector operands. 2774 // Replaced with vec[SDXYZ] during post-selection pass. 2775 operand vec() %{ 2776 constraint(ALLOC_IN_RC(dynamic)); 2777 match(VecX); 2778 match(VecY); 2779 match(VecZ); 2780 match(VecS); 2781 match(VecD); 2782 2783 format %{ %} 2784 interface(REG_INTER); 2785 %} 2786 2787 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2788 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2789 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2790 // runtime code generation via reg_class_dynamic. 2791 operand legVec() %{ 2792 constraint(ALLOC_IN_RC(dynamic)); 2793 match(VecX); 2794 match(VecY); 2795 match(VecZ); 2796 match(VecS); 2797 match(VecD); 2798 2799 format %{ %} 2800 interface(REG_INTER); 2801 %} 2802 2803 // Replaces vec during post-selection cleanup. See above. 2804 operand vecS() %{ 2805 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2806 match(VecS); 2807 2808 format %{ %} 2809 interface(REG_INTER); 2810 %} 2811 2812 // Replaces legVec during post-selection cleanup. See above. 2813 operand legVecS() %{ 2814 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2815 match(VecS); 2816 2817 format %{ %} 2818 interface(REG_INTER); 2819 %} 2820 2821 // Replaces vec during post-selection cleanup. See above. 2822 operand vecD() %{ 2823 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2824 match(VecD); 2825 2826 format %{ %} 2827 interface(REG_INTER); 2828 %} 2829 2830 // Replaces legVec during post-selection cleanup. See above. 2831 operand legVecD() %{ 2832 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2833 match(VecD); 2834 2835 format %{ %} 2836 interface(REG_INTER); 2837 %} 2838 2839 // Replaces vec during post-selection cleanup. See above. 2840 operand vecX() %{ 2841 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2842 match(VecX); 2843 2844 format %{ %} 2845 interface(REG_INTER); 2846 %} 2847 2848 // Replaces legVec during post-selection cleanup. See above. 2849 operand legVecX() %{ 2850 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2851 match(VecX); 2852 2853 format %{ %} 2854 interface(REG_INTER); 2855 %} 2856 2857 // Replaces vec during post-selection cleanup. See above. 2858 operand vecY() %{ 2859 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2860 match(VecY); 2861 2862 format %{ %} 2863 interface(REG_INTER); 2864 %} 2865 2866 // Replaces legVec during post-selection cleanup. See above. 2867 operand legVecY() %{ 2868 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2869 match(VecY); 2870 2871 format %{ %} 2872 interface(REG_INTER); 2873 %} 2874 2875 // Replaces vec during post-selection cleanup. See above. 
2876 operand vecZ() %{ 2877 constraint(ALLOC_IN_RC(vectorz_reg)); 2878 match(VecZ); 2879 2880 format %{ %} 2881 interface(REG_INTER); 2882 %} 2883 2884 // Replaces legVec during post-selection cleanup. See above. 2885 operand legVecZ() %{ 2886 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2887 match(VecZ); 2888 2889 format %{ %} 2890 interface(REG_INTER); 2891 %} 2892 2893 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2894 2895 // ============================================================================ 2896 2897 instruct ShouldNotReachHere() %{ 2898 match(Halt); 2899 format %{ "stop\t# ShouldNotReachHere" %} 2900 ins_encode %{ 2901 if (is_reachable()) { 2902 const char* str = __ code_string(_halt_reason); 2903 __ stop(str); 2904 } 2905 %} 2906 ins_pipe(pipe_slow); 2907 %} 2908 2909 // ============================================================================ 2910 2911 instruct addF_reg(regF dst, regF src) %{ 2912 predicate(UseAVX == 0); 2913 match(Set dst (AddF dst src)); 2914 2915 format %{ "addss $dst, $src" %} 2916 ins_cost(150); 2917 ins_encode %{ 2918 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2919 %} 2920 ins_pipe(pipe_slow); 2921 %} 2922 2923 instruct addF_mem(regF dst, memory src) %{ 2924 predicate(UseAVX == 0); 2925 match(Set dst (AddF dst (LoadF src))); 2926 2927 format %{ "addss $dst, $src" %} 2928 ins_cost(150); 2929 ins_encode %{ 2930 __ addss($dst$$XMMRegister, $src$$Address); 2931 %} 2932 ins_pipe(pipe_slow); 2933 %} 2934 2935 instruct addF_imm(regF dst, immF con) %{ 2936 predicate(UseAVX == 0); 2937 match(Set dst (AddF dst con)); 2938 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2939 ins_cost(150); 2940 ins_encode %{ 2941 __ addss($dst$$XMMRegister, $constantaddress($con)); 2942 %} 2943 ins_pipe(pipe_slow); 2944 %} 2945 2946 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2947 predicate(UseAVX > 0); 2948 match(Set dst (AddF src1 src2)); 2949 2950 format %{ "vaddss $dst, $src1, $src2" %} 2951 ins_cost(150); 2952 ins_encode %{ 2953 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2954 %} 2955 ins_pipe(pipe_slow); 2956 %} 2957 2958 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2959 predicate(UseAVX > 0); 2960 match(Set dst (AddF src1 (LoadF src2))); 2961 2962 format %{ "vaddss $dst, $src1, $src2" %} 2963 ins_cost(150); 2964 ins_encode %{ 2965 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2966 %} 2967 ins_pipe(pipe_slow); 2968 %} 2969 2970 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2971 predicate(UseAVX > 0); 2972 match(Set dst (AddF src con)); 2973 2974 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2975 ins_cost(150); 2976 ins_encode %{ 2977 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2978 %} 2979 ins_pipe(pipe_slow); 2980 %} 2981 2982 instruct addD_reg(regD dst, regD src) %{ 2983 predicate(UseAVX == 0); 2984 match(Set dst (AddD dst src)); 2985 2986 format %{ "addsd $dst, $src" %} 2987 ins_cost(150); 2988 ins_encode %{ 2989 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2990 %} 2991 ins_pipe(pipe_slow); 2992 %} 2993 2994 instruct addD_mem(regD dst, memory src) %{ 2995 predicate(UseAVX == 0); 2996 match(Set dst (AddD dst (LoadD src))); 2997 2998 format %{ "addsd $dst, $src" %} 2999 ins_cost(150); 3000 ins_encode %{ 3001 __ addsd($dst$$XMMRegister, $src$$Address); 3002 %} 3003 ins_pipe(pipe_slow); 3004 %} 3005 3006 instruct addD_imm(regD dst, immD 
con) %{ 3007 predicate(UseAVX == 0); 3008 match(Set dst (AddD dst con)); 3009 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3010 ins_cost(150); 3011 ins_encode %{ 3012 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3013 %} 3014 ins_pipe(pipe_slow); 3015 %} 3016 3017 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3018 predicate(UseAVX > 0); 3019 match(Set dst (AddD src1 src2)); 3020 3021 format %{ "vaddsd $dst, $src1, $src2" %} 3022 ins_cost(150); 3023 ins_encode %{ 3024 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3025 %} 3026 ins_pipe(pipe_slow); 3027 %} 3028 3029 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3030 predicate(UseAVX > 0); 3031 match(Set dst (AddD src1 (LoadD src2))); 3032 3033 format %{ "vaddsd $dst, $src1, $src2" %} 3034 ins_cost(150); 3035 ins_encode %{ 3036 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3037 %} 3038 ins_pipe(pipe_slow); 3039 %} 3040 3041 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3042 predicate(UseAVX > 0); 3043 match(Set dst (AddD src con)); 3044 3045 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3046 ins_cost(150); 3047 ins_encode %{ 3048 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3049 %} 3050 ins_pipe(pipe_slow); 3051 %} 3052 3053 instruct subF_reg(regF dst, regF src) %{ 3054 predicate(UseAVX == 0); 3055 match(Set dst (SubF dst src)); 3056 3057 format %{ "subss $dst, $src" %} 3058 ins_cost(150); 3059 ins_encode %{ 3060 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3061 %} 3062 ins_pipe(pipe_slow); 3063 %} 3064 3065 instruct subF_mem(regF dst, memory src) %{ 3066 predicate(UseAVX == 0); 3067 match(Set dst (SubF dst (LoadF src))); 3068 3069 format %{ "subss $dst, $src" %} 3070 ins_cost(150); 3071 ins_encode %{ 3072 __ subss($dst$$XMMRegister, $src$$Address); 3073 %} 3074 ins_pipe(pipe_slow); 3075 %} 3076 3077 instruct subF_imm(regF dst, immF con) %{ 3078 predicate(UseAVX == 0); 3079 match(Set dst (SubF dst con)); 3080 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3081 ins_cost(150); 3082 ins_encode %{ 3083 __ subss($dst$$XMMRegister, $constantaddress($con)); 3084 %} 3085 ins_pipe(pipe_slow); 3086 %} 3087 3088 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3089 predicate(UseAVX > 0); 3090 match(Set dst (SubF src1 src2)); 3091 3092 format %{ "vsubss $dst, $src1, $src2" %} 3093 ins_cost(150); 3094 ins_encode %{ 3095 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3096 %} 3097 ins_pipe(pipe_slow); 3098 %} 3099 3100 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3101 predicate(UseAVX > 0); 3102 match(Set dst (SubF src1 (LoadF src2))); 3103 3104 format %{ "vsubss $dst, $src1, $src2" %} 3105 ins_cost(150); 3106 ins_encode %{ 3107 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3108 %} 3109 ins_pipe(pipe_slow); 3110 %} 3111 3112 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3113 predicate(UseAVX > 0); 3114 match(Set dst (SubF src con)); 3115 3116 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3117 ins_cost(150); 3118 ins_encode %{ 3119 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3120 %} 3121 ins_pipe(pipe_slow); 3122 %} 3123 3124 instruct subD_reg(regD dst, regD src) %{ 3125 predicate(UseAVX == 0); 3126 match(Set dst (SubD dst src)); 3127 3128 format %{ "subsd $dst, $src" %} 
3129 ins_cost(150); 3130 ins_encode %{ 3131 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3132 %} 3133 ins_pipe(pipe_slow); 3134 %} 3135 3136 instruct subD_mem(regD dst, memory src) %{ 3137 predicate(UseAVX == 0); 3138 match(Set dst (SubD dst (LoadD src))); 3139 3140 format %{ "subsd $dst, $src" %} 3141 ins_cost(150); 3142 ins_encode %{ 3143 __ subsd($dst$$XMMRegister, $src$$Address); 3144 %} 3145 ins_pipe(pipe_slow); 3146 %} 3147 3148 instruct subD_imm(regD dst, immD con) %{ 3149 predicate(UseAVX == 0); 3150 match(Set dst (SubD dst con)); 3151 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3152 ins_cost(150); 3153 ins_encode %{ 3154 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3155 %} 3156 ins_pipe(pipe_slow); 3157 %} 3158 3159 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3160 predicate(UseAVX > 0); 3161 match(Set dst (SubD src1 src2)); 3162 3163 format %{ "vsubsd $dst, $src1, $src2" %} 3164 ins_cost(150); 3165 ins_encode %{ 3166 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3167 %} 3168 ins_pipe(pipe_slow); 3169 %} 3170 3171 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3172 predicate(UseAVX > 0); 3173 match(Set dst (SubD src1 (LoadD src2))); 3174 3175 format %{ "vsubsd $dst, $src1, $src2" %} 3176 ins_cost(150); 3177 ins_encode %{ 3178 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3179 %} 3180 ins_pipe(pipe_slow); 3181 %} 3182 3183 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3184 predicate(UseAVX > 0); 3185 match(Set dst (SubD src con)); 3186 3187 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3188 ins_cost(150); 3189 ins_encode %{ 3190 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3191 %} 3192 ins_pipe(pipe_slow); 3193 %} 3194 3195 instruct mulF_reg(regF dst, regF src) %{ 3196 predicate(UseAVX == 0); 3197 match(Set dst (MulF dst src)); 3198 3199 format %{ "mulss $dst, $src" %} 3200 ins_cost(150); 3201 ins_encode %{ 3202 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3203 %} 3204 ins_pipe(pipe_slow); 3205 %} 3206 3207 instruct mulF_mem(regF dst, memory src) %{ 3208 predicate(UseAVX == 0); 3209 match(Set dst (MulF dst (LoadF src))); 3210 3211 format %{ "mulss $dst, $src" %} 3212 ins_cost(150); 3213 ins_encode %{ 3214 __ mulss($dst$$XMMRegister, $src$$Address); 3215 %} 3216 ins_pipe(pipe_slow); 3217 %} 3218 3219 instruct mulF_imm(regF dst, immF con) %{ 3220 predicate(UseAVX == 0); 3221 match(Set dst (MulF dst con)); 3222 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3223 ins_cost(150); 3224 ins_encode %{ 3225 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3226 %} 3227 ins_pipe(pipe_slow); 3228 %} 3229 3230 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3231 predicate(UseAVX > 0); 3232 match(Set dst (MulF src1 src2)); 3233 3234 format %{ "vmulss $dst, $src1, $src2" %} 3235 ins_cost(150); 3236 ins_encode %{ 3237 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3238 %} 3239 ins_pipe(pipe_slow); 3240 %} 3241 3242 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3243 predicate(UseAVX > 0); 3244 match(Set dst (MulF src1 (LoadF src2))); 3245 3246 format %{ "vmulss $dst, $src1, $src2" %} 3247 ins_cost(150); 3248 ins_encode %{ 3249 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3250 %} 3251 ins_pipe(pipe_slow); 3252 %} 3253 3254 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3255 
predicate(UseAVX > 0); 3256 match(Set dst (MulF src con)); 3257 3258 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3259 ins_cost(150); 3260 ins_encode %{ 3261 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3262 %} 3263 ins_pipe(pipe_slow); 3264 %} 3265 3266 instruct mulD_reg(regD dst, regD src) %{ 3267 predicate(UseAVX == 0); 3268 match(Set dst (MulD dst src)); 3269 3270 format %{ "mulsd $dst, $src" %} 3271 ins_cost(150); 3272 ins_encode %{ 3273 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3274 %} 3275 ins_pipe(pipe_slow); 3276 %} 3277 3278 instruct mulD_mem(regD dst, memory src) %{ 3279 predicate(UseAVX == 0); 3280 match(Set dst (MulD dst (LoadD src))); 3281 3282 format %{ "mulsd $dst, $src" %} 3283 ins_cost(150); 3284 ins_encode %{ 3285 __ mulsd($dst$$XMMRegister, $src$$Address); 3286 %} 3287 ins_pipe(pipe_slow); 3288 %} 3289 3290 instruct mulD_imm(regD dst, immD con) %{ 3291 predicate(UseAVX == 0); 3292 match(Set dst (MulD dst con)); 3293 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3294 ins_cost(150); 3295 ins_encode %{ 3296 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3297 %} 3298 ins_pipe(pipe_slow); 3299 %} 3300 3301 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3302 predicate(UseAVX > 0); 3303 match(Set dst (MulD src1 src2)); 3304 3305 format %{ "vmulsd $dst, $src1, $src2" %} 3306 ins_cost(150); 3307 ins_encode %{ 3308 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3309 %} 3310 ins_pipe(pipe_slow); 3311 %} 3312 3313 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3314 predicate(UseAVX > 0); 3315 match(Set dst (MulD src1 (LoadD src2))); 3316 3317 format %{ "vmulsd $dst, $src1, $src2" %} 3318 ins_cost(150); 3319 ins_encode %{ 3320 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3321 %} 3322 ins_pipe(pipe_slow); 3323 %} 3324 3325 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3326 predicate(UseAVX > 0); 3327 match(Set dst (MulD src con)); 3328 3329 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3330 ins_cost(150); 3331 ins_encode %{ 3332 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3333 %} 3334 ins_pipe(pipe_slow); 3335 %} 3336 3337 instruct divF_reg(regF dst, regF src) %{ 3338 predicate(UseAVX == 0); 3339 match(Set dst (DivF dst src)); 3340 3341 format %{ "divss $dst, $src" %} 3342 ins_cost(150); 3343 ins_encode %{ 3344 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3345 %} 3346 ins_pipe(pipe_slow); 3347 %} 3348 3349 instruct divF_mem(regF dst, memory src) %{ 3350 predicate(UseAVX == 0); 3351 match(Set dst (DivF dst (LoadF src))); 3352 3353 format %{ "divss $dst, $src" %} 3354 ins_cost(150); 3355 ins_encode %{ 3356 __ divss($dst$$XMMRegister, $src$$Address); 3357 %} 3358 ins_pipe(pipe_slow); 3359 %} 3360 3361 instruct divF_imm(regF dst, immF con) %{ 3362 predicate(UseAVX == 0); 3363 match(Set dst (DivF dst con)); 3364 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3365 ins_cost(150); 3366 ins_encode %{ 3367 __ divss($dst$$XMMRegister, $constantaddress($con)); 3368 %} 3369 ins_pipe(pipe_slow); 3370 %} 3371 3372 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3373 predicate(UseAVX > 0); 3374 match(Set dst (DivF src1 src2)); 3375 3376 format %{ "vdivss $dst, $src1, $src2" %} 3377 ins_cost(150); 3378 ins_encode %{ 3379 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, 
$src2$$XMMRegister); 3380 %} 3381 ins_pipe(pipe_slow); 3382 %} 3383 3384 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3385 predicate(UseAVX > 0); 3386 match(Set dst (DivF src1 (LoadF src2))); 3387 3388 format %{ "vdivss $dst, $src1, $src2" %} 3389 ins_cost(150); 3390 ins_encode %{ 3391 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3392 %} 3393 ins_pipe(pipe_slow); 3394 %} 3395 3396 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3397 predicate(UseAVX > 0); 3398 match(Set dst (DivF src con)); 3399 3400 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3401 ins_cost(150); 3402 ins_encode %{ 3403 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3404 %} 3405 ins_pipe(pipe_slow); 3406 %} 3407 3408 instruct divD_reg(regD dst, regD src) %{ 3409 predicate(UseAVX == 0); 3410 match(Set dst (DivD dst src)); 3411 3412 format %{ "divsd $dst, $src" %} 3413 ins_cost(150); 3414 ins_encode %{ 3415 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3416 %} 3417 ins_pipe(pipe_slow); 3418 %} 3419 3420 instruct divD_mem(regD dst, memory src) %{ 3421 predicate(UseAVX == 0); 3422 match(Set dst (DivD dst (LoadD src))); 3423 3424 format %{ "divsd $dst, $src" %} 3425 ins_cost(150); 3426 ins_encode %{ 3427 __ divsd($dst$$XMMRegister, $src$$Address); 3428 %} 3429 ins_pipe(pipe_slow); 3430 %} 3431 3432 instruct divD_imm(regD dst, immD con) %{ 3433 predicate(UseAVX == 0); 3434 match(Set dst (DivD dst con)); 3435 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3436 ins_cost(150); 3437 ins_encode %{ 3438 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3439 %} 3440 ins_pipe(pipe_slow); 3441 %} 3442 3443 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3444 predicate(UseAVX > 0); 3445 match(Set dst (DivD src1 src2)); 3446 3447 format %{ "vdivsd $dst, $src1, $src2" %} 3448 ins_cost(150); 3449 ins_encode %{ 3450 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3451 %} 3452 ins_pipe(pipe_slow); 3453 %} 3454 3455 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3456 predicate(UseAVX > 0); 3457 match(Set dst (DivD src1 (LoadD src2))); 3458 3459 format %{ "vdivsd $dst, $src1, $src2" %} 3460 ins_cost(150); 3461 ins_encode %{ 3462 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3463 %} 3464 ins_pipe(pipe_slow); 3465 %} 3466 3467 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3468 predicate(UseAVX > 0); 3469 match(Set dst (DivD src con)); 3470 3471 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3472 ins_cost(150); 3473 ins_encode %{ 3474 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3475 %} 3476 ins_pipe(pipe_slow); 3477 %} 3478 3479 instruct absF_reg(regF dst) %{ 3480 predicate(UseAVX == 0); 3481 match(Set dst (AbsF dst)); 3482 ins_cost(150); 3483 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3484 ins_encode %{ 3485 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3486 %} 3487 ins_pipe(pipe_slow); 3488 %} 3489 3490 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3491 predicate(UseAVX > 0); 3492 match(Set dst (AbsF src)); 3493 ins_cost(150); 3494 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3495 ins_encode %{ 3496 int vlen_enc = Assembler::AVX_128bit; 3497 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3498 ExternalAddress(float_signmask()), vlen_enc); 3499 %} 3500 
ins_pipe(pipe_slow); 3501 %} 3502 3503 instruct absD_reg(regD dst) %{ 3504 predicate(UseAVX == 0); 3505 match(Set dst (AbsD dst)); 3506 ins_cost(150); 3507 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3508 "# abs double by sign masking" %} 3509 ins_encode %{ 3510 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3511 %} 3512 ins_pipe(pipe_slow); 3513 %} 3514 3515 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3516 predicate(UseAVX > 0); 3517 match(Set dst (AbsD src)); 3518 ins_cost(150); 3519 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3520 "# abs double by sign masking" %} 3521 ins_encode %{ 3522 int vlen_enc = Assembler::AVX_128bit; 3523 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3524 ExternalAddress(double_signmask()), vlen_enc); 3525 %} 3526 ins_pipe(pipe_slow); 3527 %} 3528 3529 instruct negF_reg(regF dst) %{ 3530 predicate(UseAVX == 0); 3531 match(Set dst (NegF dst)); 3532 ins_cost(150); 3533 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3534 ins_encode %{ 3535 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3536 %} 3537 ins_pipe(pipe_slow); 3538 %} 3539 3540 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3541 predicate(UseAVX > 0); 3542 match(Set dst (NegF src)); 3543 ins_cost(150); 3544 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3545 ins_encode %{ 3546 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3547 ExternalAddress(float_signflip())); 3548 %} 3549 ins_pipe(pipe_slow); 3550 %} 3551 3552 instruct negD_reg(regD dst) %{ 3553 predicate(UseAVX == 0); 3554 match(Set dst (NegD dst)); 3555 ins_cost(150); 3556 format %{ "xorpd $dst, [0x8000000000000000]\t" 3557 "# neg double by sign flipping" %} 3558 ins_encode %{ 3559 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3560 %} 3561 ins_pipe(pipe_slow); 3562 %} 3563 3564 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3565 predicate(UseAVX > 0); 3566 match(Set dst (NegD src)); 3567 ins_cost(150); 3568 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3569 "# neg double by sign flipping" %} 3570 ins_encode %{ 3571 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3572 ExternalAddress(double_signflip())); 3573 %} 3574 ins_pipe(pipe_slow); 3575 %} 3576 3577 // sqrtss instruction needs destination register to be pre initialized for best performance 3578 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3579 instruct sqrtF_reg(regF dst) %{ 3580 match(Set dst (SqrtF dst)); 3581 format %{ "sqrtss $dst, $dst" %} 3582 ins_encode %{ 3583 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3584 %} 3585 ins_pipe(pipe_slow); 3586 %} 3587 3588 // sqrtsd instruction needs destination register to be pre initialized for best performance 3589 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3590 instruct sqrtD_reg(regD dst) %{ 3591 match(Set dst (SqrtD dst)); 3592 format %{ "sqrtsd $dst, $dst" %} 3593 ins_encode %{ 3594 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3595 %} 3596 ins_pipe(pipe_slow); 3597 %} 3598 3599 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{ 3600 effect(TEMP tmp); 3601 match(Set dst (ConvF2HF src)); 3602 ins_cost(125); 3603 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%} 3604 ins_encode %{ 3605 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister); 3606 %} 3607 ins_pipe( pipe_slow ); 3608 %} 3609 3610 instruct convF2HF_mem_reg(memory mem, regF src, kReg 
ktmp, rRegI rtmp) %{ 3611 predicate((UseAVX > 2) && VM_Version::supports_avx512vl()); 3612 effect(TEMP ktmp, TEMP rtmp); 3613 match(Set mem (StoreC mem (ConvF2HF src))); 3614 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %} 3615 ins_encode %{ 3616 __ movl($rtmp$$Register, 0x1); 3617 __ kmovwl($ktmp$$KRegister, $rtmp$$Register); 3618 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 3619 %} 3620 ins_pipe( pipe_slow ); 3621 %} 3622 3623 instruct vconvF2HF(vec dst, vec src) %{ 3624 match(Set dst (VectorCastF2HF src)); 3625 format %{ "vector_conv_F2HF $dst $src" %} 3626 ins_encode %{ 3627 int vlen_enc = vector_length_encoding(this, $src); 3628 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc); 3629 %} 3630 ins_pipe( pipe_slow ); 3631 %} 3632 3633 instruct vconvF2HF_mem_reg(memory mem, vec src) %{ 3634 predicate(n->as_StoreVector()->memory_size() >= 16); 3635 match(Set mem (StoreVector mem (VectorCastF2HF src))); 3636 format %{ "vcvtps2ph $mem,$src" %} 3637 ins_encode %{ 3638 int vlen_enc = vector_length_encoding(this, $src); 3639 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc); 3640 %} 3641 ins_pipe( pipe_slow ); 3642 %} 3643 3644 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{ 3645 match(Set dst (ConvHF2F src)); 3646 format %{ "vcvtph2ps $dst,$src" %} 3647 ins_encode %{ 3648 __ flt16_to_flt($dst$$XMMRegister, $src$$Register); 3649 %} 3650 ins_pipe( pipe_slow ); 3651 %} 3652 3653 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{ 3654 match(Set dst (VectorCastHF2F (LoadVector mem))); 3655 format %{ "vcvtph2ps $dst,$mem" %} 3656 ins_encode %{ 3657 int vlen_enc = vector_length_encoding(this); 3658 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc); 3659 %} 3660 ins_pipe( pipe_slow ); 3661 %} 3662 3663 instruct vconvHF2F(vec dst, vec src) %{ 3664 match(Set dst (VectorCastHF2F src)); 3665 ins_cost(125); 3666 format %{ "vector_conv_HF2F $dst,$src" %} 3667 ins_encode %{ 3668 int vlen_enc = vector_length_encoding(this); 3669 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 3670 %} 3671 ins_pipe( pipe_slow ); 3672 %} 3673 3674 // ---------------------------------------- VectorReinterpret ------------------------------------ 3675 instruct reinterpret_mask(kReg dst) %{ 3676 predicate(n->bottom_type()->isa_vectmask() && 3677 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3678 match(Set dst (VectorReinterpret dst)); 3679 ins_cost(125); 3680 format %{ "vector_reinterpret $dst\t!" %} 3681 ins_encode %{ 3682 // empty 3683 %} 3684 ins_pipe( pipe_slow ); 3685 %} 3686 3687 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3688 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3689 n->bottom_type()->isa_vectmask() && 3690 n->in(1)->bottom_type()->isa_vectmask() && 3691 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3692 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3693 match(Set dst (VectorReinterpret src)); 3694 effect(TEMP xtmp); 3695 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" 
%} 3696 ins_encode %{ 3697 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3698 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3699 assert(src_sz == dst_sz , "src and dst size mismatch"); 3700 int vlen_enc = vector_length_encoding(src_sz); 3701 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3702 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3703 %} 3704 ins_pipe( pipe_slow ); 3705 %} 3706 3707 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3708 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3709 n->bottom_type()->isa_vectmask() && 3710 n->in(1)->bottom_type()->isa_vectmask() && 3711 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3712 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3713 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3714 match(Set dst (VectorReinterpret src)); 3715 effect(TEMP xtmp); 3716 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3717 ins_encode %{ 3718 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3719 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3720 assert(src_sz == dst_sz , "src and dst size mismatch"); 3721 int vlen_enc = vector_length_encoding(src_sz); 3722 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3723 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3724 %} 3725 ins_pipe( pipe_slow ); 3726 %} 3727 3728 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3729 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3730 n->bottom_type()->isa_vectmask() && 3731 n->in(1)->bottom_type()->isa_vectmask() && 3732 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3733 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3734 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3735 match(Set dst (VectorReinterpret src)); 3736 effect(TEMP xtmp); 3737 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %} 3738 ins_encode %{ 3739 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3740 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3741 assert(src_sz == dst_sz , "src and dst size mismatch"); 3742 int vlen_enc = vector_length_encoding(src_sz); 3743 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3744 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3745 %} 3746 ins_pipe( pipe_slow ); 3747 %} 3748 3749 instruct reinterpret(vec dst) %{ 3750 predicate(!n->bottom_type()->isa_vectmask() && 3751 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3752 match(Set dst (VectorReinterpret dst)); 3753 ins_cost(125); 3754 format %{ "vector_reinterpret $dst\t!" 
%} 3755 ins_encode %{ 3756 // empty 3757 %} 3758 ins_pipe( pipe_slow ); 3759 %} 3760 3761 instruct reinterpret_expand(vec dst, vec src) %{ 3762 predicate(UseAVX == 0 && 3763 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3764 match(Set dst (VectorReinterpret src)); 3765 ins_cost(125); 3766 effect(TEMP dst); 3767 format %{ "vector_reinterpret_expand $dst,$src" %} 3768 ins_encode %{ 3769 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3770 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3771 3772 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3773 if (src_vlen_in_bytes == 4) { 3774 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3775 } else { 3776 assert(src_vlen_in_bytes == 8, ""); 3777 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3778 } 3779 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3780 %} 3781 ins_pipe( pipe_slow ); 3782 %} 3783 3784 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3785 predicate(UseAVX > 0 && 3786 !n->bottom_type()->isa_vectmask() && 3787 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3788 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3789 match(Set dst (VectorReinterpret src)); 3790 ins_cost(125); 3791 format %{ "vector_reinterpret_expand $dst,$src" %} 3792 ins_encode %{ 3793 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3794 %} 3795 ins_pipe( pipe_slow ); 3796 %} 3797 3798 3799 instruct vreinterpret_expand(legVec dst, vec src) %{ 3800 predicate(UseAVX > 0 && 3801 !n->bottom_type()->isa_vectmask() && 3802 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3803 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3804 match(Set dst (VectorReinterpret src)); 3805 ins_cost(125); 3806 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3807 ins_encode %{ 3808 switch (Matcher::vector_length_in_bytes(this, $src)) { 3809 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3810 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3811 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3812 default: ShouldNotReachHere(); 3813 } 3814 %} 3815 ins_pipe( pipe_slow ); 3816 %} 3817 3818 instruct reinterpret_shrink(vec dst, legVec src) %{ 3819 predicate(!n->bottom_type()->isa_vectmask() && 3820 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3821 match(Set dst (VectorReinterpret src)); 3822 ins_cost(125); 3823 format %{ "vector_reinterpret_shrink $dst,$src\t!" 
%} 3824 ins_encode %{ 3825 switch (Matcher::vector_length_in_bytes(this)) { 3826 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3827 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3828 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3829 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3830 default: ShouldNotReachHere(); 3831 } 3832 %} 3833 ins_pipe( pipe_slow ); 3834 %} 3835 3836 // ---------------------------------------------------------------------------------------------------- 3837 3838 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3839 match(Set dst (RoundDoubleMode src rmode)); 3840 format %{ "roundsd $dst,$src" %} 3841 ins_cost(150); 3842 ins_encode %{ 3843 assert(UseSSE >= 4, "required"); 3844 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) { 3845 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3846 } 3847 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3848 %} 3849 ins_pipe(pipe_slow); 3850 %} 3851 3852 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3853 match(Set dst (RoundDoubleMode con rmode)); 3854 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3855 ins_cost(150); 3856 ins_encode %{ 3857 assert(UseSSE >= 4, "required"); 3858 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3859 %} 3860 ins_pipe(pipe_slow); 3861 %} 3862 3863 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3864 predicate(Matcher::vector_length(n) < 8); 3865 match(Set dst (RoundDoubleModeV src rmode)); 3866 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3867 ins_encode %{ 3868 assert(UseAVX > 0, "required"); 3869 int vlen_enc = vector_length_encoding(this); 3870 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3871 %} 3872 ins_pipe( pipe_slow ); 3873 %} 3874 3875 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3876 predicate(Matcher::vector_length(n) == 8); 3877 match(Set dst (RoundDoubleModeV src rmode)); 3878 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3879 ins_encode %{ 3880 assert(UseAVX > 2, "required"); 3881 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3882 %} 3883 ins_pipe( pipe_slow ); 3884 %} 3885 3886 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3887 predicate(Matcher::vector_length(n) < 8); 3888 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3889 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3890 ins_encode %{ 3891 assert(UseAVX > 0, "required"); 3892 int vlen_enc = vector_length_encoding(this); 3893 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3894 %} 3895 ins_pipe( pipe_slow ); 3896 %} 3897 3898 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3899 predicate(Matcher::vector_length(n) == 8); 3900 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3901 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3902 ins_encode %{ 3903 assert(UseAVX > 2, "required"); 3904 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3905 %} 3906 ins_pipe( pipe_slow ); 3907 %} 3908 3909 instruct onspinwait() %{ 3910 match(OnSpinWait); 3911 ins_cost(200); 3912 3913 format %{ 3914 $$template 3915 $$emit$$"pause\t! 
membar_onspinwait" 3916 %} 3917 ins_encode %{ 3918 __ pause(); 3919 %} 3920 ins_pipe(pipe_slow); 3921 %} 3922 3923 // a * b + c 3924 instruct fmaD_reg(regD a, regD b, regD c) %{ 3925 match(Set c (FmaD c (Binary a b))); 3926 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3927 ins_cost(150); 3928 ins_encode %{ 3929 assert(UseFMA, "Needs FMA instructions support."); 3930 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3931 %} 3932 ins_pipe( pipe_slow ); 3933 %} 3934 3935 // a * b + c 3936 instruct fmaF_reg(regF a, regF b, regF c) %{ 3937 match(Set c (FmaF c (Binary a b))); 3938 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3939 ins_cost(150); 3940 ins_encode %{ 3941 assert(UseFMA, "Needs FMA instructions support."); 3942 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3943 %} 3944 ins_pipe( pipe_slow ); 3945 %} 3946 3947 // ====================VECTOR INSTRUCTIONS===================================== 3948 3949 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 3950 instruct MoveVec2Leg(legVec dst, vec src) %{ 3951 match(Set dst src); 3952 format %{ "" %} 3953 ins_encode %{ 3954 ShouldNotReachHere(); 3955 %} 3956 ins_pipe( fpu_reg_reg ); 3957 %} 3958 3959 instruct MoveLeg2Vec(vec dst, legVec src) %{ 3960 match(Set dst src); 3961 format %{ "" %} 3962 ins_encode %{ 3963 ShouldNotReachHere(); 3964 %} 3965 ins_pipe( fpu_reg_reg ); 3966 %} 3967 3968 // ============================================================================ 3969 3970 // Load vectors generic operand pattern 3971 instruct loadV(vec dst, memory mem) %{ 3972 match(Set dst (LoadVector mem)); 3973 ins_cost(125); 3974 format %{ "load_vector $dst,$mem" %} 3975 ins_encode %{ 3976 BasicType bt = Matcher::vector_element_basic_type(this); 3977 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 3978 %} 3979 ins_pipe( pipe_slow ); 3980 %} 3981 3982 // Store vectors generic operand pattern. 3983 instruct storeV(memory mem, vec src) %{ 3984 match(Set mem (StoreVector mem src)); 3985 ins_cost(145); 3986 format %{ "store_vector $mem,$src\n\t" %} 3987 ins_encode %{ 3988 switch (Matcher::vector_length_in_bytes(this, $src)) { 3989 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 3990 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 3991 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 3992 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 3993 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 3994 default: ShouldNotReachHere(); 3995 } 3996 %} 3997 ins_pipe( pipe_slow ); 3998 %} 3999 4000 // ---------------------------------------- Gather ------------------------------------ 4001 4002 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE 4003 4004 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 4005 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) && 4006 Matcher::vector_length_in_bytes(n) <= 32); 4007 match(Set dst (LoadVectorGather mem idx)); 4008 effect(TEMP dst, TEMP tmp, TEMP mask); 4009 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %} 4010 ins_encode %{ 4011 int vlen_enc = vector_length_encoding(this); 4012 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4013 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4014 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4015 __ lea($tmp$$Register, $mem$$Address); 4016 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4017 %} 4018 ins_pipe( pipe_slow ); 4019 %} 4020 4021 4022 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ 4023 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4024 !is_subword_type(Matcher::vector_element_basic_type(n))); 4025 match(Set dst (LoadVectorGather mem idx)); 4026 effect(TEMP dst, TEMP tmp, TEMP ktmp); 4027 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %} 4028 ins_encode %{ 4029 int vlen_enc = vector_length_encoding(this); 4030 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4031 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister); 4032 __ lea($tmp$$Register, $mem$$Address); 4033 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4034 %} 4035 ins_pipe( pipe_slow ); 4036 %} 4037 4038 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4039 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4040 !is_subword_type(Matcher::vector_element_basic_type(n))); 4041 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask))); 4042 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp); 4043 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %} 4044 ins_encode %{ 4045 assert(UseAVX > 2, "sanity"); 4046 int vlen_enc = vector_length_encoding(this); 4047 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4048 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4049 // Note: The gather instruction partially updates the opmask register used 4050 // for predication, hence the mask operand is moved to a temporary. 4051 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4052 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4053 __ lea($tmp$$Register, $mem$$Address); 4054 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4055 %} 4056 ins_pipe( pipe_slow ); 4057 %} 4058 4059 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegI rtmp) %{ 4060 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4061 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4062 effect(TEMP tmp, TEMP rtmp); 4063 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t!
using $tmp and $rtmp as TEMP" %} 4064 ins_encode %{ 4065 int vlen_enc = vector_length_encoding(this); 4066 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4067 __ lea($tmp$$Register, $mem$$Address); 4068 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp$$Register, vlen_enc); 4069 %} 4070 ins_pipe( pipe_slow ); 4071 %} 4072 4073 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegP idx_base_temp, 4074 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4075 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4076 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4077 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4078 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4079 ins_encode %{ 4080 int vlen_enc = vector_length_encoding(this); 4081 int vector_len = Matcher::vector_length(this); 4082 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4083 __ lea($tmp$$Register, $mem$$Address); 4084 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4085 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, noreg, $xtmp1$$XMMRegister, 4086 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4087 %} 4088 ins_pipe( pipe_slow ); 4089 %} 4090 4091 instruct vgather_subwordLE8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegI rtmp, rFlagsReg cr) %{ 4092 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4093 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4094 effect(TEMP tmp, TEMP rtmp, KILL cr); 4095 format %{ "vector_gatherLE8_off $dst, $mem, $idx_base, $offset\t! using $tmp and $rtmp as TEMP" %} 4096 ins_encode %{ 4097 int vlen_enc = vector_length_encoding(this); 4098 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4099 __ lea($tmp$$Register, $mem$$Address); 4100 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, $rtmp$$Register, vlen_enc); 4101 %} 4102 ins_pipe( pipe_slow ); 4103 %} 4104 4105 4106 instruct vgather_subwordGT8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegP idx_base_temp, 4107 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4108 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4109 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4110 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4111 format %{ "vector_gatherGT8_off $dst, $mem, $idx_base, $offset\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4112 ins_encode %{ 4113 int vlen_enc = vector_length_encoding(this); 4114 int vector_len = Matcher::vector_length(this); 4115 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4116 __ lea($tmp$$Register, $mem$$Address); 4117 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4118 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, noreg, $xtmp1$$XMMRegister, 4119 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4120 %} 4121 ins_pipe( pipe_slow ); 4122 %} 4123 4124 4125 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4126 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4127 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4128 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4129 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4130 ins_encode %{ 4131 int vlen_enc = vector_length_encoding(this); 4132 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4133 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4134 __ lea($tmp$$Register, $mem$$Address); 4135 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4136 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4137 %} 4138 ins_pipe( pipe_slow ); 4139 %} 4140 4141 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4142 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4143 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4144 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4145 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4146 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4147 ins_encode %{ 4148 int vlen_enc = vector_length_encoding(this); 4149 int vector_len = Matcher::vector_length(this); 4150 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4151 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4152 __ lea($tmp$$Register, $mem$$Address); 4153 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4154 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4155 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4156 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4157 %} 4158 ins_pipe( pipe_slow ); 4159 %} 4160 4161 instruct vgather_masked_subwordLE8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4162 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4163 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4164 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4165 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4166 ins_encode %{ 4167 int vlen_enc = vector_length_encoding(this); 4168 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4169 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4170 __ lea($tmp$$Register, $mem$$Address); 4171 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4172 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4173 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4174 %} 4175 ins_pipe( pipe_slow ); 4176 %} 4177 4178 instruct vgather_masked_subwordGT8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4179 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4180 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4181 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4182 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4183 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4184 ins_encode %{ 4185 int vlen_enc = vector_length_encoding(this); 4186 int vector_len = Matcher::vector_length(this); 4187 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4188 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4189 __ lea($tmp$$Register, $mem$$Address); 4190 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4191 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4192 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4193 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4194 %} 4195 ins_pipe( pipe_slow ); 4196 %} 4197 4198 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4199 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4200 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4201 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4202 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4203 ins_encode %{ 4204 int vlen_enc = vector_length_encoding(this); 4205 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4206 __ lea($tmp$$Register, $mem$$Address); 4207 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4208 if (elem_bt == T_SHORT) { 4209 __ movl($mask_idx$$Register, 0x55555555); 4210 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4211 } 4212 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4213 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4214 %} 4215 ins_pipe( pipe_slow ); 4216 %} 4217 4218 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4219 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4220 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4221 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4222 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4223 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4224 ins_encode %{ 4225 int vlen_enc = vector_length_encoding(this); 4226 int vector_len = Matcher::vector_length(this); 4227 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4228 __ lea($tmp$$Register, $mem$$Address); 4229 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4230 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4231 if (elem_bt == T_SHORT) { 4232 __ movl($mask_idx$$Register, 0x55555555); 4233 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4234 } 4235 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4236 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4237 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4238 %} 4239 ins_pipe( pipe_slow ); 4240 %} 4241 4242 instruct vgather_masked_subwordLE8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4243 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4244 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4245 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4246 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4247 ins_encode %{ 4248 int vlen_enc = vector_length_encoding(this); 4249 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4250 __ lea($tmp$$Register, $mem$$Address); 4251 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4252 if (elem_bt == T_SHORT) { 4253 __ movl($mask_idx$$Register, 0x55555555); 4254 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4255 } 4256 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4257 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4258 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4259 %} 4260 ins_pipe( pipe_slow ); 4261 %} 4262 4263 instruct vgather_masked_subwordGT8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4264 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4265 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4266 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4267 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4268 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4269 ins_encode %{ 4270 int vlen_enc = vector_length_encoding(this); 4271 int vector_len = Matcher::vector_length(this); 4272 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4273 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4274 __ lea($tmp$$Register, $mem$$Address); 4275 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4276 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4277 if (elem_bt == T_SHORT) { 4278 __ movl($mask_idx$$Register, 0x55555555); 4279 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4280 } 4281 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4282 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4283 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4284 %} 4285 ins_pipe( pipe_slow ); 4286 %} 4287 4288 // ====================Scatter======================================= 4289 4290 // Scatter INT, LONG, FLOAT, DOUBLE 4291 4292 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{ 4293 predicate(UseAVX > 2); 4294 match(Set mem (StoreVectorScatter mem (Binary src idx))); 4295 effect(TEMP tmp, TEMP ktmp); 4296 format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %} 4297 ins_encode %{ 4298 int vlen_enc = vector_length_encoding(this, $src); 4299 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4300 4301 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4302 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4303 4304 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg); 4305 __ lea($tmp$$Register, $mem$$Address); 4306 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4307 %} 4308 ins_pipe( pipe_slow ); 4309 %} 4310 4311 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4312 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask)))); 4313 effect(TEMP tmp, TEMP ktmp); 4314 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %} 4315 ins_encode %{ 4316 int vlen_enc = vector_length_encoding(this, $src); 4317 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4318 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4319 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4320 // Note: The scatter instruction partially updates the opmask register used 4321 // for predication, hence the mask operand is moved to a temporary.
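    // (The opmask acts as a completion mask: the hardware clears each bit as the
    // corresponding element is stored, so the register reads as zero once the scatter
    // finishes. Copying $mask into $ktmp keeps the incoming mask value intact.)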
4322 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4323 __ lea($tmp$$Register, $mem$$Address); 4324 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4325 %} 4326 ins_pipe( pipe_slow ); 4327 %} 4328 4329 // ====================REPLICATE======================================= 4330 4331 // Replicate byte scalar to be vector 4332 instruct vReplB_reg(vec dst, rRegI src) %{ 4333 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 4334 match(Set dst (Replicate src)); 4335 format %{ "replicateB $dst,$src" %} 4336 ins_encode %{ 4337 uint vlen = Matcher::vector_length(this); 4338 if (UseAVX >= 2) { 4339 int vlen_enc = vector_length_encoding(this); 4340 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4341 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4342 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4343 } else { 4344 __ movdl($dst$$XMMRegister, $src$$Register); 4345 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4346 } 4347 } else { 4348 assert(UseAVX < 2, ""); 4349 __ movdl($dst$$XMMRegister, $src$$Register); 4350 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4351 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4352 if (vlen >= 16) { 4353 assert(vlen == 16, ""); 4354 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4355 } 4356 } 4357 %} 4358 ins_pipe( pipe_slow ); 4359 %} 4360 4361 instruct ReplB_mem(vec dst, memory mem) %{ 4362 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE); 4363 match(Set dst (Replicate (LoadB mem))); 4364 format %{ "replicateB $dst,$mem" %} 4365 ins_encode %{ 4366 int vlen_enc = vector_length_encoding(this); 4367 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4368 %} 4369 ins_pipe( pipe_slow ); 4370 %} 4371 4372 // ====================ReplicateS======================================= 4373 4374 instruct vReplS_reg(vec dst, rRegI src) %{ 4375 predicate(Matcher::vector_element_basic_type(n) == T_SHORT); 4376 match(Set dst (Replicate src)); 4377 format %{ "replicateS $dst,$src" %} 4378 ins_encode %{ 4379 uint vlen = Matcher::vector_length(this); 4380 int vlen_enc = vector_length_encoding(this); 4381 if (UseAVX >= 2) { 4382 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4383 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4384 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4385 } else { 4386 __ movdl($dst$$XMMRegister, $src$$Register); 4387 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4388 } 4389 } else { 4390 assert(UseAVX < 2, ""); 4391 __ movdl($dst$$XMMRegister, $src$$Register); 4392 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4393 if (vlen >= 8) { 4394 assert(vlen == 8, ""); 4395 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4396 } 4397 } 4398 %} 4399 ins_pipe( pipe_slow ); 4400 %} 4401 4402 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{ 4403 match(Set dst (Replicate con)); 4404 effect(TEMP rtmp); 4405 format %{ "replicateHF $dst, $con \t! 
using $rtmp as TEMP" %} 4406 ins_encode %{ 4407 int vlen_enc = vector_length_encoding(this); 4408 BasicType bt = Matcher::vector_element_basic_type(this); 4409 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, ""); 4410 __ movl($rtmp$$Register, $con$$constant); 4411 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4412 %} 4413 ins_pipe( pipe_slow ); 4414 %} 4415 4416 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{ 4417 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT); 4418 match(Set dst (Replicate src)); 4419 effect(TEMP rtmp); 4420 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %} 4421 ins_encode %{ 4422 int vlen_enc = vector_length_encoding(this); 4423 __ vmovw($rtmp$$Register, $src$$XMMRegister); 4424 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4425 %} 4426 ins_pipe( pipe_slow ); 4427 %} 4428 4429 instruct ReplS_mem(vec dst, memory mem) %{ 4430 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4431 match(Set dst (Replicate (LoadS mem))); 4432 format %{ "replicateS $dst,$mem" %} 4433 ins_encode %{ 4434 int vlen_enc = vector_length_encoding(this); 4435 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4436 %} 4437 ins_pipe( pipe_slow ); 4438 %} 4439 4440 // ====================ReplicateI======================================= 4441 4442 instruct ReplI_reg(vec dst, rRegI src) %{ 4443 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4444 match(Set dst (Replicate src)); 4445 format %{ "replicateI $dst,$src" %} 4446 ins_encode %{ 4447 uint vlen = Matcher::vector_length(this); 4448 int vlen_enc = vector_length_encoding(this); 4449 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4450 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4451 } else if (VM_Version::supports_avx2()) { 4452 __ movdl($dst$$XMMRegister, $src$$Register); 4453 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4454 } else { 4455 __ movdl($dst$$XMMRegister, $src$$Register); 4456 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4457 } 4458 %} 4459 ins_pipe( pipe_slow ); 4460 %} 4461 4462 instruct ReplI_mem(vec dst, memory mem) %{ 4463 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4464 match(Set dst (Replicate (LoadI mem))); 4465 format %{ "replicateI $dst,$mem" %} 4466 ins_encode %{ 4467 int vlen_enc = vector_length_encoding(this); 4468 if (VM_Version::supports_avx2()) { 4469 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4470 } else if (VM_Version::supports_avx()) { 4471 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4472 } else { 4473 __ movdl($dst$$XMMRegister, $mem$$Address); 4474 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4475 } 4476 %} 4477 ins_pipe( pipe_slow ); 4478 %} 4479 4480 instruct ReplI_imm(vec dst, immI con) %{ 4481 predicate(Matcher::is_non_long_integral_vector(n)); 4482 match(Set dst (Replicate con)); 4483 format %{ "replicateI $dst,$con" %} 4484 ins_encode %{ 4485 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4486 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 16) / 4487 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4488 BasicType bt = Matcher::vector_element_basic_type(this); 4489 int vlen = Matcher::vector_length_in_bytes(this); 4490 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4491 %} 4492 ins_pipe( pipe_slow ); 4493 %} 4494 4495 // Replicate scalar zero to be vector 4496 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4497 predicate(Matcher::is_non_long_integral_vector(n)); 4498 match(Set dst (Replicate zero)); 4499 format %{ "replicateI $dst,$zero" %} 4500 ins_encode %{ 4501 int vlen_enc = vector_length_encoding(this); 4502 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4503 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4504 } else { 4505 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4506 } 4507 %} 4508 ins_pipe( fpu_reg_reg ); 4509 %} 4510 4511 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4512 predicate(Matcher::is_non_long_integral_vector(n)); 4513 match(Set dst (Replicate con)); 4514 format %{ "vallones $dst" %} 4515 ins_encode %{ 4516 int vector_len = vector_length_encoding(this); 4517 __ vallones($dst$$XMMRegister, vector_len); 4518 %} 4519 ins_pipe( pipe_slow ); 4520 %} 4521 4522 // ====================ReplicateL======================================= 4523 4524 // Replicate long (8 byte) scalar to be vector 4525 instruct ReplL_reg(vec dst, rRegL src) %{ 4526 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4527 match(Set dst (Replicate src)); 4528 format %{ "replicateL $dst,$src" %} 4529 ins_encode %{ 4530 int vlen = Matcher::vector_length(this); 4531 int vlen_enc = vector_length_encoding(this); 4532 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4533 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4534 } else if (VM_Version::supports_avx2()) { 4535 __ movdq($dst$$XMMRegister, $src$$Register); 4536 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4537 } else { 4538 __ movdq($dst$$XMMRegister, $src$$Register); 4539 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4540 } 4541 %} 4542 ins_pipe( pipe_slow ); 4543 %} 4544 4545 instruct ReplL_mem(vec dst, memory mem) %{ 4546 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4547 match(Set dst (Replicate (LoadL mem))); 4548 format %{ "replicateL $dst,$mem" %} 4549 ins_encode %{ 4550 int vlen_enc = vector_length_encoding(this); 4551 if (VM_Version::supports_avx2()) { 4552 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4553 } else if (VM_Version::supports_sse3()) { 4554 __ movddup($dst$$XMMRegister, $mem$$Address); 4555 } else { 4556 __ movq($dst$$XMMRegister, $mem$$Address); 4557 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4558 } 4559 %} 4560 ins_pipe( pipe_slow ); 4561 %} 4562 4563 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4564 instruct ReplL_imm(vec dst, immL con) %{ 4565 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4566 match(Set dst (Replicate con)); 4567 format %{ "replicateL $dst,$con" %} 4568 ins_encode %{ 4569 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4570 int vlen = Matcher::vector_length_in_bytes(this); 4571 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4572 %} 4573 ins_pipe( pipe_slow ); 4574 %} 4575 4576 instruct ReplL_zero(vec dst, immL0 zero) %{ 4577 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4578 match(Set dst (Replicate zero)); 4579 format %{ "replicateL $dst,$zero" %} 4580 ins_encode %{ 4581 int vlen_enc = vector_length_encoding(this); 4582 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4583 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4584 } else { 4585 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4586 } 4587 %} 4588 ins_pipe( fpu_reg_reg ); 4589 %} 4590 4591 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4592 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4593 match(Set dst (Replicate con)); 4594 format %{ "vallones $dst" %} 4595 ins_encode %{ 4596 int vector_len = vector_length_encoding(this); 4597 __ vallones($dst$$XMMRegister, vector_len); 4598 %} 4599 ins_pipe( pipe_slow ); 4600 %} 4601 4602 // ====================ReplicateF======================================= 4603 4604 instruct vReplF_reg(vec dst, vlRegF src) %{ 4605 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4606 match(Set dst (Replicate src)); 4607 format %{ "replicateF $dst,$src" %} 4608 ins_encode %{ 4609 uint vlen = Matcher::vector_length(this); 4610 int vlen_enc = vector_length_encoding(this); 4611 if (vlen <= 4) { 4612 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4613 } else if (VM_Version::supports_avx2()) { 4614 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4615 } else { 4616 assert(vlen == 8, "sanity"); 4617 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4618 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4619 } 4620 %} 4621 ins_pipe( pipe_slow ); 4622 %} 4623 4624 instruct ReplF_reg(vec dst, vlRegF src) %{ 4625 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4626 match(Set dst (Replicate src)); 4627 format %{ "replicateF $dst,$src" %} 4628 ins_encode %{ 4629 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4630 %} 4631 ins_pipe( pipe_slow ); 4632 %} 4633 4634 instruct ReplF_mem(vec dst, memory mem) %{ 4635 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4636 match(Set dst (Replicate (LoadF mem))); 4637 format %{ "replicateF $dst,$mem" %} 4638 ins_encode %{ 4639 int vlen_enc = vector_length_encoding(this); 4640 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4641 %} 4642 ins_pipe( pipe_slow ); 4643 %} 4644 4645 // Replicate float scalar immediate to be vector by loading from const table. 4646 instruct ReplF_imm(vec dst, immF con) %{ 4647 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4648 match(Set dst (Replicate con)); 4649 format %{ "replicateF $dst,$con" %} 4650 ins_encode %{ 4651 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant, 4652 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
1 : 2) : 4)); 4653 int vlen = Matcher::vector_length_in_bytes(this); 4654 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4655 %} 4656 ins_pipe( pipe_slow ); 4657 %} 4658 4659 instruct ReplF_zero(vec dst, immF0 zero) %{ 4660 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4661 match(Set dst (Replicate zero)); 4662 format %{ "replicateF $dst,$zero" %} 4663 ins_encode %{ 4664 int vlen_enc = vector_length_encoding(this); 4665 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4666 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4667 } else { 4668 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4669 } 4670 %} 4671 ins_pipe( fpu_reg_reg ); 4672 %} 4673 4674 // ====================ReplicateD======================================= 4675 4676 // Replicate double (8 bytes) scalar to be vector 4677 instruct vReplD_reg(vec dst, vlRegD src) %{ 4678 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4679 match(Set dst (Replicate src)); 4680 format %{ "replicateD $dst,$src" %} 4681 ins_encode %{ 4682 uint vlen = Matcher::vector_length(this); 4683 int vlen_enc = vector_length_encoding(this); 4684 if (vlen <= 2) { 4685 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4686 } else if (VM_Version::supports_avx2()) { 4687 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4688 } else { 4689 assert(vlen == 4, "sanity"); 4690 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4691 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4692 } 4693 %} 4694 ins_pipe( pipe_slow ); 4695 %} 4696 4697 instruct ReplD_reg(vec dst, vlRegD src) %{ 4698 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4699 match(Set dst (Replicate src)); 4700 format %{ "replicateD $dst,$src" %} 4701 ins_encode %{ 4702 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4703 %} 4704 ins_pipe( pipe_slow ); 4705 %} 4706 4707 instruct ReplD_mem(vec dst, memory mem) %{ 4708 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4709 match(Set dst (Replicate (LoadD mem))); 4710 format %{ "replicateD $dst,$mem" %} 4711 ins_encode %{ 4712 if (Matcher::vector_length(this) >= 4) { 4713 int vlen_enc = vector_length_encoding(this); 4714 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4715 } else { 4716 __ movddup($dst$$XMMRegister, $mem$$Address); 4717 } 4718 %} 4719 ins_pipe( pipe_slow ); 4720 %} 4721 4722 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 4723 instruct ReplD_imm(vec dst, immD con) %{ 4724 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4725 match(Set dst (Replicate con)); 4726 format %{ "replicateD $dst,$con" %} 4727 ins_encode %{ 4728 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4729 int vlen = Matcher::vector_length_in_bytes(this); 4730 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen); 4731 %} 4732 ins_pipe( pipe_slow ); 4733 %} 4734 4735 instruct ReplD_zero(vec dst, immD0 zero) %{ 4736 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4737 match(Set dst (Replicate zero)); 4738 format %{ "replicateD $dst,$zero" %} 4739 ins_encode %{ 4740 int vlen_enc = vector_length_encoding(this); 4741 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4742 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4743 } else { 4744 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4745 } 4746 %} 4747 ins_pipe( fpu_reg_reg ); 4748 %} 4749 4750 // ====================VECTOR INSERT======================================= 4751 4752 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4753 predicate(Matcher::vector_length_in_bytes(n) < 32); 4754 match(Set dst (VectorInsert (Binary dst val) idx)); 4755 format %{ "vector_insert $dst,$val,$idx" %} 4756 ins_encode %{ 4757 assert(UseSSE >= 4, "required"); 4758 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4759 4760 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4761 4762 assert(is_integral_type(elem_bt), ""); 4763 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4764 4765 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4766 %} 4767 ins_pipe( pipe_slow ); 4768 %} 4769 4770 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4771 predicate(Matcher::vector_length_in_bytes(n) == 32); 4772 match(Set dst (VectorInsert (Binary src val) idx)); 4773 effect(TEMP vtmp); 4774 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4775 ins_encode %{ 4776 int vlen_enc = Assembler::AVX_256bit; 4777 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4778 int elem_per_lane = 16/type2aelembytes(elem_bt); 4779 int log2epr = log2(elem_per_lane); 4780 4781 assert(is_integral_type(elem_bt), "sanity"); 4782 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4783 4784 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4785 uint y_idx = ($idx$$constant >> log2epr) & 1; 4786 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4787 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4788 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4789 %} 4790 ins_pipe( pipe_slow ); 4791 %} 4792 4793 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4794 predicate(Matcher::vector_length_in_bytes(n) == 64); 4795 match(Set dst (VectorInsert (Binary src val) idx)); 4796 effect(TEMP vtmp); 4797 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4798 ins_encode %{ 4799 assert(UseAVX > 2, "sanity"); 4800 4801 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4802 int elem_per_lane = 16/type2aelembytes(elem_bt); 4803 int log2epr = log2(elem_per_lane); 4804 4805 assert(is_integral_type(elem_bt), ""); 4806 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4807 4808 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4809 uint y_idx = ($idx$$constant >> log2epr) & 3; 4810 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4811 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4812 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, 
y_idx); 4813 %} 4814 ins_pipe( pipe_slow ); 4815 %} 4816 4817 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4818 predicate(Matcher::vector_length(n) == 2); 4819 match(Set dst (VectorInsert (Binary dst val) idx)); 4820 format %{ "vector_insert $dst,$val,$idx" %} 4821 ins_encode %{ 4822 assert(UseSSE >= 4, "required"); 4823 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4824 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4825 4826 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4827 %} 4828 ins_pipe( pipe_slow ); 4829 %} 4830 4831 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4832 predicate(Matcher::vector_length(n) == 4); 4833 match(Set dst (VectorInsert (Binary src val) idx)); 4834 effect(TEMP vtmp); 4835 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4836 ins_encode %{ 4837 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4838 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4839 4840 uint x_idx = $idx$$constant & right_n_bits(1); 4841 uint y_idx = ($idx$$constant >> 1) & 1; 4842 int vlen_enc = Assembler::AVX_256bit; 4843 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4844 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4845 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4846 %} 4847 ins_pipe( pipe_slow ); 4848 %} 4849 4850 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 4851 predicate(Matcher::vector_length(n) == 8); 4852 match(Set dst (VectorInsert (Binary src val) idx)); 4853 effect(TEMP vtmp); 4854 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4855 ins_encode %{ 4856 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 4857 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4858 4859 uint x_idx = $idx$$constant & right_n_bits(1); 4860 uint y_idx = ($idx$$constant >> 1) & 3; 4861 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4862 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4863 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4864 %} 4865 ins_pipe( pipe_slow ); 4866 %} 4867 4868 instruct insertF(vec dst, regF val, immU8 idx) %{ 4869 predicate(Matcher::vector_length(n) < 8); 4870 match(Set dst (VectorInsert (Binary dst val) idx)); 4871 format %{ "vector_insert $dst,$val,$idx" %} 4872 ins_encode %{ 4873 assert(UseSSE >= 4, "sanity"); 4874 4875 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4876 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4877 4878 uint x_idx = $idx$$constant & right_n_bits(2); 4879 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4880 %} 4881 ins_pipe( pipe_slow ); 4882 %} 4883 4884 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 4885 predicate(Matcher::vector_length(n) >= 8); 4886 match(Set dst (VectorInsert (Binary src val) idx)); 4887 effect(TEMP vtmp); 4888 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4889 ins_encode %{ 4890 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4891 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4892 4893 int vlen = Matcher::vector_length(this); 4894 uint x_idx = $idx$$constant & right_n_bits(2); 4895 if (vlen == 8) { 4896 uint y_idx = ($idx$$constant >> 2) & 1; 4897 
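// Worked example of the index split for a 256-bit float vector (vlen == 8):
// idx == 6 gives y_idx = (6 >> 2) & 1 = 1 and x_idx = 6 & 3 = 2, i.e. the scalar
// lands in float slot 2 of the upper 128-bit lane. The sequence below extracts
// that lane into $vtmp, writes the slot with vinsertps, and merges the lane back
// over $src into $dst.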
int vlen_enc = Assembler::AVX_256bit; 4898 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4899 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4900 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4901 } else { 4902 assert(vlen == 16, "sanity"); 4903 uint y_idx = ($idx$$constant >> 2) & 3; 4904 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4905 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4906 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4907 } 4908 %} 4909 ins_pipe( pipe_slow ); 4910 %} 4911 4912 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 4913 predicate(Matcher::vector_length(n) == 2); 4914 match(Set dst (VectorInsert (Binary dst val) idx)); 4915 effect(TEMP tmp); 4916 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 4917 ins_encode %{ 4918 assert(UseSSE >= 4, "sanity"); 4919 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4920 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4921 4922 __ movq($tmp$$Register, $val$$XMMRegister); 4923 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 4924 %} 4925 ins_pipe( pipe_slow ); 4926 %} 4927 4928 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 4929 predicate(Matcher::vector_length(n) == 4); 4930 match(Set dst (VectorInsert (Binary src val) idx)); 4931 effect(TEMP vtmp, TEMP tmp); 4932 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 4933 ins_encode %{ 4934 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4935 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4936 4937 uint x_idx = $idx$$constant & right_n_bits(1); 4938 uint y_idx = ($idx$$constant >> 1) & 1; 4939 int vlen_enc = Assembler::AVX_256bit; 4940 __ movq($tmp$$Register, $val$$XMMRegister); 4941 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4942 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4943 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4944 %} 4945 ins_pipe( pipe_slow ); 4946 %} 4947 4948 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 4949 predicate(Matcher::vector_length(n) == 8); 4950 match(Set dst (VectorInsert (Binary src val) idx)); 4951 effect(TEMP tmp, TEMP vtmp); 4952 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4953 ins_encode %{ 4954 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4955 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4956 4957 uint x_idx = $idx$$constant & right_n_bits(1); 4958 uint y_idx = ($idx$$constant >> 1) & 3; 4959 __ movq($tmp$$Register, $val$$XMMRegister); 4960 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4961 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4962 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4963 %} 4964 ins_pipe( pipe_slow ); 4965 %} 4966 4967 // ====================REDUCTION ARITHMETIC======================================= 4968 4969 // =======================Int Reduction========================================== 4970 4971 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4972 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); 
// src2 4973 match(Set dst (AddReductionVI src1 src2)); 4974 match(Set dst (MulReductionVI src1 src2)); 4975 match(Set dst (AndReductionV src1 src2)); 4976 match(Set dst ( OrReductionV src1 src2)); 4977 match(Set dst (XorReductionV src1 src2)); 4978 match(Set dst (MinReductionV src1 src2)); 4979 match(Set dst (MaxReductionV src1 src2)); 4980 effect(TEMP vtmp1, TEMP vtmp2); 4981 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4982 ins_encode %{ 4983 int opcode = this->ideal_Opcode(); 4984 int vlen = Matcher::vector_length(this, $src2); 4985 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4986 %} 4987 ins_pipe( pipe_slow ); 4988 %} 4989 4990 // =======================Long Reduction========================================== 4991 4992 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4993 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); 4994 match(Set dst (AddReductionVL src1 src2)); 4995 match(Set dst (MulReductionVL src1 src2)); 4996 match(Set dst (AndReductionV src1 src2)); 4997 match(Set dst ( OrReductionV src1 src2)); 4998 match(Set dst (XorReductionV src1 src2)); 4999 match(Set dst (MinReductionV src1 src2)); 5000 match(Set dst (MaxReductionV src1 src2)); 5001 effect(TEMP vtmp1, TEMP vtmp2); 5002 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5003 ins_encode %{ 5004 int opcode = this->ideal_Opcode(); 5005 int vlen = Matcher::vector_length(this, $src2); 5006 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5007 %} 5008 ins_pipe( pipe_slow ); 5009 %} 5010 5011 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 5012 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); 5013 match(Set dst (AddReductionVL src1 src2)); 5014 match(Set dst (MulReductionVL src1 src2)); 5015 match(Set dst (AndReductionV src1 src2)); 5016 match(Set dst ( OrReductionV src1 src2)); 5017 match(Set dst (XorReductionV src1 src2)); 5018 match(Set dst (MinReductionV src1 src2)); 5019 match(Set dst (MaxReductionV src1 src2)); 5020 effect(TEMP vtmp1, TEMP vtmp2); 5021 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5022 ins_encode %{ 5023 int opcode = this->ideal_Opcode(); 5024 int vlen = Matcher::vector_length(this, $src2); 5025 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5026 %} 5027 ins_pipe( pipe_slow ); 5028 %} 5029 5030 // =======================Float Reduction========================================== 5031 5032 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 5033 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src 5034 match(Set dst (AddReductionVF dst src)); 5035 match(Set dst (MulReductionVF dst src)); 5036 effect(TEMP dst, TEMP vtmp); 5037 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 5038 ins_encode %{ 5039 int opcode = this->ideal_Opcode(); 5040 int vlen = Matcher::vector_length(this, $src); 5041 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5042 %} 5043 ins_pipe( pipe_slow ); 5044 %} 5045 5046 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 5047 
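// Strictly ordered variant: $dst is both the incoming scalar accumulator and the
// result (hence TEMP dst), and reduce_fp accumulates the eight lanes into it one
// by one in lane order rather than as a tree, since FP add/mul is not associative
// under rounding and auto-vectorized loops must keep their sequential semantics.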
predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5048 match(Set dst (AddReductionVF dst src)); 5049 match(Set dst (MulReductionVF dst src)); 5050 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5051 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5052 ins_encode %{ 5053 int opcode = this->ideal_Opcode(); 5054 int vlen = Matcher::vector_length(this, $src); 5055 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5056 %} 5057 ins_pipe( pipe_slow ); 5058 %} 5059 5060 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5061 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src 5062 match(Set dst (AddReductionVF dst src)); 5063 match(Set dst (MulReductionVF dst src)); 5064 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5065 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5066 ins_encode %{ 5067 int opcode = this->ideal_Opcode(); 5068 int vlen = Matcher::vector_length(this, $src); 5069 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5070 %} 5071 ins_pipe( pipe_slow ); 5072 %} 5073 5074 5075 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{ 5076 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5077 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5078 // src1 contains reduction identity 5079 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5080 match(Set dst (AddReductionVF src1 src2)); 5081 match(Set dst (MulReductionVF src1 src2)); 5082 effect(TEMP dst); 5083 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %} 5084 ins_encode %{ 5085 int opcode = this->ideal_Opcode(); 5086 int vlen = Matcher::vector_length(this, $src2); 5087 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5088 %} 5089 ins_pipe( pipe_slow ); 5090 %} 5091 5092 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{ 5093 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5094 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5095 // src1 contains reduction identity 5096 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5097 match(Set dst (AddReductionVF src1 src2)); 5098 match(Set dst (MulReductionVF src1 src2)); 5099 effect(TEMP dst, TEMP vtmp); 5100 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5101 ins_encode %{ 5102 int opcode = this->ideal_Opcode(); 5103 int vlen = Matcher::vector_length(this, $src2); 5104 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5105 %} 5106 ins_pipe( pipe_slow ); 5107 %} 5108 5109 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{ 5110 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5111 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
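// As an illustration (assumed typical Vector API usage, not code from this file),
// a Java reduction along the lines of
//   FloatVector v = FloatVector.fromArray(FloatVector.SPECIES_256, a, 0);
//   float sum = v.reduceLanes(VectorOperators.ADD);
// permits reassociation of the lane additions, which is why these rules only
// match when requires_strict_order() is false.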
5112 // src1 contains reduction identity 5113 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5114 match(Set dst (AddReductionVF src1 src2)); 5115 match(Set dst (MulReductionVF src1 src2)); 5116 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5117 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5118 ins_encode %{ 5119 int opcode = this->ideal_Opcode(); 5120 int vlen = Matcher::vector_length(this, $src2); 5121 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5122 %} 5123 ins_pipe( pipe_slow ); 5124 %} 5125 5126 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5127 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5128 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5129 // src1 contains reduction identity 5130 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2 5131 match(Set dst (AddReductionVF src1 src2)); 5132 match(Set dst (MulReductionVF src1 src2)); 5133 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5134 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5135 ins_encode %{ 5136 int opcode = this->ideal_Opcode(); 5137 int vlen = Matcher::vector_length(this, $src2); 5138 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5139 %} 5140 ins_pipe( pipe_slow ); 5141 %} 5142 5143 // =======================Double Reduction========================================== 5144 5145 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 5146 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src 5147 match(Set dst (AddReductionVD dst src)); 5148 match(Set dst (MulReductionVD dst src)); 5149 effect(TEMP dst, TEMP vtmp); 5150 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 5151 ins_encode %{ 5152 int opcode = this->ideal_Opcode(); 5153 int vlen = Matcher::vector_length(this, $src); 5154 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5155 %} 5156 ins_pipe( pipe_slow ); 5157 %} 5158 5159 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 5160 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src 5161 match(Set dst (AddReductionVD dst src)); 5162 match(Set dst (MulReductionVD dst src)); 5163 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5164 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5165 ins_encode %{ 5166 int opcode = this->ideal_Opcode(); 5167 int vlen = Matcher::vector_length(this, $src); 5168 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5169 %} 5170 ins_pipe( pipe_slow ); 5171 %} 5172 5173 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5174 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5175 match(Set dst (AddReductionVD dst src)); 5176 match(Set dst (MulReductionVD dst src)); 5177 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5178 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5179 ins_encode %{ 5180 int opcode = this->ideal_Opcode(); 5181 int vlen = Matcher::vector_length(this, $src); 5182 __ 
reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5183 %} 5184 ins_pipe( pipe_slow ); 5185 %} 5186 5187 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{ 5188 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5189 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5190 // src1 contains reduction identity 5191 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5192 match(Set dst (AddReductionVD src1 src2)); 5193 match(Set dst (MulReductionVD src1 src2)); 5194 effect(TEMP dst); 5195 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %} 5196 ins_encode %{ 5197 int opcode = this->ideal_Opcode(); 5198 int vlen = Matcher::vector_length(this, $src2); 5199 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5200 %} 5201 ins_pipe( pipe_slow ); 5202 %} 5203 5204 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{ 5205 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5206 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5207 // src1 contains reduction identity 5208 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5209 match(Set dst (AddReductionVD src1 src2)); 5210 match(Set dst (MulReductionVD src1 src2)); 5211 effect(TEMP dst, TEMP vtmp); 5212 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5213 ins_encode %{ 5214 int opcode = this->ideal_Opcode(); 5215 int vlen = Matcher::vector_length(this, $src2); 5216 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5217 %} 5218 ins_pipe( pipe_slow ); 5219 %} 5220 5221 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5222 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5223 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
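// The encoding below reduces $src2 alone and never reads $src1, which (as noted
// below) holds the reduction identity supplied for the operation.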
5224 // src1 contains reduction identity 5225 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5226 match(Set dst (AddReductionVD src1 src2)); 5227 match(Set dst (MulReductionVD src1 src2)); 5228 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5229 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5230 ins_encode %{ 5231 int opcode = this->ideal_Opcode(); 5232 int vlen = Matcher::vector_length(this, $src2); 5233 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5234 %} 5235 ins_pipe( pipe_slow ); 5236 %} 5237 5238 // =======================Byte Reduction========================================== 5239 5240 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5241 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 5242 match(Set dst (AddReductionVI src1 src2)); 5243 match(Set dst (AndReductionV src1 src2)); 5244 match(Set dst ( OrReductionV src1 src2)); 5245 match(Set dst (XorReductionV src1 src2)); 5246 match(Set dst (MinReductionV src1 src2)); 5247 match(Set dst (MaxReductionV src1 src2)); 5248 effect(TEMP vtmp1, TEMP vtmp2); 5249 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5250 ins_encode %{ 5251 int opcode = this->ideal_Opcode(); 5252 int vlen = Matcher::vector_length(this, $src2); 5253 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5254 %} 5255 ins_pipe( pipe_slow ); 5256 %} 5257 5258 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5259 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 5260 match(Set dst (AddReductionVI src1 src2)); 5261 match(Set dst (AndReductionV src1 src2)); 5262 match(Set dst ( OrReductionV src1 src2)); 5263 match(Set dst (XorReductionV src1 src2)); 5264 match(Set dst (MinReductionV src1 src2)); 5265 match(Set dst (MaxReductionV src1 src2)); 5266 effect(TEMP vtmp1, TEMP vtmp2); 5267 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5268 ins_encode %{ 5269 int opcode = this->ideal_Opcode(); 5270 int vlen = Matcher::vector_length(this, $src2); 5271 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5272 %} 5273 ins_pipe( pipe_slow ); 5274 %} 5275 5276 // =======================Short Reduction========================================== 5277 5278 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5279 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 5280 match(Set dst (AddReductionVI src1 src2)); 5281 match(Set dst (MulReductionVI src1 src2)); 5282 match(Set dst (AndReductionV src1 src2)); 5283 match(Set dst ( OrReductionV src1 src2)); 5284 match(Set dst (XorReductionV src1 src2)); 5285 match(Set dst (MinReductionV src1 src2)); 5286 match(Set dst (MaxReductionV src1 src2)); 5287 effect(TEMP vtmp1, TEMP vtmp2); 5288 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5289 ins_encode %{ 5290 int opcode = this->ideal_Opcode(); 5291 int vlen = Matcher::vector_length(this, $src2); 5292 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5293 %} 5294 ins_pipe( pipe_slow 
); 5295 %} 5296 5297 // =======================Mul Reduction========================================== 5298 5299 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5300 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5301 Matcher::vector_length(n->in(2)) <= 32); // src2 5302 match(Set dst (MulReductionVI src1 src2)); 5303 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5304 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5305 ins_encode %{ 5306 int opcode = this->ideal_Opcode(); 5307 int vlen = Matcher::vector_length(this, $src2); 5308 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5309 %} 5310 ins_pipe( pipe_slow ); 5311 %} 5312 5313 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5314 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5315 Matcher::vector_length(n->in(2)) == 64); // src2 5316 match(Set dst (MulReductionVI src1 src2)); 5317 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5318 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5319 ins_encode %{ 5320 int opcode = this->ideal_Opcode(); 5321 int vlen = Matcher::vector_length(this, $src2); 5322 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5323 %} 5324 ins_pipe( pipe_slow ); 5325 %} 5326 5327 //--------------------Min/Max Float Reduction -------------------- 5328 // Float Min Reduction 5329 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5330 legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5331 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5332 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5333 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5334 Matcher::vector_length(n->in(2)) == 2); 5335 match(Set dst (MinReductionV src1 src2)); 5336 match(Set dst (MaxReductionV src1 src2)); 5337 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5338 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5339 ins_encode %{ 5340 assert(UseAVX > 0, "sanity"); 5341 5342 int opcode = this->ideal_Opcode(); 5343 int vlen = Matcher::vector_length(this, $src2); 5344 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5345 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5346 %} 5347 ins_pipe( pipe_slow ); 5348 %} 5349 5350 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5351 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5352 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5353 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5354 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5355 Matcher::vector_length(n->in(2)) >= 4); 5356 match(Set dst (MinReductionV src1 src2)); 5357 match(Set dst (MaxReductionV src1 src2)); 5358 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5359 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5360 ins_encode %{ 5361 assert(UseAVX > 0, 
"sanity"); 5362 5363 int opcode = this->ideal_Opcode(); 5364 int vlen = Matcher::vector_length(this, $src2); 5365 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5366 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5367 %} 5368 ins_pipe( pipe_slow ); 5369 %} 5370 5371 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp, 5372 legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5373 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5374 Matcher::vector_length(n->in(2)) == 2); 5375 match(Set dst (MinReductionV dst src)); 5376 match(Set dst (MaxReductionV dst src)); 5377 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5378 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5379 ins_encode %{ 5380 assert(UseAVX > 0, "sanity"); 5381 5382 int opcode = this->ideal_Opcode(); 5383 int vlen = Matcher::vector_length(this, $src); 5384 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5385 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5386 %} 5387 ins_pipe( pipe_slow ); 5388 %} 5389 5390 5391 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp, 5392 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5393 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5394 Matcher::vector_length(n->in(2)) >= 4); 5395 match(Set dst (MinReductionV dst src)); 5396 match(Set dst (MaxReductionV dst src)); 5397 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5398 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5399 ins_encode %{ 5400 assert(UseAVX > 0, "sanity"); 5401 5402 int opcode = this->ideal_Opcode(); 5403 int vlen = Matcher::vector_length(this, $src); 5404 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5405 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5406 %} 5407 ins_pipe( pipe_slow ); 5408 %} 5409 5410 instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{ 5411 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5412 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5413 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5414 Matcher::vector_length(n->in(2)) == 2); 5415 match(Set dst (MinReductionV src1 src2)); 5416 match(Set dst (MaxReductionV src1 src2)); 5417 effect(TEMP dst, TEMP xtmp1); 5418 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %} 5419 ins_encode %{ 5420 int opcode = this->ideal_Opcode(); 5421 int vlen = Matcher::vector_length(this, $src2); 5422 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5423 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister); 5424 %} 5425 ins_pipe( pipe_slow ); 5426 %} 5427 5428 instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{ 5429 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5430 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5431 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == 
TypeF::NEG_INF)) && 5432 Matcher::vector_length(n->in(2)) >= 4); 5433 match(Set dst (MinReductionV src1 src2)); 5434 match(Set dst (MaxReductionV src1 src2)); 5435 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 5436 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %} 5437 ins_encode %{ 5438 int opcode = this->ideal_Opcode(); 5439 int vlen = Matcher::vector_length(this, $src2); 5440 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, 5441 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 5442 %} 5443 ins_pipe( pipe_slow ); 5444 %} 5445 5446 instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{ 5447 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5448 Matcher::vector_length(n->in(2)) == 2); 5449 match(Set dst (MinReductionV dst src)); 5450 match(Set dst (MaxReductionV dst src)); 5451 effect(TEMP dst, TEMP xtmp1); 5452 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %} 5453 ins_encode %{ 5454 int opcode = this->ideal_Opcode(); 5455 int vlen = Matcher::vector_length(this, $src); 5456 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, 5457 $xtmp1$$XMMRegister); 5458 %} 5459 ins_pipe( pipe_slow ); 5460 %} 5461 5462 instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{ 5463 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5464 Matcher::vector_length(n->in(2)) >= 4); 5465 match(Set dst (MinReductionV dst src)); 5466 match(Set dst (MaxReductionV dst src)); 5467 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 5468 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %} 5469 ins_encode %{ 5470 int opcode = this->ideal_Opcode(); 5471 int vlen = Matcher::vector_length(this, $src); 5472 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, 5473 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 5474 %} 5475 ins_pipe( pipe_slow ); 5476 %} 5477 5478 //--------------------Min Double Reduction -------------------- 5479 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2, 5480 legVec tmp3, legVec tmp4, rFlagsReg cr) %{ 5481 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5482 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5483 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5484 Matcher::vector_length(n->in(2)) == 2); 5485 match(Set dst (MinReductionV src1 src2)); 5486 match(Set dst (MaxReductionV src1 src2)); 5487 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5488 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5489 ins_encode %{ 5490 assert(UseAVX > 0, "sanity"); 5491 5492 int opcode = this->ideal_Opcode(); 5493 int vlen = Matcher::vector_length(this, $src2); 5494 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5495 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5496 %} 5497 ins_pipe( pipe_slow ); 5498 %} 5499 5500 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2, 5501 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{ 5502 predicate(!VM_Version::supports_avx10_2() && 
Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5503 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5504 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5505 Matcher::vector_length(n->in(2)) >= 4); 5506 match(Set dst (MinReductionV src1 src2)); 5507 match(Set dst (MaxReductionV src1 src2)); 5508 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5509 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5510 ins_encode %{ 5511 assert(UseAVX > 0, "sanity"); 5512 5513 int opcode = this->ideal_Opcode(); 5514 int vlen = Matcher::vector_length(this, $src2); 5515 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5516 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5517 %} 5518 ins_pipe( pipe_slow ); 5519 %} 5520 5521 5522 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, 5523 legVec tmp3, legVec tmp4, rFlagsReg cr) %{ 5524 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5525 Matcher::vector_length(n->in(2)) == 2); 5526 match(Set dst (MinReductionV dst src)); 5527 match(Set dst (MaxReductionV dst src)); 5528 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5529 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5530 ins_encode %{ 5531 assert(UseAVX > 0, "sanity"); 5532 5533 int opcode = this->ideal_Opcode(); 5534 int vlen = Matcher::vector_length(this, $src); 5535 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5536 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5537 %} 5538 ins_pipe( pipe_slow ); 5539 %} 5540 5541 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3, 5542 legVec tmp4, legVec tmp5, rFlagsReg cr) %{ 5543 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5544 Matcher::vector_length(n->in(2)) >= 4); 5545 match(Set dst (MinReductionV dst src)); 5546 match(Set dst (MaxReductionV dst src)); 5547 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5548 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5549 ins_encode %{ 5550 assert(UseAVX > 0, "sanity"); 5551 5552 int opcode = this->ideal_Opcode(); 5553 int vlen = Matcher::vector_length(this, $src); 5554 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5555 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5556 %} 5557 ins_pipe( pipe_slow ); 5558 %} 5559 5560 instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{ 5561 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5562 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5563 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5564 Matcher::vector_length(n->in(2)) == 2); 5565 match(Set dst (MinReductionV src1 src2)); 5566 match(Set dst (MaxReductionV src1 src2)); 5567 effect(TEMP dst, TEMP xtmp1); 5568 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %} 5569 ins_encode %{ 5570 int opcode = 
this->ideal_Opcode(); 5571 int vlen = Matcher::vector_length(this, $src2); 5572 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, 5573 xnoreg, xnoreg, $xtmp1$$XMMRegister); 5574 %} 5575 ins_pipe( pipe_slow ); 5576 %} 5577 5578 instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{ 5579 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5580 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5581 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5582 Matcher::vector_length(n->in(2)) >= 4); 5583 match(Set dst (MinReductionV src1 src2)); 5584 match(Set dst (MaxReductionV src1 src2)); 5585 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 5586 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %} 5587 ins_encode %{ 5588 int opcode = this->ideal_Opcode(); 5589 int vlen = Matcher::vector_length(this, $src2); 5590 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, 5591 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 5592 %} 5593 ins_pipe( pipe_slow ); 5594 %} 5595 5596 5597 instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{ 5598 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5599 Matcher::vector_length(n->in(2)) == 2); 5600 match(Set dst (MinReductionV dst src)); 5601 match(Set dst (MaxReductionV dst src)); 5602 effect(TEMP dst, TEMP xtmp1); 5603 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %} 5604 ins_encode %{ 5605 int opcode = this->ideal_Opcode(); 5606 int vlen = Matcher::vector_length(this, $src); 5607 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5608 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister); 5609 %} 5610 ins_pipe( pipe_slow ); 5611 %} 5612 5613 instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{ 5614 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5615 Matcher::vector_length(n->in(2)) >= 4); 5616 match(Set dst (MinReductionV dst src)); 5617 match(Set dst (MaxReductionV dst src)); 5618 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 5619 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %} 5620 ins_encode %{ 5621 int opcode = this->ideal_Opcode(); 5622 int vlen = Matcher::vector_length(this, $src); 5623 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5624 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 5625 %} 5626 ins_pipe( pipe_slow ); 5627 %} 5628 5629 // ====================VECTOR ARITHMETIC======================================= 5630 5631 // --------------------------------- ADD -------------------------------------- 5632 5633 // Bytes vector add 5634 instruct vaddB(vec dst, vec src) %{ 5635 predicate(UseAVX == 0); 5636 match(Set dst (AddVB dst src)); 5637 format %{ "paddb $dst,$src\t! add packedB" %} 5638 ins_encode %{ 5639 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5640 %} 5641 ins_pipe( pipe_slow ); 5642 %} 5643 5644 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5645 predicate(UseAVX > 0); 5646 match(Set dst (AddVB src1 src2)); 5647 format %{ "vpaddb $dst,$src1,$src2\t! 
add packedB" %} 5648 ins_encode %{ 5649 int vlen_enc = vector_length_encoding(this); 5650 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5651 %} 5652 ins_pipe( pipe_slow ); 5653 %} 5654 5655 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5656 predicate((UseAVX > 0) && 5657 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5658 match(Set dst (AddVB src (LoadVector mem))); 5659 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5660 ins_encode %{ 5661 int vlen_enc = vector_length_encoding(this); 5662 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5663 %} 5664 ins_pipe( pipe_slow ); 5665 %} 5666 5667 // Shorts/Chars vector add 5668 instruct vaddS(vec dst, vec src) %{ 5669 predicate(UseAVX == 0); 5670 match(Set dst (AddVS dst src)); 5671 format %{ "paddw $dst,$src\t! add packedS" %} 5672 ins_encode %{ 5673 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5674 %} 5675 ins_pipe( pipe_slow ); 5676 %} 5677 5678 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5679 predicate(UseAVX > 0); 5680 match(Set dst (AddVS src1 src2)); 5681 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5682 ins_encode %{ 5683 int vlen_enc = vector_length_encoding(this); 5684 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5685 %} 5686 ins_pipe( pipe_slow ); 5687 %} 5688 5689 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5690 predicate((UseAVX > 0) && 5691 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5692 match(Set dst (AddVS src (LoadVector mem))); 5693 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5694 ins_encode %{ 5695 int vlen_enc = vector_length_encoding(this); 5696 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5697 %} 5698 ins_pipe( pipe_slow ); 5699 %} 5700 5701 // Integers vector add 5702 instruct vaddI(vec dst, vec src) %{ 5703 predicate(UseAVX == 0); 5704 match(Set dst (AddVI dst src)); 5705 format %{ "paddd $dst,$src\t! add packedI" %} 5706 ins_encode %{ 5707 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5708 %} 5709 ins_pipe( pipe_slow ); 5710 %} 5711 5712 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5713 predicate(UseAVX > 0); 5714 match(Set dst (AddVI src1 src2)); 5715 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5716 ins_encode %{ 5717 int vlen_enc = vector_length_encoding(this); 5718 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5719 %} 5720 ins_pipe( pipe_slow ); 5721 %} 5722 5723 5724 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5725 predicate((UseAVX > 0) && 5726 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5727 match(Set dst (AddVI src (LoadVector mem))); 5728 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5729 ins_encode %{ 5730 int vlen_enc = vector_length_encoding(this); 5731 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5732 %} 5733 ins_pipe( pipe_slow ); 5734 %} 5735 5736 // Longs vector add 5737 instruct vaddL(vec dst, vec src) %{ 5738 predicate(UseAVX == 0); 5739 match(Set dst (AddVL dst src)); 5740 format %{ "paddq $dst,$src\t! add packedL" %} 5741 ins_encode %{ 5742 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5743 %} 5744 ins_pipe( pipe_slow ); 5745 %} 5746 5747 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5748 predicate(UseAVX > 0); 5749 match(Set dst (AddVL src1 src2)); 5750 format %{ "vpaddq $dst,$src1,$src2\t! 
add packedL" %} 5751 ins_encode %{ 5752 int vlen_enc = vector_length_encoding(this); 5753 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5754 %} 5755 ins_pipe( pipe_slow ); 5756 %} 5757 5758 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5759 predicate((UseAVX > 0) && 5760 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5761 match(Set dst (AddVL src (LoadVector mem))); 5762 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5763 ins_encode %{ 5764 int vlen_enc = vector_length_encoding(this); 5765 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5766 %} 5767 ins_pipe( pipe_slow ); 5768 %} 5769 5770 // Floats vector add 5771 instruct vaddF(vec dst, vec src) %{ 5772 predicate(UseAVX == 0); 5773 match(Set dst (AddVF dst src)); 5774 format %{ "addps $dst,$src\t! add packedF" %} 5775 ins_encode %{ 5776 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5777 %} 5778 ins_pipe( pipe_slow ); 5779 %} 5780 5781 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5782 predicate(UseAVX > 0); 5783 match(Set dst (AddVF src1 src2)); 5784 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5785 ins_encode %{ 5786 int vlen_enc = vector_length_encoding(this); 5787 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5788 %} 5789 ins_pipe( pipe_slow ); 5790 %} 5791 5792 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5793 predicate((UseAVX > 0) && 5794 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5795 match(Set dst (AddVF src (LoadVector mem))); 5796 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5797 ins_encode %{ 5798 int vlen_enc = vector_length_encoding(this); 5799 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5800 %} 5801 ins_pipe( pipe_slow ); 5802 %} 5803 5804 // Doubles vector add 5805 instruct vaddD(vec dst, vec src) %{ 5806 predicate(UseAVX == 0); 5807 match(Set dst (AddVD dst src)); 5808 format %{ "addpd $dst,$src\t! add packedD" %} 5809 ins_encode %{ 5810 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5811 %} 5812 ins_pipe( pipe_slow ); 5813 %} 5814 5815 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5816 predicate(UseAVX > 0); 5817 match(Set dst (AddVD src1 src2)); 5818 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5819 ins_encode %{ 5820 int vlen_enc = vector_length_encoding(this); 5821 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5822 %} 5823 ins_pipe( pipe_slow ); 5824 %} 5825 5826 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5827 predicate((UseAVX > 0) && 5828 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5829 match(Set dst (AddVD src (LoadVector mem))); 5830 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5831 ins_encode %{ 5832 int vlen_enc = vector_length_encoding(this); 5833 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5834 %} 5835 ins_pipe( pipe_slow ); 5836 %} 5837 5838 // --------------------------------- SUB -------------------------------------- 5839 5840 // Bytes vector sub 5841 instruct vsubB(vec dst, vec src) %{ 5842 predicate(UseAVX == 0); 5843 match(Set dst (SubVB dst src)); 5844 format %{ "psubb $dst,$src\t! sub packedB" %} 5845 ins_encode %{ 5846 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5847 %} 5848 ins_pipe( pipe_slow ); 5849 %} 5850 5851 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5852 predicate(UseAVX > 0); 5853 match(Set dst (SubVB src1 src2)); 5854 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packedB" %} 5855 ins_encode %{ 5856 int vlen_enc = vector_length_encoding(this); 5857 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5858 %} 5859 ins_pipe( pipe_slow ); 5860 %} 5861 5862 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5863 predicate((UseAVX > 0) && 5864 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5865 match(Set dst (SubVB src (LoadVector mem))); 5866 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5867 ins_encode %{ 5868 int vlen_enc = vector_length_encoding(this); 5869 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5870 %} 5871 ins_pipe( pipe_slow ); 5872 %} 5873 5874 // Shorts/Chars vector sub 5875 instruct vsubS(vec dst, vec src) %{ 5876 predicate(UseAVX == 0); 5877 match(Set dst (SubVS dst src)); 5878 format %{ "psubw $dst,$src\t! sub packedS" %} 5879 ins_encode %{ 5880 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5881 %} 5882 ins_pipe( pipe_slow ); 5883 %} 5884 5885 5886 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5887 predicate(UseAVX > 0); 5888 match(Set dst (SubVS src1 src2)); 5889 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5890 ins_encode %{ 5891 int vlen_enc = vector_length_encoding(this); 5892 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5893 %} 5894 ins_pipe( pipe_slow ); 5895 %} 5896 5897 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5898 predicate((UseAVX > 0) && 5899 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5900 match(Set dst (SubVS src (LoadVector mem))); 5901 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5902 ins_encode %{ 5903 int vlen_enc = vector_length_encoding(this); 5904 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5905 %} 5906 ins_pipe( pipe_slow ); 5907 %} 5908 5909 // Integers vector sub 5910 instruct vsubI(vec dst, vec src) %{ 5911 predicate(UseAVX == 0); 5912 match(Set dst (SubVI dst src)); 5913 format %{ "psubd $dst,$src\t! sub packedI" %} 5914 ins_encode %{ 5915 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5916 %} 5917 ins_pipe( pipe_slow ); 5918 %} 5919 5920 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5921 predicate(UseAVX > 0); 5922 match(Set dst (SubVI src1 src2)); 5923 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5924 ins_encode %{ 5925 int vlen_enc = vector_length_encoding(this); 5926 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5927 %} 5928 ins_pipe( pipe_slow ); 5929 %} 5930 5931 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5932 predicate((UseAVX > 0) && 5933 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5934 match(Set dst (SubVI src (LoadVector mem))); 5935 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5936 ins_encode %{ 5937 int vlen_enc = vector_length_encoding(this); 5938 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5939 %} 5940 ins_pipe( pipe_slow ); 5941 %} 5942 5943 // Longs vector sub 5944 instruct vsubL(vec dst, vec src) %{ 5945 predicate(UseAVX == 0); 5946 match(Set dst (SubVL dst src)); 5947 format %{ "psubq $dst,$src\t! sub packedL" %} 5948 ins_encode %{ 5949 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5950 %} 5951 ins_pipe( pipe_slow ); 5952 %} 5953 5954 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5955 predicate(UseAVX > 0); 5956 match(Set dst (SubVL src1 src2)); 5957 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packedL" %} 5958 ins_encode %{ 5959 int vlen_enc = vector_length_encoding(this); 5960 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5961 %} 5962 ins_pipe( pipe_slow ); 5963 %} 5964 5965 5966 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5967 predicate((UseAVX > 0) && 5968 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5969 match(Set dst (SubVL src (LoadVector mem))); 5970 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5971 ins_encode %{ 5972 int vlen_enc = vector_length_encoding(this); 5973 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5974 %} 5975 ins_pipe( pipe_slow ); 5976 %} 5977 5978 // Floats vector sub 5979 instruct vsubF(vec dst, vec src) %{ 5980 predicate(UseAVX == 0); 5981 match(Set dst (SubVF dst src)); 5982 format %{ "subps $dst,$src\t! sub packedF" %} 5983 ins_encode %{ 5984 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5985 %} 5986 ins_pipe( pipe_slow ); 5987 %} 5988 5989 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5990 predicate(UseAVX > 0); 5991 match(Set dst (SubVF src1 src2)); 5992 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5993 ins_encode %{ 5994 int vlen_enc = vector_length_encoding(this); 5995 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5996 %} 5997 ins_pipe( pipe_slow ); 5998 %} 5999 6000 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 6001 predicate((UseAVX > 0) && 6002 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6003 match(Set dst (SubVF src (LoadVector mem))); 6004 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 6005 ins_encode %{ 6006 int vlen_enc = vector_length_encoding(this); 6007 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6008 %} 6009 ins_pipe( pipe_slow ); 6010 %} 6011 6012 // Doubles vector sub 6013 instruct vsubD(vec dst, vec src) %{ 6014 predicate(UseAVX == 0); 6015 match(Set dst (SubVD dst src)); 6016 format %{ "subpd $dst,$src\t! sub packedD" %} 6017 ins_encode %{ 6018 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 6019 %} 6020 ins_pipe( pipe_slow ); 6021 %} 6022 6023 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 6024 predicate(UseAVX > 0); 6025 match(Set dst (SubVD src1 src2)); 6026 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 6027 ins_encode %{ 6028 int vlen_enc = vector_length_encoding(this); 6029 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6030 %} 6031 ins_pipe( pipe_slow ); 6032 %} 6033 6034 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 6035 predicate((UseAVX > 0) && 6036 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6037 match(Set dst (SubVD src (LoadVector mem))); 6038 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 6039 ins_encode %{ 6040 int vlen_enc = vector_length_encoding(this); 6041 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6042 %} 6043 ins_pipe( pipe_slow ); 6044 %} 6045 6046 // --------------------------------- MUL -------------------------------------- 6047 6048 // Byte vector mul 6049 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 6050 predicate(Matcher::vector_length_in_bytes(n) <= 8); 6051 match(Set dst (MulVB src1 src2)); 6052 effect(TEMP dst, TEMP xtmp); 6053 format %{ "mulVB $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 6054 ins_encode %{ 6055 assert(UseSSE > 3, "required"); 6056 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 6057 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 6058 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6059 __ psllw($dst$$XMMRegister, 8); 6060 __ psrlw($dst$$XMMRegister, 8); 6061 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6062 %} 6063 ins_pipe( pipe_slow ); 6064 %} 6065 6066 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 6067 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 6068 match(Set dst (MulVB src1 src2)); 6069 effect(TEMP dst, TEMP xtmp); 6070 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 6071 ins_encode %{ 6072 assert(UseSSE > 3, "required"); 6073 // Odd-index elements 6074 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 6075 __ psrlw($dst$$XMMRegister, 8); 6076 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 6077 __ psrlw($xtmp$$XMMRegister, 8); 6078 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6079 __ psllw($dst$$XMMRegister, 8); 6080 // Even-index elements 6081 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6082 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 6083 __ psllw($xtmp$$XMMRegister, 8); 6084 __ psrlw($xtmp$$XMMRegister, 8); 6085 // Combine 6086 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 6087 %} 6088 ins_pipe( pipe_slow ); 6089 %} 6090 6091 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6092 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 6093 match(Set dst (MulVB src1 src2)); 6094 effect(TEMP xtmp1, TEMP xtmp2); 6095 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 6096 ins_encode %{ 6097 int vlen_enc = vector_length_encoding(this); 6098 // Odd-index elements 6099 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 6100 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 6101 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6102 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 6103 // Even-index elements 6104 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6105 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6106 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6107 // Combine 6108 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6109 %} 6110 ins_pipe( pipe_slow ); 6111 %} 6112 6113 // Shorts/Chars vector mul 6114 instruct vmulS(vec dst, vec src) %{ 6115 predicate(UseAVX == 0); 6116 match(Set dst (MulVS dst src)); 6117 format %{ "pmullw $dst,$src\t! mul packedS" %} 6118 ins_encode %{ 6119 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6120 %} 6121 ins_pipe( pipe_slow ); 6122 %} 6123 6124 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 6125 predicate(UseAVX > 0); 6126 match(Set dst (MulVS src1 src2)); 6127 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 6128 ins_encode %{ 6129 int vlen_enc = vector_length_encoding(this); 6130 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6131 %} 6132 ins_pipe( pipe_slow ); 6133 %} 6134 6135 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 6136 predicate((UseAVX > 0) && 6137 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6138 match(Set dst (MulVS src (LoadVector mem))); 6139 format %{ "vpmullw $dst,$src,$mem\t! 
mul packedS" %} 6140 ins_encode %{ 6141 int vlen_enc = vector_length_encoding(this); 6142 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6143 %} 6144 ins_pipe( pipe_slow ); 6145 %} 6146 6147 // Integers vector mul 6148 instruct vmulI(vec dst, vec src) %{ 6149 predicate(UseAVX == 0); 6150 match(Set dst (MulVI dst src)); 6151 format %{ "pmulld $dst,$src\t! mul packedI" %} 6152 ins_encode %{ 6153 assert(UseSSE > 3, "required"); 6154 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6155 %} 6156 ins_pipe( pipe_slow ); 6157 %} 6158 6159 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 6160 predicate(UseAVX > 0); 6161 match(Set dst (MulVI src1 src2)); 6162 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 6163 ins_encode %{ 6164 int vlen_enc = vector_length_encoding(this); 6165 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6166 %} 6167 ins_pipe( pipe_slow ); 6168 %} 6169 6170 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 6171 predicate((UseAVX > 0) && 6172 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6173 match(Set dst (MulVI src (LoadVector mem))); 6174 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 6175 ins_encode %{ 6176 int vlen_enc = vector_length_encoding(this); 6177 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6178 %} 6179 ins_pipe( pipe_slow ); 6180 %} 6181 6182 // Longs vector mul 6183 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 6184 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6185 VM_Version::supports_avx512dq()) || 6186 VM_Version::supports_avx512vldq()); 6187 match(Set dst (MulVL src1 src2)); 6188 ins_cost(500); 6189 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %} 6190 ins_encode %{ 6191 assert(UseAVX > 2, "required"); 6192 int vlen_enc = vector_length_encoding(this); 6193 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6194 %} 6195 ins_pipe( pipe_slow ); 6196 %} 6197 6198 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 6199 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6200 VM_Version::supports_avx512dq()) || 6201 (Matcher::vector_length_in_bytes(n) > 8 && 6202 VM_Version::supports_avx512vldq())); 6203 match(Set dst (MulVL src (LoadVector mem))); 6204 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 6205 ins_cost(500); 6206 ins_encode %{ 6207 assert(UseAVX > 2, "required"); 6208 int vlen_enc = vector_length_encoding(this); 6209 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6210 %} 6211 ins_pipe( pipe_slow ); 6212 %} 6213 6214 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 6215 predicate(UseAVX == 0); 6216 match(Set dst (MulVL src1 src2)); 6217 ins_cost(500); 6218 effect(TEMP dst, TEMP xtmp); 6219 format %{ "mulVL $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 6220 ins_encode %{ 6221 assert(VM_Version::supports_sse4_1(), "required"); 6222 // Get the lo-hi products; only their low 32 bits are needed, since a*b mod 2^64 = lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32) 6223 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1); 6224 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister); 6225 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1); 6226 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister); 6227 __ psllq($dst$$XMMRegister, 32); 6228 // Get the lo-lo products 6229 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6230 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister); 6231 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister); 6232 %} 6233 ins_pipe( pipe_slow ); 6234 %} 6235 6236 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6237 predicate(UseAVX > 0 && 6238 ((Matcher::vector_length_in_bytes(n) == 64 && 6239 !VM_Version::supports_avx512dq()) || 6240 (Matcher::vector_length_in_bytes(n) < 64 && 6241 !VM_Version::supports_avx512vldq()))); 6242 match(Set dst (MulVL src1 src2)); 6243 effect(TEMP xtmp1, TEMP xtmp2); 6244 ins_cost(500); 6245 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 6246 ins_encode %{ 6247 int vlen_enc = vector_length_encoding(this); 6248 // Get the lo-hi products; only their low 32 bits are needed 6249 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc); 6250 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 6251 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc); 6252 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 6253 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc); 6254 // Get the lo-lo products 6255 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6256 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6257 %} 6258 ins_pipe( pipe_slow ); 6259 %} 6260 6261 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{ 6262 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs()); 6263 match(Set dst (MulVL src1 src2)); 6264 ins_cost(100); 6265 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %} 6266 ins_encode %{ 6267 int vlen_enc = vector_length_encoding(this); 6268 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6269 %} 6270 ins_pipe( pipe_slow ); 6271 %} 6272 6273 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{ 6274 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs()); 6275 match(Set dst (MulVL src1 src2)); 6276 ins_cost(100); 6277 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %} 6278 ins_encode %{ 6279 int vlen_enc = vector_length_encoding(this); 6280 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6281 %} 6282 ins_pipe( pipe_slow ); 6283 %} 6284 6285 // Floats vector mul 6286 instruct vmulF(vec dst, vec src) %{ 6287 predicate(UseAVX == 0); 6288 match(Set dst (MulVF dst src)); 6289 format %{ "mulps $dst,$src\t! mul packedF" %} 6290 ins_encode %{ 6291 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 6292 %} 6293 ins_pipe( pipe_slow ); 6294 %} 6295 6296 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 6297 predicate(UseAVX > 0); 6298 match(Set dst (MulVF src1 src2)); 6299 format %{ "vmulps $dst,$src1,$src2\t!
mul packedF" %} 6300 ins_encode %{ 6301 int vlen_enc = vector_length_encoding(this); 6302 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6303 %} 6304 ins_pipe( pipe_slow ); 6305 %} 6306 6307 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 6308 predicate((UseAVX > 0) && 6309 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6310 match(Set dst (MulVF src (LoadVector mem))); 6311 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 6312 ins_encode %{ 6313 int vlen_enc = vector_length_encoding(this); 6314 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6315 %} 6316 ins_pipe( pipe_slow ); 6317 %} 6318 6319 // Doubles vector mul 6320 instruct vmulD(vec dst, vec src) %{ 6321 predicate(UseAVX == 0); 6322 match(Set dst (MulVD dst src)); 6323 format %{ "mulpd $dst,$src\t! mul packedD" %} 6324 ins_encode %{ 6325 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 6326 %} 6327 ins_pipe( pipe_slow ); 6328 %} 6329 6330 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 6331 predicate(UseAVX > 0); 6332 match(Set dst (MulVD src1 src2)); 6333 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} 6334 ins_encode %{ 6335 int vlen_enc = vector_length_encoding(this); 6336 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6337 %} 6338 ins_pipe( pipe_slow ); 6339 %} 6340 6341 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 6342 predicate((UseAVX > 0) && 6343 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6344 match(Set dst (MulVD src (LoadVector mem))); 6345 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 6346 ins_encode %{ 6347 int vlen_enc = vector_length_encoding(this); 6348 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6349 %} 6350 ins_pipe( pipe_slow ); 6351 %} 6352 6353 // --------------------------------- DIV -------------------------------------- 6354 6355 // Floats vector div 6356 instruct vdivF(vec dst, vec src) %{ 6357 predicate(UseAVX == 0); 6358 match(Set dst (DivVF dst src)); 6359 format %{ "divps $dst,$src\t! div packedF" %} 6360 ins_encode %{ 6361 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6362 %} 6363 ins_pipe( pipe_slow ); 6364 %} 6365 6366 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 6367 predicate(UseAVX > 0); 6368 match(Set dst (DivVF src1 src2)); 6369 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 6370 ins_encode %{ 6371 int vlen_enc = vector_length_encoding(this); 6372 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6373 %} 6374 ins_pipe( pipe_slow ); 6375 %} 6376 6377 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6378 predicate((UseAVX > 0) && 6379 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6380 match(Set dst (DivVF src (LoadVector mem))); 6381 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6382 ins_encode %{ 6383 int vlen_enc = vector_length_encoding(this); 6384 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6385 %} 6386 ins_pipe( pipe_slow ); 6387 %} 6388 6389 // Doubles vector div 6390 instruct vdivD(vec dst, vec src) %{ 6391 predicate(UseAVX == 0); 6392 match(Set dst (DivVD dst src)); 6393 format %{ "divpd $dst,$src\t! div packedD" %} 6394 ins_encode %{ 6395 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6396 %} 6397 ins_pipe( pipe_slow ); 6398 %} 6399 6400 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6401 predicate(UseAVX > 0); 6402 match(Set dst (DivVD src1 src2)); 6403 format %{ "vdivpd $dst,$src1,$src2\t! 
div packedD" %} 6404 ins_encode %{ 6405 int vlen_enc = vector_length_encoding(this); 6406 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6407 %} 6408 ins_pipe( pipe_slow ); 6409 %} 6410 6411 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6412 predicate((UseAVX > 0) && 6413 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6414 match(Set dst (DivVD src (LoadVector mem))); 6415 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 6416 ins_encode %{ 6417 int vlen_enc = vector_length_encoding(this); 6418 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6419 %} 6420 ins_pipe( pipe_slow ); 6421 %} 6422 6423 // ------------------------------ MinMax --------------------------------------- 6424 6425 // Byte, Short, Int vector Min/Max 6426 instruct minmax_reg_sse(vec dst, vec src) %{ 6427 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6428 UseAVX == 0); 6429 match(Set dst (MinV dst src)); 6430 match(Set dst (MaxV dst src)); 6431 format %{ "vector_minmax $dst,$src\t! " %} 6432 ins_encode %{ 6433 assert(UseSSE >= 4, "required"); 6434 6435 int opcode = this->ideal_Opcode(); 6436 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6437 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6438 %} 6439 ins_pipe( pipe_slow ); 6440 %} 6441 6442 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6443 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6444 UseAVX > 0); 6445 match(Set dst (MinV src1 src2)); 6446 match(Set dst (MaxV src1 src2)); 6447 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 6448 ins_encode %{ 6449 int opcode = this->ideal_Opcode(); 6450 int vlen_enc = vector_length_encoding(this); 6451 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6452 6453 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6454 %} 6455 ins_pipe( pipe_slow ); 6456 %} 6457 6458 // Long vector Min/Max 6459 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6460 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6461 UseAVX == 0); 6462 match(Set dst (MinV dst src)); 6463 match(Set dst (MaxV src dst)); 6464 effect(TEMP dst, TEMP tmp); 6465 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6466 ins_encode %{ 6467 assert(UseSSE >= 4, "required"); 6468 6469 int opcode = this->ideal_Opcode(); 6470 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6471 assert(elem_bt == T_LONG, "sanity"); 6472 6473 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6474 %} 6475 ins_pipe( pipe_slow ); 6476 %} 6477 6478 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6479 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6480 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6481 match(Set dst (MinV src1 src2)); 6482 match(Set dst (MaxV src1 src2)); 6483 effect(TEMP dst); 6484 format %{ "vector_minmaxL $dst,$src1,$src2\t! 
" %} 6485 ins_encode %{ 6486 int vlen_enc = vector_length_encoding(this); 6487 int opcode = this->ideal_Opcode(); 6488 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6489 assert(elem_bt == T_LONG, "sanity"); 6490 6491 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6492 %} 6493 ins_pipe( pipe_slow ); 6494 %} 6495 6496 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6497 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6498 Matcher::vector_element_basic_type(n) == T_LONG); 6499 match(Set dst (MinV src1 src2)); 6500 match(Set dst (MaxV src1 src2)); 6501 format %{ "vector_minmaxL $dst,$src1,src2\t! " %} 6502 ins_encode %{ 6503 assert(UseAVX > 2, "required"); 6504 6505 int vlen_enc = vector_length_encoding(this); 6506 int opcode = this->ideal_Opcode(); 6507 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6508 assert(elem_bt == T_LONG, "sanity"); 6509 6510 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6511 %} 6512 ins_pipe( pipe_slow ); 6513 %} 6514 6515 // Float/Double vector Min/Max 6516 instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{ 6517 predicate(VM_Version::supports_avx10_2() && 6518 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6519 match(Set dst (MinV a b)); 6520 match(Set dst (MaxV a b)); 6521 format %{ "vector_minmaxFP $dst, $a, $b" %} 6522 ins_encode %{ 6523 int vlen_enc = vector_length_encoding(this); 6524 int opcode = this->ideal_Opcode(); 6525 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6526 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); 6527 %} 6528 ins_pipe( pipe_slow ); 6529 %} 6530 6531 // Float/Double vector Min/Max 6532 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6533 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 && 6534 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6535 UseAVX > 0); 6536 match(Set dst (MinV a b)); 6537 match(Set dst (MaxV a b)); 6538 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6539 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6540 ins_encode %{ 6541 assert(UseAVX > 0, "required"); 6542 6543 int opcode = this->ideal_Opcode(); 6544 int vlen_enc = vector_length_encoding(this); 6545 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6546 6547 __ vminmax_fp(opcode, elem_bt, 6548 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6549 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6550 %} 6551 ins_pipe( pipe_slow ); 6552 %} 6553 6554 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6555 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 && 6556 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6557 match(Set dst (MinV a b)); 6558 match(Set dst (MaxV a b)); 6559 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6560 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6561 ins_encode %{ 6562 assert(UseAVX > 2, "required"); 6563 6564 int opcode = this->ideal_Opcode(); 6565 int vlen_enc = vector_length_encoding(this); 6566 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6567 6568 __ evminmax_fp(opcode, elem_bt, 
6569 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6570 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6571 %} 6572 ins_pipe( pipe_slow ); 6573 %} 6574 6575 // ------------------------------ Unsigned vector Min/Max ---------------------- 6576 6577 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{ 6578 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6579 match(Set dst (UMinV a b)); 6580 match(Set dst (UMaxV a b)); 6581 format %{ "vector_uminmax $dst,$a,$b\t!" %} 6582 ins_encode %{ 6583 int opcode = this->ideal_Opcode(); 6584 int vlen_enc = vector_length_encoding(this); 6585 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6586 assert(is_integral_type(elem_bt), ""); 6587 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); 6588 %} 6589 ins_pipe( pipe_slow ); 6590 %} 6591 6592 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{ 6593 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6594 match(Set dst (UMinV a (LoadVector b))); 6595 match(Set dst (UMaxV a (LoadVector b))); 6596 format %{ "vector_uminmax $dst,$a,$b\t!" %} 6597 ins_encode %{ 6598 int opcode = this->ideal_Opcode(); 6599 int vlen_enc = vector_length_encoding(this); 6600 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6601 assert(is_integral_type(elem_bt), ""); 6602 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc); 6603 %} 6604 ins_pipe( pipe_slow ); 6605 %} 6606 6607 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{ 6608 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG); 6609 match(Set dst (UMinV a b)); 6610 match(Set dst (UMaxV a b)); 6611 effect(TEMP xtmp1, TEMP xtmp2); 6612 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %} 6613 ins_encode %{ 6614 int opcode = this->ideal_Opcode(); 6615 int vlen_enc = vector_length_encoding(this); 6616 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6617 %} 6618 ins_pipe( pipe_slow ); 6619 %} 6620 6621 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{ 6622 match(Set dst (UMinV (Binary dst src2) mask)); 6623 match(Set dst (UMaxV (Binary dst src2) mask)); 6624 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %} 6625 ins_encode %{ 6626 int vlen_enc = vector_length_encoding(this); 6627 BasicType bt = Matcher::vector_element_basic_type(this); 6628 int opc = this->ideal_Opcode(); 6629 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 6630 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 6631 %} 6632 ins_pipe( pipe_slow ); 6633 %} 6634 6635 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{ 6636 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask)); 6637 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask)); 6638 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! 
umin/max masked operation" %} 6639 ins_encode %{ 6640 int vlen_enc = vector_length_encoding(this); 6641 BasicType bt = Matcher::vector_element_basic_type(this); 6642 int opc = this->ideal_Opcode(); 6643 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 6644 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 6645 %} 6646 ins_pipe( pipe_slow ); 6647 %} 6648 6649 // --------------------------------- Signum/CopySign --------------------------- 6650 6651 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{ 6652 match(Set dst (SignumF dst (Binary zero one))); 6653 effect(KILL cr); 6654 format %{ "signumF $dst, $dst" %} 6655 ins_encode %{ 6656 int opcode = this->ideal_Opcode(); 6657 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6658 %} 6659 ins_pipe( pipe_slow ); 6660 %} 6661 6662 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{ 6663 match(Set dst (SignumD dst (Binary zero one))); 6664 effect(KILL cr); 6665 format %{ "signumD $dst, $dst" %} 6666 ins_encode %{ 6667 int opcode = this->ideal_Opcode(); 6668 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6669 %} 6670 ins_pipe( pipe_slow ); 6671 %} 6672 6673 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{ 6674 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 6675 match(Set dst (SignumVF src (Binary zero one))); 6676 match(Set dst (SignumVD src (Binary zero one))); 6677 effect(TEMP dst, TEMP xtmp1); 6678 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %} 6679 ins_encode %{ 6680 int opcode = this->ideal_Opcode(); 6681 int vec_enc = vector_length_encoding(this); 6682 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6683 $xtmp1$$XMMRegister, vec_enc); 6684 %} 6685 ins_pipe( pipe_slow ); 6686 %} 6687 6688 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{ 6689 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 6690 match(Set dst (SignumVF src (Binary zero one))); 6691 match(Set dst (SignumVD src (Binary zero one))); 6692 effect(TEMP dst, TEMP ktmp1); 6693 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %} 6694 ins_encode %{ 6695 int opcode = this->ideal_Opcode(); 6696 int vec_enc = vector_length_encoding(this); 6697 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6698 $ktmp1$$KRegister, vec_enc); 6699 %} 6700 ins_pipe( pipe_slow ); 6701 %} 6702 6703 // --------------------------------------- 6704 // For copySign use 0xE4 as the ternary-logic immediate for vpternlog 6705 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit 6706 // C (xmm2) is set to 0x7FFFFFFF 6707 // Wherever xmm2 is 0, we want to pick from B (sign) 6708 // Wherever xmm2 is 1, we want to pick from A (magnitude) 6709 // 6710 // A B C Result 6711 // 0 0 0 0 6712 // 0 0 1 0 6713 // 0 1 0 1 6714 // 0 1 1 0 6715 // 1 0 0 0 6716 // 1 0 1 1 6717 // 1 1 0 1 6718 // 1 1 1 1 6719 // 6720 // Result going from high bit to low bit is 0b11100100 = 0xe4
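// In other words, imm8 0xE4 makes vpternlog compute the bitwise select Result = (C & A) | (~C & B); with C = 0x7FFFFFFF the sign bit is taken from B and the exponent and mantissa bits from A.
6721 // --------------------------------------- 6722 6723 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{ 6724 match(Set dst (CopySignF dst src)); 6725 effect(TEMP tmp1, TEMP tmp2); 6726 format %{ "CopySignF $dst, $src\t!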
using $tmp1 and $tmp2 as TEMP" %} 6727 ins_encode %{ 6728 __ movl($tmp2$$Register, 0x7FFFFFFF); 6729 __ movdl($tmp1$$XMMRegister, $tmp2$$Register); 6730 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6731 %} 6732 ins_pipe( pipe_slow ); 6733 %} 6734 6735 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ 6736 match(Set dst (CopySignD dst (Binary src zero))); 6737 ins_cost(100); 6738 effect(TEMP tmp1, TEMP tmp2); 6739 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6740 ins_encode %{ 6741 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); 6742 __ movq($tmp1$$XMMRegister, $tmp2$$Register); 6743 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6744 %} 6745 ins_pipe( pipe_slow ); 6746 %} 6747 6748 //----------------------------- CompressBits/ExpandBits ------------------------ 6749 6750 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6751 predicate(n->bottom_type()->isa_int()); 6752 match(Set dst (CompressBits src mask)); 6753 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6754 ins_encode %{ 6755 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 6756 %} 6757 ins_pipe( pipe_slow ); 6758 %} 6759 6760 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6761 predicate(n->bottom_type()->isa_int()); 6762 match(Set dst (ExpandBits src mask)); 6763 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6764 ins_encode %{ 6765 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 6766 %} 6767 ins_pipe( pipe_slow ); 6768 %} 6769 6770 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6771 predicate(n->bottom_type()->isa_int()); 6772 match(Set dst (CompressBits src (LoadI mask))); 6773 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6774 ins_encode %{ 6775 __ pextl($dst$$Register, $src$$Register, $mask$$Address); 6776 %} 6777 ins_pipe( pipe_slow ); 6778 %} 6779 6780 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6781 predicate(n->bottom_type()->isa_int()); 6782 match(Set dst (ExpandBits src (LoadI mask))); 6783 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6784 ins_encode %{ 6785 __ pdepl($dst$$Register, $src$$Register, $mask$$Address); 6786 %} 6787 ins_pipe( pipe_slow ); 6788 %} 6789 6790 // --------------------------------- Sqrt -------------------------------------- 6791 6792 instruct vsqrtF_reg(vec dst, vec src) %{ 6793 match(Set dst (SqrtVF src)); 6794 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6795 ins_encode %{ 6796 assert(UseAVX > 0, "required"); 6797 int vlen_enc = vector_length_encoding(this); 6798 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6799 %} 6800 ins_pipe( pipe_slow ); 6801 %} 6802 6803 instruct vsqrtF_mem(vec dst, memory mem) %{ 6804 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6805 match(Set dst (SqrtVF (LoadVector mem))); 6806 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6807 ins_encode %{ 6808 assert(UseAVX > 0, "required"); 6809 int vlen_enc = vector_length_encoding(this); 6810 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6811 %} 6812 ins_pipe( pipe_slow ); 6813 %} 6814 6815 // Floating point vector sqrt 6816 instruct vsqrtD_reg(vec dst, vec src) %{ 6817 match(Set dst (SqrtVD src)); 6818 format %{ "vsqrtpd $dst,$src\t! 
sqrt packedD" %} 6819 ins_encode %{ 6820 assert(UseAVX > 0, "required"); 6821 int vlen_enc = vector_length_encoding(this); 6822 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6823 %} 6824 ins_pipe( pipe_slow ); 6825 %} 6826 6827 instruct vsqrtD_mem(vec dst, memory mem) %{ 6828 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6829 match(Set dst (SqrtVD (LoadVector mem))); 6830 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6831 ins_encode %{ 6832 assert(UseAVX > 0, "required"); 6833 int vlen_enc = vector_length_encoding(this); 6834 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6835 %} 6836 ins_pipe( pipe_slow ); 6837 %} 6838 6839 // ------------------------------ Shift --------------------------------------- 6840 6841 // Left and right shift count vectors are the same on x86 6842 // (only lowest bits of xmm reg are used for count). 6843 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6844 match(Set dst (LShiftCntV cnt)); 6845 match(Set dst (RShiftCntV cnt)); 6846 format %{ "movdl $dst,$cnt\t! load shift count" %} 6847 ins_encode %{ 6848 __ movdl($dst$$XMMRegister, $cnt$$Register); 6849 %} 6850 ins_pipe( pipe_slow ); 6851 %} 6852 6853 // Byte vector shift 6854 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6855 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6856 match(Set dst ( LShiftVB src shift)); 6857 match(Set dst ( RShiftVB src shift)); 6858 match(Set dst (URShiftVB src shift)); 6859 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6860 format %{"vector_byte_shift $dst,$src,$shift" %} 6861 ins_encode %{ 6862 assert(UseSSE > 3, "required"); 6863 int opcode = this->ideal_Opcode(); 6864 bool sign = (opcode != Op_URShiftVB); 6865 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6866 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6867 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6868 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6869 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6870 %} 6871 ins_pipe( pipe_slow ); 6872 %} 6873 6874 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6875 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6876 UseAVX <= 1); 6877 match(Set dst ( LShiftVB src shift)); 6878 match(Set dst ( RShiftVB src shift)); 6879 match(Set dst (URShiftVB src shift)); 6880 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6881 format %{"vector_byte_shift $dst,$src,$shift" %} 6882 ins_encode %{ 6883 assert(UseSSE > 3, "required"); 6884 int opcode = this->ideal_Opcode(); 6885 bool sign = (opcode != Op_URShiftVB); 6886 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6887 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6888 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6889 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6890 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6891 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6892 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6893 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6894 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6895 %} 6896 ins_pipe( pipe_slow ); 6897 %} 6898 6899 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6900 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6901 UseAVX > 1); 6902 match(Set dst ( LShiftVB src shift)); 6903 match(Set dst ( RShiftVB src shift)); 6904 match(Set 
dst (URShiftVB src shift)); 6905 effect(TEMP dst, TEMP tmp); 6906 format %{"vector_byte_shift $dst,$src,$shift" %} 6907 ins_encode %{ 6908 int opcode = this->ideal_Opcode(); 6909 bool sign = (opcode != Op_URShiftVB); 6910 int vlen_enc = Assembler::AVX_256bit; 6911 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6912 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6913 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6914 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6915 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6916 %} 6917 ins_pipe( pipe_slow ); 6918 %} 6919 6920 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6921 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6922 match(Set dst ( LShiftVB src shift)); 6923 match(Set dst ( RShiftVB src shift)); 6924 match(Set dst (URShiftVB src shift)); 6925 effect(TEMP dst, TEMP tmp); 6926 format %{"vector_byte_shift $dst,$src,$shift" %} 6927 ins_encode %{ 6928 assert(UseAVX > 1, "required"); 6929 int opcode = this->ideal_Opcode(); 6930 bool sign = (opcode != Op_URShiftVB); 6931 int vlen_enc = Assembler::AVX_256bit; 6932 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 6933 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6934 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6935 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6936 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6937 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6938 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6939 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6940 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6941 %} 6942 ins_pipe( pipe_slow ); 6943 %} 6944 6945 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6946 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift()); 6947 match(Set dst ( LShiftVB src shift)); 6948 match(Set dst (RShiftVB src shift)); 6949 match(Set dst (URShiftVB src shift)); 6950 effect(TEMP dst, TEMP tmp1, TEMP tmp2); 6951 format %{"vector_byte_shift $dst,$src,$shift" %} 6952 ins_encode %{ 6953 assert(UseAVX > 2, "required"); 6954 int opcode = this->ideal_Opcode(); 6955 bool sign = (opcode != Op_URShiftVB); 6956 int vlen_enc = Assembler::AVX_512bit; 6957 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 6958 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 6959 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6960 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6961 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6962 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6963 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6964 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6965 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6966 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 6967 __ evmovdquq($tmp2$$XMMRegister, 
ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg); 6968 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6969 %} 6970 ins_pipe( pipe_slow ); 6971 %} 6972 6973 // Shorts vector logical right shift produces an incorrect Java result 6974 // for negative data because Java code converts short values into int with 6975 // sign extension before the shift. But char vectors are fine since chars are 6976 // unsigned values. 6977 // Shorts/Chars vector shift 6978 instruct vshiftS(vec dst, vec src, vec shift) %{ 6979 predicate(!n->as_ShiftV()->is_var_shift()); 6980 match(Set dst ( LShiftVS src shift)); 6981 match(Set dst ( RShiftVS src shift)); 6982 match(Set dst (URShiftVS src shift)); 6983 effect(TEMP dst, USE src, USE shift); 6984 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %} 6985 ins_encode %{ 6986 int opcode = this->ideal_Opcode(); 6987 if (UseAVX > 0) { 6988 int vlen_enc = vector_length_encoding(this); 6989 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6990 } else { 6991 int vlen = Matcher::vector_length(this); 6992 if (vlen == 2) { 6993 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6994 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6995 } else if (vlen == 4) { 6996 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6997 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6998 } else { 6999 assert (vlen == 8, "sanity"); 7000 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7001 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 7002 } 7003 } 7004 %} 7005 ins_pipe( pipe_slow ); 7006 %} 7007 7008 // Integers vector shift 7009 instruct vshiftI(vec dst, vec src, vec shift) %{ 7010 predicate(!n->as_ShiftV()->is_var_shift()); 7011 match(Set dst ( LShiftVI src shift)); 7012 match(Set dst ( RShiftVI src shift)); 7013 match(Set dst (URShiftVI src shift)); 7014 effect(TEMP dst, USE src, USE shift); 7015 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 7016 ins_encode %{ 7017 int opcode = this->ideal_Opcode(); 7018 if (UseAVX > 0) { 7019 int vlen_enc = vector_length_encoding(this); 7020 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7021 } else { 7022 int vlen = Matcher::vector_length(this); 7023 if (vlen == 2) { 7024 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 7025 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 7026 } else { 7027 assert(vlen == 4, "sanity"); 7028 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7029 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 7030 } 7031 } 7032 %} 7033 ins_pipe( pipe_slow ); 7034 %} 7035 7036 // Integers vector constant shift 7037 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ 7038 match(Set dst (LShiftVI src (LShiftCntV shift))); 7039 match(Set dst (RShiftVI src (RShiftCntV shift))); 7040 match(Set dst (URShiftVI src (RShiftCntV shift))); 7041 format %{ "vshiftd_imm $dst,$src,$shift\t!
shift packedI" %} 7042 ins_encode %{ 7043 int opcode = this->ideal_Opcode(); 7044 if (UseAVX > 0) { 7045 int vector_len = vector_length_encoding(this); 7046 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 7047 } else { 7048 int vlen = Matcher::vector_length(this); 7049 if (vlen == 2) { 7050 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 7051 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7052 } else { 7053 assert(vlen == 4, "sanity"); 7054 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7055 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7056 } 7057 } 7058 %} 7059 ins_pipe( pipe_slow ); 7060 %} 7061 7062 // Longs vector shift 7063 instruct vshiftL(vec dst, vec src, vec shift) %{ 7064 predicate(!n->as_ShiftV()->is_var_shift()); 7065 match(Set dst ( LShiftVL src shift)); 7066 match(Set dst (URShiftVL src shift)); 7067 effect(TEMP dst, USE src, USE shift); 7068 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %} 7069 ins_encode %{ 7070 int opcode = this->ideal_Opcode(); 7071 if (UseAVX > 0) { 7072 int vlen_enc = vector_length_encoding(this); 7073 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7074 } else { 7075 assert(Matcher::vector_length(this) == 2, ""); 7076 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7077 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 7078 } 7079 %} 7080 ins_pipe( pipe_slow ); 7081 %} 7082 7083 // Longs vector constant shift 7084 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 7085 match(Set dst (LShiftVL src (LShiftCntV shift))); 7086 match(Set dst (URShiftVL src (RShiftCntV shift))); 7087 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 7088 ins_encode %{ 7089 int opcode = this->ideal_Opcode(); 7090 if (UseAVX > 0) { 7091 int vector_len = vector_length_encoding(this); 7092 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 7093 } else { 7094 assert(Matcher::vector_length(this) == 2, ""); 7095 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7096 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7097 } 7098 %} 7099 ins_pipe( pipe_slow ); 7100 %} 7101 7102 // -------------------ArithmeticRightShift ----------------------------------- 7103 // Long vector arithmetic right shift 7104 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 7105 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 7106 match(Set dst (RShiftVL src shift)); 7107 effect(TEMP dst, TEMP tmp); 7108 format %{ "vshiftq $dst,$src,$shift" %} 7109 ins_encode %{ 7110 uint vlen = Matcher::vector_length(this); 7111 if (vlen == 2) { 7112 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7113 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 7114 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 7115 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 7116 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 7117 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 7118 } else { 7119 assert(vlen == 4, "sanity"); 7120 assert(UseAVX > 1, "required"); 7121 int vlen_enc = Assembler::AVX_256bit; 7122 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7123 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 7124 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7125 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 7126 __ vpsubq($dst$$XMMRegister, 
$dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 7127 } 7128 %} 7129 ins_pipe( pipe_slow ); 7130 %} 7131 7132 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 7133 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 7134 match(Set dst (RShiftVL src shift)); 7135 format %{ "vshiftq $dst,$src,$shift" %} 7136 ins_encode %{ 7137 int vlen_enc = vector_length_encoding(this); 7138 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7139 %} 7140 ins_pipe( pipe_slow ); 7141 %} 7142 7143 // ------------------- Variable Shift ----------------------------- 7144 // Byte variable shift 7145 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7146 predicate(Matcher::vector_length(n) <= 8 && 7147 n->as_ShiftV()->is_var_shift() && 7148 !VM_Version::supports_avx512bw()); 7149 match(Set dst ( LShiftVB src shift)); 7150 match(Set dst ( RShiftVB src shift)); 7151 match(Set dst (URShiftVB src shift)); 7152 effect(TEMP dst, TEMP vtmp); 7153 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7154 ins_encode %{ 7155 assert(UseAVX >= 2, "required"); 7156 7157 int opcode = this->ideal_Opcode(); 7158 int vlen_enc = Assembler::AVX_128bit; 7159 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7160 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7161 %} 7162 ins_pipe( pipe_slow ); 7163 %} 7164 7165 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7166 predicate(Matcher::vector_length(n) == 16 && 7167 n->as_ShiftV()->is_var_shift() && 7168 !VM_Version::supports_avx512bw()); 7169 match(Set dst ( LShiftVB src shift)); 7170 match(Set dst ( RShiftVB src shift)); 7171 match(Set dst (URShiftVB src shift)); 7172 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7173 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp1, $vtmp2 as TEMP" %} 7174 ins_encode %{ 7175 assert(UseAVX >= 2, "required"); 7176 7177 int opcode = this->ideal_Opcode(); 7178 int vlen_enc = Assembler::AVX_128bit; 7179 // Shift lower half and get word result in dst 7180 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7181 7182 // Shift upper half and get word result in vtmp1 7183 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7184 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7185 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7186 7187 // Merge and down convert the two word results to byte in dst 7188 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7189 %} 7190 ins_pipe( pipe_slow ); 7191 %} 7192 7193 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 7194 predicate(Matcher::vector_length(n) == 32 && 7195 n->as_ShiftV()->is_var_shift() && 7196 !VM_Version::supports_avx512bw()); 7197 match(Set dst ( LShiftVB src shift)); 7198 match(Set dst ( RShiftVB src shift)); 7199 match(Set dst (URShiftVB src shift)); 7200 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 7201 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 7202 ins_encode %{ 7203 assert(UseAVX >= 2, "required"); 7204 7205 int opcode = this->ideal_Opcode(); 7206 int vlen_enc = Assembler::AVX_128bit; 7207 // Process lower 128 bits and get result in dst 7208 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7209 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7210 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7211 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7212 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7213 7214 // Process higher 128 bits and get result in vtmp3 7215 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7216 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7217 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 7218 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 7219 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 7220 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7221 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 7222 7223 // Merge the two results in dst 7224 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7225 %} 7226 ins_pipe( pipe_slow ); 7227 %} 7228 7229 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ 7230 predicate(Matcher::vector_length(n) <= 32 && 7231 n->as_ShiftV()->is_var_shift() && 7232 VM_Version::supports_avx512bw()); 7233 match(Set dst ( LShiftVB src shift)); 7234 match(Set dst ( RShiftVB src shift)); 7235 match(Set dst (URShiftVB src shift)); 7236 effect(TEMP dst, TEMP vtmp); 7237 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp as TEMP" %} 7238 ins_encode %{ 7239 assert(UseAVX > 2, "required"); 7240 7241 int opcode = this->ideal_Opcode(); 7242 int vlen_enc = vector_length_encoding(this); 7243 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7244 %} 7245 ins_pipe( pipe_slow ); 7246 %} 7247 7248 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7249 predicate(Matcher::vector_length(n) == 64 && 7250 n->as_ShiftV()->is_var_shift() && 7251 VM_Version::supports_avx512bw()); 7252 match(Set dst ( LShiftVB src shift)); 7253 match(Set dst ( RShiftVB src shift)); 7254 match(Set dst (URShiftVB src shift)); 7255 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7256 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7257 ins_encode %{ 7258 assert(UseAVX > 2, "required"); 7259 7260 int opcode = this->ideal_Opcode(); 7261 int vlen_enc = Assembler::AVX_256bit; 7262 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7263 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7264 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7265 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7266 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7267 %} 7268 ins_pipe( pipe_slow ); 7269 %} 7270 7271 // Short variable shift 7272 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7273 predicate(Matcher::vector_length(n) <= 8 && 7274 n->as_ShiftV()->is_var_shift() && 7275 !VM_Version::supports_avx512bw()); 7276 match(Set dst ( LShiftVS src shift)); 7277 match(Set dst ( RShiftVS src shift)); 7278 match(Set dst (URShiftVS src shift)); 7279 effect(TEMP dst, TEMP vtmp); 7280 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7281 ins_encode %{ 7282 assert(UseAVX >= 2, "required"); 7283 7284 int opcode = this->ideal_Opcode(); 7285 bool sign = (opcode != Op_URShiftVS); 7286 int vlen_enc = Assembler::AVX_256bit; 7287 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 7288 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 7289 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 7290 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7291 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 7292 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7293 %} 7294 ins_pipe( pipe_slow ); 7295 %} 7296 7297 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7298 predicate(Matcher::vector_length(n) == 16 && 7299 n->as_ShiftV()->is_var_shift() && 7300 !VM_Version::supports_avx512bw()); 7301 match(Set dst ( LShiftVS src shift)); 7302 match(Set dst ( RShiftVS src shift)); 7303 match(Set dst (URShiftVS src shift)); 7304 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7305 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7306 ins_encode %{ 7307 assert(UseAVX >= 2, "required"); 7308 7309 int opcode = this->ideal_Opcode(); 7310 bool sign = (opcode != Op_URShiftVS); 7311 int vlen_enc = Assembler::AVX_256bit; 7312 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP 7313 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 7314 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7315 __ 
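// Without AVX512BW there is no per-element 16-bit variable shift, so the shorts are
// widened to 32-bit lanes, shifted with the 32-bit variable-shift forms, masked back
// to 16 bits (so vpackusdw truncates instead of saturating) and re-packed.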
varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7316 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7317 7318 // Shift upper half, with result in dst using vtmp1 as TEMP 7319 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 7320 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 7321 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7322 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7323 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7324 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7325 7326 // Merge lower and upper half result into dst 7327 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7328 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 7329 %} 7330 ins_pipe( pipe_slow ); 7331 %} 7332 7333 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 7334 predicate(n->as_ShiftV()->is_var_shift() && 7335 VM_Version::supports_avx512bw()); 7336 match(Set dst ( LShiftVS src shift)); 7337 match(Set dst ( RShiftVS src shift)); 7338 match(Set dst (URShiftVS src shift)); 7339 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 7340 ins_encode %{ 7341 assert(UseAVX > 2, "required"); 7342 7343 int opcode = this->ideal_Opcode(); 7344 int vlen_enc = vector_length_encoding(this); 7345 if (!VM_Version::supports_avx512vl()) { 7346 vlen_enc = Assembler::AVX_512bit; 7347 } 7348 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7349 %} 7350 ins_pipe( pipe_slow ); 7351 %} 7352 7353 //Integer variable shift 7354 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 7355 predicate(n->as_ShiftV()->is_var_shift()); 7356 match(Set dst ( LShiftVI src shift)); 7357 match(Set dst ( RShiftVI src shift)); 7358 match(Set dst (URShiftVI src shift)); 7359 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 7360 ins_encode %{ 7361 assert(UseAVX >= 2, "required"); 7362 7363 int opcode = this->ideal_Opcode(); 7364 int vlen_enc = vector_length_encoding(this); 7365 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7366 %} 7367 ins_pipe( pipe_slow ); 7368 %} 7369 7370 //Long variable shift 7371 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 7372 predicate(n->as_ShiftV()->is_var_shift()); 7373 match(Set dst ( LShiftVL src shift)); 7374 match(Set dst (URShiftVL src shift)); 7375 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 7376 ins_encode %{ 7377 assert(UseAVX >= 2, "required"); 7378 7379 int opcode = this->ideal_Opcode(); 7380 int vlen_enc = vector_length_encoding(this); 7381 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7382 %} 7383 ins_pipe( pipe_slow ); 7384 %} 7385 7386 //Long variable right shift arithmetic 7387 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 7388 predicate(Matcher::vector_length(n) <= 4 && 7389 n->as_ShiftV()->is_var_shift() && 7390 UseAVX == 2); 7391 match(Set dst (RShiftVL src shift)); 7392 effect(TEMP dst, TEMP vtmp); 7393 format %{ "vector_varshift_long $dst,$src,$shift\n\t! 
using $vtmp as TEMP" %} 7394 ins_encode %{ 7395 int opcode = this->ideal_Opcode(); 7396 int vlen_enc = vector_length_encoding(this); 7397 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 7398 $vtmp$$XMMRegister); 7399 %} 7400 ins_pipe( pipe_slow ); 7401 %} 7402 7403 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 7404 predicate(n->as_ShiftV()->is_var_shift() && 7405 UseAVX > 2); 7406 match(Set dst (RShiftVL src shift)); 7407 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %} 7408 ins_encode %{ 7409 int opcode = this->ideal_Opcode(); 7410 int vlen_enc = vector_length_encoding(this); 7411 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7412 %} 7413 ins_pipe( pipe_slow ); 7414 %} 7415 7416 // --------------------------------- AND -------------------------------------- 7417 7418 instruct vand(vec dst, vec src) %{ 7419 predicate(UseAVX == 0); 7420 match(Set dst (AndV dst src)); 7421 format %{ "pand $dst,$src\t! and vectors" %} 7422 ins_encode %{ 7423 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7424 %} 7425 ins_pipe( pipe_slow ); 7426 %} 7427 7428 instruct vand_reg(vec dst, vec src1, vec src2) %{ 7429 predicate(UseAVX > 0); 7430 match(Set dst (AndV src1 src2)); 7431 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 7432 ins_encode %{ 7433 int vlen_enc = vector_length_encoding(this); 7434 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7435 %} 7436 ins_pipe( pipe_slow ); 7437 %} 7438 7439 instruct vand_mem(vec dst, vec src, memory mem) %{ 7440 predicate((UseAVX > 0) && 7441 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7442 match(Set dst (AndV src (LoadVector mem))); 7443 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 7444 ins_encode %{ 7445 int vlen_enc = vector_length_encoding(this); 7446 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7447 %} 7448 ins_pipe( pipe_slow ); 7449 %} 7450 7451 // --------------------------------- OR --------------------------------------- 7452 7453 instruct vor(vec dst, vec src) %{ 7454 predicate(UseAVX == 0); 7455 match(Set dst (OrV dst src)); 7456 format %{ "por $dst,$src\t! or vectors" %} 7457 ins_encode %{ 7458 __ por($dst$$XMMRegister, $src$$XMMRegister); 7459 %} 7460 ins_pipe( pipe_slow ); 7461 %} 7462 7463 instruct vor_reg(vec dst, vec src1, vec src2) %{ 7464 predicate(UseAVX > 0); 7465 match(Set dst (OrV src1 src2)); 7466 format %{ "vpor $dst,$src1,$src2\t! or vectors" %} 7467 ins_encode %{ 7468 int vlen_enc = vector_length_encoding(this); 7469 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7470 %} 7471 ins_pipe( pipe_slow ); 7472 %} 7473 7474 instruct vor_mem(vec dst, vec src, memory mem) %{ 7475 predicate((UseAVX > 0) && 7476 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7477 match(Set dst (OrV src (LoadVector mem))); 7478 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 7479 ins_encode %{ 7480 int vlen_enc = vector_length_encoding(this); 7481 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7482 %} 7483 ins_pipe( pipe_slow ); 7484 %} 7485 7486 // --------------------------------- XOR -------------------------------------- 7487 7488 instruct vxor(vec dst, vec src) %{ 7489 predicate(UseAVX == 0); 7490 match(Set dst (XorV dst src)); 7491 format %{ "pxor $dst,$src\t! 
xor vectors" %} 7492 ins_encode %{ 7493 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7494 %} 7495 ins_pipe( pipe_slow ); 7496 %} 7497 7498 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7499 predicate(UseAVX > 0); 7500 match(Set dst (XorV src1 src2)); 7501 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7502 ins_encode %{ 7503 int vlen_enc = vector_length_encoding(this); 7504 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7505 %} 7506 ins_pipe( pipe_slow ); 7507 %} 7508 7509 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7510 predicate((UseAVX > 0) && 7511 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7512 match(Set dst (XorV src (LoadVector mem))); 7513 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7514 ins_encode %{ 7515 int vlen_enc = vector_length_encoding(this); 7516 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7517 %} 7518 ins_pipe( pipe_slow ); 7519 %} 7520 7521 // --------------------------------- VectorCast -------------------------------------- 7522 7523 instruct vcastBtoX(vec dst, vec src) %{ 7524 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE); 7525 match(Set dst (VectorCastB2X src)); 7526 format %{ "vector_cast_b2x $dst,$src\t!" %} 7527 ins_encode %{ 7528 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7529 int vlen_enc = vector_length_encoding(this); 7530 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7531 %} 7532 ins_pipe( pipe_slow ); 7533 %} 7534 7535 instruct vcastBtoD(legVec dst, legVec src) %{ 7536 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE); 7537 match(Set dst (VectorCastB2X src)); 7538 format %{ "vector_cast_b2x $dst,$src\t!" %} 7539 ins_encode %{ 7540 int vlen_enc = vector_length_encoding(this); 7541 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7542 %} 7543 ins_pipe( pipe_slow ); 7544 %} 7545 7546 instruct castStoX(vec dst, vec src) %{ 7547 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7548 Matcher::vector_length(n->in(1)) <= 8 && // src 7549 Matcher::vector_element_basic_type(n) == T_BYTE); 7550 match(Set dst (VectorCastS2X src)); 7551 format %{ "vector_cast_s2x $dst,$src" %} 7552 ins_encode %{ 7553 assert(UseAVX > 0, "required"); 7554 7555 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7556 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7557 %} 7558 ins_pipe( pipe_slow ); 7559 %} 7560 7561 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7562 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7563 Matcher::vector_length(n->in(1)) == 16 && // src 7564 Matcher::vector_element_basic_type(n) == T_BYTE); 7565 effect(TEMP dst, TEMP vtmp); 7566 match(Set dst (VectorCastS2X src)); 7567 format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp as TEMP" %} 7568 ins_encode %{ 7569 assert(UseAVX > 0, "required"); 7570 7571 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7572 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7573 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7574 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7575 %} 7576 ins_pipe( pipe_slow ); 7577 %} 7578 7579 instruct vcastStoX_evex(vec dst, vec src) %{ 7580 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7581 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7582 match(Set dst (VectorCastS2X src)); 7583 format %{ "vector_cast_s2x $dst,$src\t!" %} 7584 ins_encode %{ 7585 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7586 int src_vlen_enc = vector_length_encoding(this, $src); 7587 int vlen_enc = vector_length_encoding(this); 7588 switch (to_elem_bt) { 7589 case T_BYTE: 7590 if (!VM_Version::supports_avx512vl()) { 7591 vlen_enc = Assembler::AVX_512bit; 7592 } 7593 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7594 break; 7595 case T_INT: 7596 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7597 break; 7598 case T_FLOAT: 7599 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7600 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7601 break; 7602 case T_LONG: 7603 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7604 break; 7605 case T_DOUBLE: { 7606 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7607 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7608 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7609 break; 7610 } 7611 default: 7612 ShouldNotReachHere(); 7613 } 7614 %} 7615 ins_pipe( pipe_slow ); 7616 %} 7617 7618 instruct castItoX(vec dst, vec src) %{ 7619 predicate(UseAVX <= 2 && 7620 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7621 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7622 match(Set dst (VectorCastI2X src)); 7623 format %{ "vector_cast_i2x $dst,$src" %} 7624 ins_encode %{ 7625 assert(UseAVX > 0, "required"); 7626 7627 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7628 int vlen_enc = vector_length_encoding(this, $src); 7629 7630 if (to_elem_bt == T_BYTE) { 7631 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7632 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7633 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7634 } else { 7635 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7636 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7637 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7638 } 7639 %} 7640 ins_pipe( pipe_slow ); 7641 %} 7642 7643 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7644 predicate(UseAVX <= 2 && 7645 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7646 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7647 match(Set dst (VectorCastI2X src)); 7648 format %{ "vector_cast_i2x $dst,$src\t! 
using $vtmp as TEMP" %} 7649 effect(TEMP dst, TEMP vtmp); 7650 ins_encode %{ 7651 assert(UseAVX > 0, "required"); 7652 7653 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7654 int vlen_enc = vector_length_encoding(this, $src); 7655 7656 if (to_elem_bt == T_BYTE) { 7657 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7658 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7659 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7660 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7661 } else { 7662 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7663 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7664 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7665 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7666 } 7667 %} 7668 ins_pipe( pipe_slow ); 7669 %} 7670 7671 instruct vcastItoX_evex(vec dst, vec src) %{ 7672 predicate(UseAVX > 2 || 7673 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7674 match(Set dst (VectorCastI2X src)); 7675 format %{ "vector_cast_i2x $dst,$src\t!" %} 7676 ins_encode %{ 7677 assert(UseAVX > 0, "required"); 7678 7679 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7680 int src_vlen_enc = vector_length_encoding(this, $src); 7681 int dst_vlen_enc = vector_length_encoding(this); 7682 switch (dst_elem_bt) { 7683 case T_BYTE: 7684 if (!VM_Version::supports_avx512vl()) { 7685 src_vlen_enc = Assembler::AVX_512bit; 7686 } 7687 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7688 break; 7689 case T_SHORT: 7690 if (!VM_Version::supports_avx512vl()) { 7691 src_vlen_enc = Assembler::AVX_512bit; 7692 } 7693 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7694 break; 7695 case T_FLOAT: 7696 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7697 break; 7698 case T_LONG: 7699 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7700 break; 7701 case T_DOUBLE: 7702 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7703 break; 7704 default: 7705 ShouldNotReachHere(); 7706 } 7707 %} 7708 ins_pipe( pipe_slow ); 7709 %} 7710 7711 instruct vcastLtoBS(vec dst, vec src) %{ 7712 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7713 UseAVX <= 2); 7714 match(Set dst (VectorCastL2X src)); 7715 format %{ "vector_cast_l2x $dst,$src" %} 7716 ins_encode %{ 7717 assert(UseAVX > 0, "required"); 7718 7719 int vlen = Matcher::vector_length_in_bytes(this, $src); 7720 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7721 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? 
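// Without AVX-512 the long-to-byte/short narrowing picks the low dword of each long lane
// with a shuffle/permute, masks it to the target width and packs; the EVEX rule below uses
// the truncating evpmovq* moves instead.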
ExternalAddress(vector_int_to_byte_mask()) 7722 : ExternalAddress(vector_int_to_short_mask()); 7723 if (vlen <= 16) { 7724 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7725 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7726 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7727 } else { 7728 assert(vlen <= 32, "required"); 7729 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7730 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7731 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7732 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7733 } 7734 if (to_elem_bt == T_BYTE) { 7735 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7736 } 7737 %} 7738 ins_pipe( pipe_slow ); 7739 %} 7740 7741 instruct vcastLtoX_evex(vec dst, vec src) %{ 7742 predicate(UseAVX > 2 || 7743 (Matcher::vector_element_basic_type(n) == T_INT || 7744 Matcher::vector_element_basic_type(n) == T_FLOAT || 7745 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7746 match(Set dst (VectorCastL2X src)); 7747 format %{ "vector_cast_l2x $dst,$src\t!" %} 7748 ins_encode %{ 7749 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7750 int vlen = Matcher::vector_length_in_bytes(this, $src); 7751 int vlen_enc = vector_length_encoding(this, $src); 7752 switch (to_elem_bt) { 7753 case T_BYTE: 7754 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7755 vlen_enc = Assembler::AVX_512bit; 7756 } 7757 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7758 break; 7759 case T_SHORT: 7760 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7761 vlen_enc = Assembler::AVX_512bit; 7762 } 7763 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7764 break; 7765 case T_INT: 7766 if (vlen == 8) { 7767 if ($dst$$XMMRegister != $src$$XMMRegister) { 7768 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7769 } 7770 } else if (vlen == 16) { 7771 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7772 } else if (vlen == 32) { 7773 if (UseAVX > 2) { 7774 if (!VM_Version::supports_avx512vl()) { 7775 vlen_enc = Assembler::AVX_512bit; 7776 } 7777 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7778 } else { 7779 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7780 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7781 } 7782 } else { // vlen == 64 7783 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7784 } 7785 break; 7786 case T_FLOAT: 7787 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7788 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7789 break; 7790 case T_DOUBLE: 7791 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7792 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7793 break; 7794 7795 default: assert(false, "%s", type2name(to_elem_bt)); 7796 } 7797 %} 7798 ins_pipe( pipe_slow ); 7799 %} 7800 7801 instruct vcastFtoD_reg(vec dst, vec src) %{ 7802 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7803 match(Set dst (VectorCastF2X src)); 7804 format %{ "vector_cast_f2d $dst,$src\t!" 
%}
7805 ins_encode %{
7806 int vlen_enc = vector_length_encoding(this);
7807 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7808 %}
7809 ins_pipe( pipe_slow );
7810 %}
7811
7812
7813 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
7814 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
7815 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
7816 match(Set dst (VectorCastF2X src));
7817 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
7818 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
7819 ins_encode %{
7820 int vlen_enc = vector_length_encoding(this, $src);
7821 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7822 // JDK-8292878 removed the need for an explicit scratch register to load addresses wider
7823 // than 32 bits in register-indirect addressing mode, since stub constants live in the
7824 // code cache and there is currently a 2G cap on ReservedCodeCacheSize.
7825 // Targets are free to raise that limit, but a code cache larger than 2G is unrealistic
7826 // in practice; on the plus side, with the cap in place we save a temporary register
7827 // allocation, which in the limiting case can prevent spilling in blocks with high
7828 // register pressure.
7829 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7830 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
7831 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
7832 %}
7833 ins_pipe( pipe_slow );
7834 %}
7835
7836 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
7837 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
7838 is_integral_type(Matcher::vector_element_basic_type(n)));
7839 match(Set dst (VectorCastF2X src));
7840 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
7841 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
7842 ins_encode %{
7843 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7844 if (to_elem_bt == T_LONG) {
7845 int vlen_enc = vector_length_encoding(this);
7846 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7847 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
7848 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
7849 } else {
7850 int vlen_enc = vector_length_encoding(this, $src);
7851 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7852 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
7853 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
7854 }
7855 %}
7856 ins_pipe( pipe_slow );
7857 %}
7858
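// The D2X casts below mirror the F2X pattern above: D2F is a plain vcvtpd2ps, while the
// integral variants use the vector_castD2X helpers (note the signflip constants and extra
// temporaries) to fix up NaN and out-of-range lanes so the result follows Java's scalar
// conversion rules (NaN becomes 0, overflowing values are clamped to MIN_VALUE/MAX_VALUE).
7859 instruct vcastDtoF_reg(vec dst, vec src) %{
7860 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
7861 match(Set dst (VectorCastD2X src));
7862 format %{ "vector_cast_d2x $dst,$src\t!"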
%} 7863 ins_encode %{ 7864 int vlen_enc = vector_length_encoding(this, $src); 7865 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7866 %} 7867 ins_pipe( pipe_slow ); 7868 %} 7869 7870 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{ 7871 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7872 is_integral_type(Matcher::vector_element_basic_type(n))); 7873 match(Set dst (VectorCastD2X src)); 7874 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr); 7875 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %} 7876 ins_encode %{ 7877 int vlen_enc = vector_length_encoding(this, $src); 7878 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7879 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7880 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister, 7881 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7882 %} 7883 ins_pipe( pipe_slow ); 7884 %} 7885 7886 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7887 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7888 is_integral_type(Matcher::vector_element_basic_type(n))); 7889 match(Set dst (VectorCastD2X src)); 7890 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7891 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7892 ins_encode %{ 7893 int vlen_enc = vector_length_encoding(this, $src); 7894 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7895 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) : 7896 ExternalAddress(vector_float_signflip()); 7897 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7898 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc); 7899 %} 7900 ins_pipe( pipe_slow ); 7901 %} 7902 7903 instruct vucast(vec dst, vec src) %{ 7904 match(Set dst (VectorUCastB2X src)); 7905 match(Set dst (VectorUCastS2X src)); 7906 match(Set dst (VectorUCastI2X src)); 7907 format %{ "vector_ucast $dst,$src\t!" %} 7908 ins_encode %{ 7909 assert(UseAVX > 0, "required"); 7910 7911 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7912 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7913 int vlen_enc = vector_length_encoding(this); 7914 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7915 %} 7916 ins_pipe( pipe_slow ); 7917 %} 7918 7919 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7920 predicate(!VM_Version::supports_avx512vl() && 7921 Matcher::vector_length_in_bytes(n) < 64 && 7922 Matcher::vector_element_basic_type(n) == T_INT); 7923 match(Set dst (RoundVF src)); 7924 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7925 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %} 7926 ins_encode %{ 7927 int vlen_enc = vector_length_encoding(this); 7928 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 
0x3FBF : 0x3F80)); 7929 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, 7930 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7931 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister); 7932 %} 7933 ins_pipe( pipe_slow ); 7934 %} 7935 7936 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7937 predicate((VM_Version::supports_avx512vl() || 7938 Matcher::vector_length_in_bytes(n) == 64) && 7939 Matcher::vector_element_basic_type(n) == T_INT); 7940 match(Set dst (RoundVF src)); 7941 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7942 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7943 ins_encode %{ 7944 int vlen_enc = vector_length_encoding(this); 7945 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7946 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, 7947 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7948 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7949 %} 7950 ins_pipe( pipe_slow ); 7951 %} 7952 7953 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7954 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7955 match(Set dst (RoundVD src)); 7956 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7957 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7958 ins_encode %{ 7959 int vlen_enc = vector_length_encoding(this); 7960 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7961 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, 7962 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc, 7963 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7964 %} 7965 ins_pipe( pipe_slow ); 7966 %} 7967 7968 // --------------------------------- VectorMaskCmp -------------------------------------- 7969 7970 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7971 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7972 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 7973 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7974 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7975 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7976 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 7977 ins_encode %{ 7978 int vlen_enc = vector_length_encoding(this, $src1); 7979 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7980 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7981 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7982 } else { 7983 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7984 } 7985 %} 7986 ins_pipe( pipe_slow ); 7987 %} 7988 7989 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7990 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 7991 n->bottom_type()->isa_vectmask() == nullptr && 7992 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7993 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7994 effect(TEMP ktmp); 7995 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7996 ins_encode %{ 7997 int vlen_enc = Assembler::AVX_512bit; 7998 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7999 KRegister mask = k0; // The comparison itself is not being masked. 8000 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 8001 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8002 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 8003 } else { 8004 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8005 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 8006 } 8007 %} 8008 ins_pipe( pipe_slow ); 8009 %} 8010 8011 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 8012 predicate(n->bottom_type()->isa_vectmask() && 8013 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 8014 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8015 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 8016 ins_encode %{ 8017 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 8018 int vlen_enc = vector_length_encoding(this, $src1); 8019 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 8020 KRegister mask = k0; // The comparison itself is not being masked. 8021 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 8022 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8023 } else { 8024 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8025 } 8026 %} 8027 ins_pipe( pipe_slow ); 8028 %} 8029 8030 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 8031 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8032 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 8033 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 8034 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8035 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 8036 (n->in(2)->get_int() == BoolTest::eq || 8037 n->in(2)->get_int() == BoolTest::lt || 8038 n->in(2)->get_int() == BoolTest::gt)); // cond 8039 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8040 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 8041 ins_encode %{ 8042 int vlen_enc = vector_length_encoding(this, $src1); 8043 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8044 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 8045 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc); 8046 %} 8047 ins_pipe( pipe_slow ); 8048 %} 8049 8050 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 8051 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8052 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 8053 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 8054 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8055 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 8056 (n->in(2)->get_int() == BoolTest::ne || 8057 n->in(2)->get_int() == BoolTest::le || 8058 n->in(2)->get_int() == BoolTest::ge)); // cond 8059 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8060 effect(TEMP dst, TEMP xtmp); 8061 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %} 8062 ins_encode %{ 8063 int vlen_enc = vector_length_encoding(this, $src1); 8064 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8065 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 8066 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 8067 %} 8068 ins_pipe( pipe_slow ); 8069 %} 8070 8071 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 8072 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8073 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 8074 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 8075 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8076 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 8077 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8078 effect(TEMP dst, TEMP xtmp); 8079 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! 
using $xtmp as TEMP" %} 8080 ins_encode %{ 8081 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1))); 8082 int vlen_enc = vector_length_encoding(this, $src1); 8083 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8084 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 8085 8086 if (vlen_enc == Assembler::AVX_128bit) { 8087 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 8088 } else { 8089 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 8090 } 8091 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8092 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8093 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 8094 %} 8095 ins_pipe( pipe_slow ); 8096 %} 8097 8098 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 8099 predicate((n->bottom_type()->isa_vectmask() == nullptr && 8100 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 8101 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 8102 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8103 effect(TEMP ktmp); 8104 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 8105 ins_encode %{ 8106 assert(UseAVX > 2, "required"); 8107 8108 int vlen_enc = vector_length_encoding(this, $src1); 8109 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8110 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 8111 KRegister mask = k0; // The comparison itself is not being masked. 8112 bool merge = false; 8113 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 8114 8115 switch (src1_elem_bt) { 8116 case T_INT: { 8117 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8118 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 8119 break; 8120 } 8121 case T_LONG: { 8122 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8123 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 8124 break; 8125 } 8126 default: assert(false, "%s", type2name(src1_elem_bt)); 8127 } 8128 %} 8129 ins_pipe( pipe_slow ); 8130 %} 8131 8132 8133 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{ 8134 predicate(n->bottom_type()->isa_vectmask() && 8135 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 8136 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8137 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" 
%} 8138 ins_encode %{ 8139 assert(UseAVX > 2, "required"); 8140 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 8141 8142 int vlen_enc = vector_length_encoding(this, $src1); 8143 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8144 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 8145 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 8146 8147 // Comparison i 8148 switch (src1_elem_bt) { 8149 case T_BYTE: { 8150 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8151 break; 8152 } 8153 case T_SHORT: { 8154 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8155 break; 8156 } 8157 case T_INT: { 8158 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8159 break; 8160 } 8161 case T_LONG: { 8162 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8163 break; 8164 } 8165 default: assert(false, "%s", type2name(src1_elem_bt)); 8166 } 8167 %} 8168 ins_pipe( pipe_slow ); 8169 %} 8170 8171 // Extract 8172 8173 instruct extractI(rRegI dst, legVec src, immU8 idx) %{ 8174 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src 8175 match(Set dst (ExtractI src idx)); 8176 match(Set dst (ExtractS src idx)); 8177 match(Set dst (ExtractB src idx)); 8178 format %{ "extractI $dst,$src,$idx\t!" %} 8179 ins_encode %{ 8180 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8181 8182 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8183 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8184 %} 8185 ins_pipe( pipe_slow ); 8186 %} 8187 8188 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ 8189 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src 8190 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src 8191 match(Set dst (ExtractI src idx)); 8192 match(Set dst (ExtractS src idx)); 8193 match(Set dst (ExtractB src idx)); 8194 effect(TEMP vtmp); 8195 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %} 8196 ins_encode %{ 8197 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8198 8199 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8200 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8201 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); 8202 %} 8203 ins_pipe( pipe_slow ); 8204 %} 8205 8206 instruct extractL(rRegL dst, legVec src, immU8 idx) %{ 8207 predicate(Matcher::vector_length(n->in(1)) <= 2); // src 8208 match(Set dst (ExtractL src idx)); 8209 format %{ "extractL $dst,$src,$idx\t!" %} 8210 ins_encode %{ 8211 assert(UseSSE >= 4, "required"); 8212 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8213 8214 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8215 %} 8216 ins_pipe( pipe_slow ); 8217 %} 8218 8219 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ 8220 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8221 Matcher::vector_length(n->in(1)) == 8); // src 8222 match(Set dst (ExtractL src idx)); 8223 effect(TEMP vtmp); 8224 format %{ "vextractL $dst,$src,$idx\t! 
using $vtmp as TEMP" %} 8225 ins_encode %{ 8226 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8227 8228 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8229 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 8230 %} 8231 ins_pipe( pipe_slow ); 8232 %} 8233 8234 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8235 predicate(Matcher::vector_length(n->in(1)) <= 4); 8236 match(Set dst (ExtractF src idx)); 8237 effect(TEMP dst, TEMP vtmp); 8238 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8239 ins_encode %{ 8240 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8241 8242 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 8243 %} 8244 ins_pipe( pipe_slow ); 8245 %} 8246 8247 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8248 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 8249 Matcher::vector_length(n->in(1)/*src*/) == 16); 8250 match(Set dst (ExtractF src idx)); 8251 effect(TEMP vtmp); 8252 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8253 ins_encode %{ 8254 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8255 8256 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8257 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 8258 %} 8259 ins_pipe( pipe_slow ); 8260 %} 8261 8262 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 8263 predicate(Matcher::vector_length(n->in(1)) == 2); // src 8264 match(Set dst (ExtractD src idx)); 8265 format %{ "extractD $dst,$src,$idx\t!" %} 8266 ins_encode %{ 8267 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8268 8269 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8270 %} 8271 ins_pipe( pipe_slow ); 8272 %} 8273 8274 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 8275 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8276 Matcher::vector_length(n->in(1)) == 8); // src 8277 match(Set dst (ExtractD src idx)); 8278 effect(TEMP vtmp); 8279 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 8280 ins_encode %{ 8281 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8282 8283 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8284 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 8285 %} 8286 ins_pipe( pipe_slow ); 8287 %} 8288 8289 // --------------------------------- Vector Blend -------------------------------------- 8290 8291 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 8292 predicate(UseAVX == 0); 8293 match(Set dst (VectorBlend (Binary dst src) mask)); 8294 format %{ "vector_blend $dst,$src,$mask\t! 
using $tmp as TEMP" %} 8295 effect(TEMP tmp); 8296 ins_encode %{ 8297 assert(UseSSE >= 4, "required"); 8298 8299 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 8300 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 8301 } 8302 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 8303 %} 8304 ins_pipe( pipe_slow ); 8305 %} 8306 8307 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8308 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8309 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8310 Matcher::vector_length_in_bytes(n) <= 32 && 8311 is_integral_type(Matcher::vector_element_basic_type(n))); 8312 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8313 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8314 ins_encode %{ 8315 int vlen_enc = vector_length_encoding(this); 8316 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8317 %} 8318 ins_pipe( pipe_slow ); 8319 %} 8320 8321 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8322 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8323 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8324 Matcher::vector_length_in_bytes(n) <= 32 && 8325 !is_integral_type(Matcher::vector_element_basic_type(n))); 8326 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8327 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8328 ins_encode %{ 8329 int vlen_enc = vector_length_encoding(this); 8330 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8331 %} 8332 ins_pipe( pipe_slow ); 8333 %} 8334 8335 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{ 8336 predicate(UseAVX > 0 && EnableX86ECoreOpts && 8337 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8338 Matcher::vector_length_in_bytes(n) <= 32); 8339 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8340 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %} 8341 effect(TEMP vtmp, TEMP dst); 8342 ins_encode %{ 8343 int vlen_enc = vector_length_encoding(this); 8344 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8345 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8346 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8347 %} 8348 ins_pipe( pipe_slow ); 8349 %} 8350 8351 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 8352 predicate(Matcher::vector_length_in_bytes(n) == 64 && 8353 n->in(2)->bottom_type()->isa_vectmask() == nullptr); 8354 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8355 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using k2 as TEMP" %} 8356 effect(TEMP ktmp); 8357 ins_encode %{ 8358 int vlen_enc = Assembler::AVX_512bit; 8359 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8360 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 8361 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8362 %} 8363 ins_pipe( pipe_slow ); 8364 %} 8365 8366 8367 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 8368 predicate(n->in(2)->bottom_type()->isa_vectmask() && 8369 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 8370 VM_Version::supports_avx512bw())); 8371 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8372 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 8373 ins_encode %{ 8374 int vlen_enc = vector_length_encoding(this); 8375 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8376 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8377 %} 8378 ins_pipe( pipe_slow ); 8379 %} 8380 8381 // --------------------------------- ABS -------------------------------------- 8382 // a = |a| 8383 instruct vabsB_reg(vec dst, vec src) %{ 8384 match(Set dst (AbsVB src)); 8385 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 8386 ins_encode %{ 8387 uint vlen = Matcher::vector_length(this); 8388 if (vlen <= 16) { 8389 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8390 } else { 8391 int vlen_enc = vector_length_encoding(this); 8392 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8393 } 8394 %} 8395 ins_pipe( pipe_slow ); 8396 %} 8397 8398 instruct vabsS_reg(vec dst, vec src) %{ 8399 match(Set dst (AbsVS src)); 8400 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 8401 ins_encode %{ 8402 uint vlen = Matcher::vector_length(this); 8403 if (vlen <= 8) { 8404 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8405 } else { 8406 int vlen_enc = vector_length_encoding(this); 8407 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8408 } 8409 %} 8410 ins_pipe( pipe_slow ); 8411 %} 8412 8413 instruct vabsI_reg(vec dst, vec src) %{ 8414 match(Set dst (AbsVI src)); 8415 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 8416 ins_encode %{ 8417 uint vlen = Matcher::vector_length(this); 8418 if (vlen <= 4) { 8419 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8420 } else { 8421 int vlen_enc = vector_length_encoding(this); 8422 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8423 } 8424 %} 8425 ins_pipe( pipe_slow ); 8426 %} 8427 8428 instruct vabsL_reg(vec dst, vec src) %{ 8429 match(Set dst (AbsVL src)); 8430 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 8431 ins_encode %{ 8432 assert(UseAVX > 2, "required"); 8433 int vlen_enc = vector_length_encoding(this); 8434 if (!VM_Version::supports_avx512vl()) { 8435 vlen_enc = Assembler::AVX_512bit; 8436 } 8437 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8438 %} 8439 ins_pipe( pipe_slow ); 8440 %} 8441 8442 // --------------------------------- ABSNEG -------------------------------------- 8443 8444 instruct vabsnegF(vec dst, vec src) %{ 8445 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 8446 match(Set dst (AbsVF src)); 8447 match(Set dst (NegVF src)); 8448 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 8449 ins_cost(150); 8450 ins_encode %{ 8451 int opcode = 
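// AbsVF/NegVF are pure bit operations on the sign bit (AND with the ~sign-bit mask for abs,
// XOR with the sign bit for neg); vabsnegf picks the constant mask from the opcode, which
// is what "[mask]" in the format string refers to.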
this->ideal_Opcode(); 8452 int vlen = Matcher::vector_length(this); 8453 if (vlen == 2) { 8454 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8455 } else { 8456 assert(vlen == 8 || vlen == 16, "required"); 8457 int vlen_enc = vector_length_encoding(this); 8458 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8459 } 8460 %} 8461 ins_pipe( pipe_slow ); 8462 %} 8463 8464 instruct vabsneg4F(vec dst) %{ 8465 predicate(Matcher::vector_length(n) == 4); 8466 match(Set dst (AbsVF dst)); 8467 match(Set dst (NegVF dst)); 8468 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 8469 ins_cost(150); 8470 ins_encode %{ 8471 int opcode = this->ideal_Opcode(); 8472 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 8473 %} 8474 ins_pipe( pipe_slow ); 8475 %} 8476 8477 instruct vabsnegD(vec dst, vec src) %{ 8478 match(Set dst (AbsVD src)); 8479 match(Set dst (NegVD src)); 8480 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 8481 ins_encode %{ 8482 int opcode = this->ideal_Opcode(); 8483 uint vlen = Matcher::vector_length(this); 8484 if (vlen == 2) { 8485 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8486 } else { 8487 int vlen_enc = vector_length_encoding(this); 8488 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8489 } 8490 %} 8491 ins_pipe( pipe_slow ); 8492 %} 8493 8494 //------------------------------------- VectorTest -------------------------------------------- 8495 8496 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ 8497 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); 8498 match(Set cr (VectorTest src1 src2)); 8499 effect(TEMP vtmp); 8500 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %} 8501 ins_encode %{ 8502 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8503 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8504 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen); 8505 %} 8506 ins_pipe( pipe_slow ); 8507 %} 8508 8509 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{ 8510 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16); 8511 match(Set cr (VectorTest src1 src2)); 8512 format %{ "vptest_ge16 $src1, $src2\n\t" %} 8513 ins_encode %{ 8514 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8515 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8516 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen); 8517 %} 8518 ins_pipe( pipe_slow ); 8519 %} 8520 8521 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8522 predicate((Matcher::vector_length(n->in(1)) < 8 || 8523 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8524 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 8525 match(Set cr (VectorTest src1 src2)); 8526 effect(TEMP tmp); 8527 format %{ "ktest_alltrue_le8 $src1, $src2\t! 
using $tmp as TEMP" %} 8528 ins_encode %{ 8529 uint masklen = Matcher::vector_length(this, $src1); 8530 __ kmovwl($tmp$$Register, $src1$$KRegister); 8531 __ andl($tmp$$Register, (1 << masklen) - 1); 8532 __ cmpl($tmp$$Register, (1 << masklen) - 1); 8533 %} 8534 ins_pipe( pipe_slow ); 8535 %} 8536 8537 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8538 predicate((Matcher::vector_length(n->in(1)) < 8 || 8539 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8540 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 8541 match(Set cr (VectorTest src1 src2)); 8542 effect(TEMP tmp); 8543 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %} 8544 ins_encode %{ 8545 uint masklen = Matcher::vector_length(this, $src1); 8546 __ kmovwl($tmp$$Register, $src1$$KRegister); 8547 __ andl($tmp$$Register, (1 << masklen) - 1); 8548 %} 8549 ins_pipe( pipe_slow ); 8550 %} 8551 8552 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ 8553 predicate(Matcher::vector_length(n->in(1)) >= 16 || 8554 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq())); 8555 match(Set cr (VectorTest src1 src2)); 8556 format %{ "ktest_ge8 $src1, $src2\n\t" %} 8557 ins_encode %{ 8558 uint masklen = Matcher::vector_length(this, $src1); 8559 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister); 8560 %} 8561 ins_pipe( pipe_slow ); 8562 %} 8563 8564 //------------------------------------- LoadMask -------------------------------------------- 8565 8566 instruct loadMask(legVec dst, legVec src) %{ 8567 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw()); 8568 match(Set dst (VectorLoadMask src)); 8569 effect(TEMP dst); 8570 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8571 ins_encode %{ 8572 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8573 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8574 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8575 %} 8576 ins_pipe( pipe_slow ); 8577 %} 8578 8579 instruct loadMask64(kReg dst, vec src, vec xtmp) %{ 8580 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8581 match(Set dst (VectorLoadMask src)); 8582 effect(TEMP xtmp); 8583 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} 8584 ins_encode %{ 8585 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8586 true, Assembler::AVX_512bit); 8587 %} 8588 ins_pipe( pipe_slow ); 8589 %} 8590 8591 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8592 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8593 match(Set dst (VectorLoadMask src)); 8594 effect(TEMP xtmp); 8595 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8596 ins_encode %{ 8597 int vlen_enc = vector_length_encoding(in(1)); 8598 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8599 false, vlen_enc); 8600 %} 8601 ins_pipe( pipe_slow ); 8602 %} 8603 8604 //------------------------------------- StoreMask -------------------------------------------- 8605 8606 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8607 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8608 match(Set dst (VectorStoreMask src size)); 8609 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8610 ins_encode %{ 8611 int vlen = Matcher::vector_length(this); 8612 if (vlen <= 16 && UseAVX <= 2) { 8613 assert(UseSSE >= 3, "required"); 8614 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8615 } else { 8616 assert(UseAVX > 0, "required"); 8617 int src_vlen_enc = vector_length_encoding(this, $src); 8618 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8619 } 8620 %} 8621 ins_pipe( pipe_slow ); 8622 %} 8623 8624 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8625 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8626 match(Set dst (VectorStoreMask src size)); 8627 effect(TEMP_DEF dst, TEMP xtmp); 8628 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8629 ins_encode %{ 8630 int vlen_enc = Assembler::AVX_128bit; 8631 int vlen = Matcher::vector_length(this); 8632 if (vlen <= 8) { 8633 assert(UseSSE >= 3, "required"); 8634 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8635 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8636 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8637 } else { 8638 assert(UseAVX > 0, "required"); 8639 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8640 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8641 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8642 } 8643 %} 8644 ins_pipe( pipe_slow ); 8645 %} 8646 8647 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8648 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8649 match(Set dst (VectorStoreMask src size)); 8650 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8651 effect(TEMP_DEF dst, TEMP xtmp); 8652 ins_encode %{ 8653 int vlen_enc = Assembler::AVX_128bit; 8654 int vlen = Matcher::vector_length(this); 8655 if (vlen <= 4) { 8656 assert(UseSSE >= 3, "required"); 8657 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8658 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8659 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8660 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8661 } else { 8662 assert(UseAVX > 0, "required"); 8663 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8664 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8665 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8666 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8667 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8668 } 8669 %} 8670 ins_pipe( pipe_slow ); 8671 %} 8672 8673 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8674 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8675 match(Set dst (VectorStoreMask src size)); 8676 effect(TEMP_DEF dst, TEMP xtmp); 8677 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8678 ins_encode %{ 8679 assert(UseSSE >= 3, "required"); 8680 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8681 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8682 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8683 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8684 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8685 %} 8686 ins_pipe( pipe_slow ); 8687 %} 8688 8689 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8690 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8691 match(Set dst (VectorStoreMask src size)); 8692 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %} 8693 effect(TEMP_DEF dst, TEMP vtmp); 8694 ins_encode %{ 8695 int vlen_enc = Assembler::AVX_128bit; 8696 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8697 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8698 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8699 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8700 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8701 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8702 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8703 %} 8704 ins_pipe( pipe_slow ); 8705 %} 8706 8707 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8708 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8709 match(Set dst (VectorStoreMask src size)); 8710 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8711 ins_encode %{ 8712 int src_vlen_enc = vector_length_encoding(this, $src); 8713 int dst_vlen_enc = vector_length_encoding(this); 8714 if (!VM_Version::supports_avx512vl()) { 8715 src_vlen_enc = Assembler::AVX_512bit; 8716 } 8717 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8718 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8719 %} 8720 ins_pipe( pipe_slow ); 8721 %} 8722 8723 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8724 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8725 match(Set dst (VectorStoreMask src size)); 8726 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8727 ins_encode %{ 8728 int src_vlen_enc = vector_length_encoding(this, $src); 8729 int dst_vlen_enc = vector_length_encoding(this); 8730 if (!VM_Version::supports_avx512vl()) { 8731 src_vlen_enc = Assembler::AVX_512bit; 8732 } 8733 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8734 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8735 %} 8736 ins_pipe( pipe_slow ); 8737 %} 8738 8739 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{ 8740 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8741 match(Set dst (VectorStoreMask mask size)); 8742 effect(TEMP_DEF dst); 8743 format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %} 8744 ins_encode %{ 8745 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8746 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8747 false, Assembler::AVX_512bit, noreg); 8748 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8749 %} 8750 ins_pipe( pipe_slow ); 8751 %} 8752 8753 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8754 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8755 match(Set dst (VectorStoreMask mask size)); 8756 effect(TEMP_DEF dst); 8757 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8758 ins_encode %{ 8759 int dst_vlen_enc = vector_length_encoding(this); 8760 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8761 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8762 %} 8763 ins_pipe( pipe_slow ); 8764 %} 8765 8766 instruct vmaskcast_evex(kReg dst) %{ 8767 match(Set dst (VectorMaskCast dst)); 8768 ins_cost(0); 8769 format %{ "vector_mask_cast $dst" %} 8770 ins_encode %{ 8771 // empty 8772 %} 8773 ins_pipe(empty); 8774 %} 8775 8776 instruct vmaskcast(vec dst) %{ 8777 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); 8778 match(Set dst (VectorMaskCast dst)); 8779 ins_cost(0); 8780 format %{ "vector_mask_cast $dst" %} 8781 ins_encode %{ 8782 // empty 8783 %} 8784 ins_pipe(empty); 8785 %} 8786 8787 instruct vmaskcast_avx(vec dst, vec src) %{ 8788 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1))); 8789 match(Set dst (VectorMaskCast src)); 8790 format %{ "vector_mask_cast $dst, $src" %} 8791 ins_encode %{ 8792 int vlen = Matcher::vector_length(this); 8793 BasicType src_bt = Matcher::vector_element_basic_type(this, $src); 8794 BasicType dst_bt = Matcher::vector_element_basic_type(this); 8795 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen); 8796 %} 8797 ins_pipe(pipe_slow); 8798 %} 8799 8800 //-------------------------------- Load Iota Indices ---------------------------------- 8801 8802 instruct loadIotaIndices(vec dst, immI_0 src) %{ 8803 match(Set dst (VectorLoadConst src)); 8804 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8805 ins_encode %{ 8806 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8807 BasicType bt = Matcher::vector_element_basic_type(this); 8808 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt); 8809 %} 8810 ins_pipe( pipe_slow ); 8811 %} 8812 8813 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ 8814 match(Set dst (PopulateIndex src1 src2)); 8815 effect(TEMP dst, TEMP vtmp); 8816 format %{ "vector_populate_index $dst $src1 $src2\t! 
using $vtmp as TEMP" %} 8817 ins_encode %{ 8818 assert($src2$$constant == 1, "required"); 8819 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8820 int vlen_enc = vector_length_encoding(this); 8821 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8822 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8823 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8824 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8825 %} 8826 ins_pipe( pipe_slow ); 8827 %} 8828 8829 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ 8830 match(Set dst (PopulateIndex src1 src2)); 8831 effect(TEMP dst, TEMP vtmp); 8832 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} 8833 ins_encode %{ 8834 assert($src2$$constant == 1, "required"); 8835 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8836 int vlen_enc = vector_length_encoding(this); 8837 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8838 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8839 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8840 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8841 %} 8842 ins_pipe( pipe_slow ); 8843 %} 8844 8845 //-------------------------------- Rearrange ---------------------------------- 8846 8847 // LoadShuffle/Rearrange for Byte 8848 instruct rearrangeB(vec dst, vec shuffle) %{ 8849 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8850 Matcher::vector_length(n) < 32); 8851 match(Set dst (VectorRearrange dst shuffle)); 8852 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8853 ins_encode %{ 8854 assert(UseSSE >= 4, "required"); 8855 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8856 %} 8857 ins_pipe( pipe_slow ); 8858 %} 8859 8860 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8861 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8862 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8863 match(Set dst (VectorRearrange src shuffle)); 8864 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8865 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8866 ins_encode %{ 8867 assert(UseAVX >= 2, "required"); 8868 // Swap src into vtmp1 8869 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8870 // Shuffle swapped src to get entries from other 128 bit lane 8871 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8872 // Shuffle original src to get entries from self 128 bit lane 8873 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8874 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8875 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8876 // Perform the blend 8877 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8878 %} 8879 ins_pipe( pipe_slow ); 8880 %} 8881 8882 8883 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{ 8884 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8885 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi()); 8886 match(Set dst (VectorRearrange src shuffle)); 8887 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 8888 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %} 8889 ins_encode %{ 8890 int vlen_enc = vector_length_encoding(this); 8891 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, 8892 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, 8893 $rtmp$$Register, $ktmp$$KRegister, vlen_enc); 8894 %} 8895 ins_pipe( pipe_slow ); 8896 %} 8897 8898 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{ 8899 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8900 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); 8901 match(Set dst (VectorRearrange src shuffle)); 8902 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8903 ins_encode %{ 8904 int vlen_enc = vector_length_encoding(this); 8905 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8906 %} 8907 ins_pipe( pipe_slow ); 8908 %} 8909 8910 // LoadShuffle/Rearrange for Short 8911 8912 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{ 8913 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8914 !VM_Version::supports_avx512bw()); 8915 match(Set dst (VectorLoadShuffle src)); 8916 effect(TEMP dst, TEMP vtmp); 8917 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8918 ins_encode %{ 8919 // Create a byte shuffle mask from short shuffle mask 8920 // only byte shuffle instruction available on these platforms 8921 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8922 if (UseAVX == 0) { 8923 assert(vlen_in_bytes <= 16, "required"); 8924 // Multiply each shuffle by two to get byte index 8925 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister); 8926 __ psllw($vtmp$$XMMRegister, 1); 8927 8928 // Duplicate to create 2 copies of byte index 8929 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8930 __ psllw($dst$$XMMRegister, 8); 8931 __ por($dst$$XMMRegister, $vtmp$$XMMRegister); 8932 8933 // Add one to get alternate byte index 8934 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg); 8935 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8936 } else { 8937 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); 8938 int vlen_enc = vector_length_encoding(this); 8939 // Multiply each shuffle by two to get byte index 8940 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc); 8941 8942 // Duplicate to create 2 copies of byte index 8943 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); 8944 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8945 8946 // Add one to get alternate byte index 8947 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg); 8948 } 8949 %} 8950 ins_pipe( pipe_slow ); 8951 %} 8952 8953 instruct rearrangeS(vec dst, vec shuffle) %{ 8954 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8955 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); 8956 match(Set dst (VectorRearrange dst shuffle)); 8957 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8958 ins_encode %{ 8959 assert(UseSSE >= 4, "required"); 8960 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8961 %} 8962 ins_pipe( pipe_slow ); 8963 %} 8964 8965 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8966 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8967 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 8968 match(Set dst (VectorRearrange src shuffle)); 8969 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8970 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8971 ins_encode %{ 8972 assert(UseAVX >= 2, "required"); 8973 // Swap src into vtmp1 8974 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8975 // Shuffle swapped src to get entries from other 128 bit lane 8976 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8977 // Shuffle original src to get entries from self 128 bit lane 8978 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8979 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8980 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8981 // Perform the blend 8982 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8983 %} 8984 ins_pipe( pipe_slow ); 8985 %} 8986 8987 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 8988 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8989 VM_Version::supports_avx512bw()); 8990 match(Set dst (VectorRearrange src shuffle)); 8991 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8992 ins_encode %{ 8993 int vlen_enc = vector_length_encoding(this); 8994 if (!VM_Version::supports_avx512vl()) { 8995 vlen_enc = Assembler::AVX_512bit; 8996 } 8997 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8998 %} 8999 ins_pipe( pipe_slow ); 9000 %} 9001 9002 // LoadShuffle/Rearrange for Integer and Float 9003 9004 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{ 9005 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9006 Matcher::vector_length(n) == 4 && UseAVX == 0); 9007 match(Set dst (VectorLoadShuffle src)); 9008 effect(TEMP dst, TEMP vtmp); 9009 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 9010 ins_encode %{ 9011 assert(UseSSE >= 4, "required"); 9012 9013 // Create a byte shuffle mask from int shuffle mask 9014 // only byte shuffle instruction available on these platforms 9015 9016 // Duplicate and multiply each shuffle by 4 9017 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister); 9018 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 9019 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 9020 __ psllw($vtmp$$XMMRegister, 2); 9021 9022 // Duplicate again to create 4 copies of byte index 9023 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 9024 __ psllw($dst$$XMMRegister, 8); 9025 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 9026 9027 // Add 3,2,1,0 to get alternate byte index 9028 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg); 9029 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 9030 %} 9031 ins_pipe( pipe_slow ); 9032 %} 9033 9034 instruct rearrangeI(vec dst, vec shuffle) %{ 9035 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9036 UseAVX == 0); 9037 match(Set dst (VectorRearrange dst shuffle)); 9038 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 9039 ins_encode %{ 9040 assert(UseSSE >= 4, "required"); 9041 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 9042 %} 9043 ins_pipe( pipe_slow ); 9044 %} 9045 9046 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 9047 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9048 UseAVX > 0); 9049 match(Set dst (VectorRearrange src shuffle)); 9050 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9051 ins_encode %{ 9052 int vlen_enc = vector_length_encoding(this); 9053 BasicType bt = Matcher::vector_element_basic_type(this); 9054 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9055 %} 9056 ins_pipe( pipe_slow ); 9057 %} 9058 9059 // LoadShuffle/Rearrange for Long and Double 9060 9061 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{ 9062 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9063 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 9064 match(Set dst (VectorLoadShuffle src)); 9065 effect(TEMP dst, TEMP vtmp); 9066 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 9067 ins_encode %{ 9068 assert(UseAVX >= 2, "required"); 9069 9070 int vlen_enc = vector_length_encoding(this); 9071 // Create a double word shuffle mask from long shuffle mask 9072 // only double word shuffle instruction available on these platforms 9073 9074 // Multiply each shuffle by two to get double word index 9075 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc); 9076 9077 // Duplicate each double word shuffle 9078 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 9079 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 9080 9081 // Add one to get alternate double word index 9082 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg); 9083 %} 9084 ins_pipe( pipe_slow ); 9085 %} 9086 9087 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 9088 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9089 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 9090 match(Set dst (VectorRearrange src shuffle)); 9091 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9092 ins_encode %{ 9093 assert(UseAVX >= 2, "required"); 9094 9095 int vlen_enc = vector_length_encoding(this); 9096 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9097 %} 9098 ins_pipe( pipe_slow ); 9099 %} 9100 9101 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 9102 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9103 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 9104 match(Set dst (VectorRearrange src shuffle)); 9105 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9106 ins_encode %{ 9107 assert(UseAVX > 2, "required"); 9108 9109 int vlen_enc = vector_length_encoding(this); 9110 if (vlen_enc == Assembler::AVX_128bit) { 9111 vlen_enc = Assembler::AVX_256bit; 9112 } 9113 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9114 %} 9115 ins_pipe( pipe_slow ); 9116 %} 9117 9118 // --------------------------------- FMA -------------------------------------- 9119 // a * b + c 9120 9121 instruct vfmaF_reg(vec a, vec b, vec c) %{ 9122 match(Set c (FmaVF c (Binary a b))); 9123 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 9124 ins_cost(150); 9125 ins_encode %{ 9126 assert(UseFMA, "not enabled"); 9127 int vlen_enc = vector_length_encoding(this); 9128 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9129 %} 9130 ins_pipe( pipe_slow ); 9131 %} 9132 9133 instruct vfmaF_mem(vec a, memory b, vec c) %{ 9134 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9135 match(Set c (FmaVF c (Binary a (LoadVector b)))); 9136 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 9137 ins_cost(150); 9138 ins_encode %{ 9139 assert(UseFMA, "not enabled"); 9140 int vlen_enc = vector_length_encoding(this); 9141 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9142 %} 9143 ins_pipe( pipe_slow ); 9144 %} 9145 9146 instruct vfmaD_reg(vec a, vec b, vec c) %{ 9147 match(Set c (FmaVD c (Binary a b))); 9148 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9149 ins_cost(150); 9150 ins_encode %{ 9151 assert(UseFMA, "not enabled"); 9152 int vlen_enc = vector_length_encoding(this); 9153 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9154 %} 9155 ins_pipe( pipe_slow ); 9156 %} 
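// A minimal scalar sketch of the packed FMA semantics matched in this section, assuming
// MacroAssembler::vfmaf/vfmad lower each lane to a single fused multiply-add form
// (e.g. vfmadd231ps/vfmadd231pd), so the product a*b is not rounded separately
// before the add:
//
//   for (int i = 0; i < vlen; i++) {
//     c[i] = fma(a[i], b[i], c[i]);  // a[i]*b[i] + c[i], one rounding per lane
//   }
//
// The *_mem variants differ only in taking the $b operand from memory instead of a register.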
9157 9158 instruct vfmaD_mem(vec a, memory b, vec c) %{ 9159 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9160 match(Set c (FmaVD c (Binary a (LoadVector b)))); 9161 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9162 ins_cost(150); 9163 ins_encode %{ 9164 assert(UseFMA, "not enabled"); 9165 int vlen_enc = vector_length_encoding(this); 9166 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9167 %} 9168 ins_pipe( pipe_slow ); 9169 %} 9170 9171 // --------------------------------- Vector Multiply Add -------------------------------------- 9172 9173 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 9174 predicate(UseAVX == 0); 9175 match(Set dst (MulAddVS2VI dst src1)); 9176 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 9177 ins_encode %{ 9178 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 9179 %} 9180 ins_pipe( pipe_slow ); 9181 %} 9182 9183 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 9184 predicate(UseAVX > 0); 9185 match(Set dst (MulAddVS2VI src1 src2)); 9186 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 9187 ins_encode %{ 9188 int vlen_enc = vector_length_encoding(this); 9189 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9190 %} 9191 ins_pipe( pipe_slow ); 9192 %} 9193 9194 // --------------------------------- Vector Multiply Add Add ---------------------------------- 9195 9196 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 9197 predicate(VM_Version::supports_avx512_vnni()); 9198 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 9199 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %} 9200 ins_encode %{ 9201 assert(UseAVX > 2, "required"); 9202 int vlen_enc = vector_length_encoding(this); 9203 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9204 %} 9205 ins_pipe( pipe_slow ); 9206 ins_cost(10); 9207 %} 9208 9209 // --------------------------------- PopCount -------------------------------------- 9210 9211 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 9212 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9213 match(Set dst (PopCountVI src)); 9214 match(Set dst (PopCountVL src)); 9215 format %{ "vector_popcount_integral $dst, $src" %} 9216 ins_encode %{ 9217 int opcode = this->ideal_Opcode(); 9218 int vlen_enc = vector_length_encoding(this, $src); 9219 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9220 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 9221 %} 9222 ins_pipe( pipe_slow ); 9223 %} 9224 9225 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9226 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9227 match(Set dst (PopCountVI src mask)); 9228 match(Set dst (PopCountVL src mask)); 9229 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 9230 ins_encode %{ 9231 int vlen_enc = vector_length_encoding(this, $src); 9232 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9233 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9234 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 9235 %} 9236 ins_pipe( pipe_slow ); 9237 %} 9238 9239 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 9240 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9241 
match(Set dst (PopCountVI src)); 9242 match(Set dst (PopCountVL src)); 9243 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9244 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 9245 ins_encode %{ 9246 int opcode = this->ideal_Opcode(); 9247 int vlen_enc = vector_length_encoding(this, $src); 9248 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9249 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9250 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 9251 %} 9252 ins_pipe( pipe_slow ); 9253 %} 9254 9255 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 9256 9257 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 9258 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9259 Matcher::vector_length_in_bytes(n->in(1)))); 9260 match(Set dst (CountTrailingZerosV src)); 9261 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 9262 ins_cost(400); 9263 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 9264 ins_encode %{ 9265 int vlen_enc = vector_length_encoding(this, $src); 9266 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9267 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9268 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9269 %} 9270 ins_pipe( pipe_slow ); 9271 %} 9272 9273 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9274 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9275 VM_Version::supports_avx512cd() && 9276 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9277 match(Set dst (CountTrailingZerosV src)); 9278 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9279 ins_cost(400); 9280 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 9281 ins_encode %{ 9282 int vlen_enc = vector_length_encoding(this, $src); 9283 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9284 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9285 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9286 %} 9287 ins_pipe( pipe_slow ); 9288 %} 9289 9290 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{ 9291 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9292 match(Set dst (CountTrailingZerosV src)); 9293 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp); 9294 ins_cost(400); 9295 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %} 9296 ins_encode %{ 9297 int vlen_enc = vector_length_encoding(this, $src); 9298 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9299 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9300 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 9301 $ktmp$$KRegister, $rtmp$$Register, vlen_enc); 9302 %} 9303 ins_pipe( pipe_slow ); 9304 %} 9305 9306 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9307 
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9308 match(Set dst (CountTrailingZerosV src)); 9309 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9310 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9311 ins_encode %{ 9312 int vlen_enc = vector_length_encoding(this, $src); 9313 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9314 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9315 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9316 %} 9317 ins_pipe( pipe_slow ); 9318 %} 9319 9320 9321 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 9322 9323 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 9324 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 9325 effect(TEMP dst); 9326 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9327 ins_encode %{ 9328 int vector_len = vector_length_encoding(this); 9329 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 9330 %} 9331 ins_pipe( pipe_slow ); 9332 %} 9333 9334 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 9335 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 9336 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 9337 effect(TEMP dst); 9338 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9339 ins_encode %{ 9340 int vector_len = vector_length_encoding(this); 9341 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 9342 %} 9343 ins_pipe( pipe_slow ); 9344 %} 9345 9346 // --------------------------------- Rotation Operations ---------------------------------- 9347 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 9348 match(Set dst (RotateLeftV src shift)); 9349 match(Set dst (RotateRightV src shift)); 9350 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 9351 ins_encode %{ 9352 int opcode = this->ideal_Opcode(); 9353 int vector_len = vector_length_encoding(this); 9354 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9355 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 9356 %} 9357 ins_pipe( pipe_slow ); 9358 %} 9359 9360 instruct vprorate(vec dst, vec src, vec shift) %{ 9361 match(Set dst (RotateLeftV src shift)); 9362 match(Set dst (RotateRightV src shift)); 9363 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 9364 ins_encode %{ 9365 int opcode = this->ideal_Opcode(); 9366 int vector_len = vector_length_encoding(this); 9367 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9368 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9369 %} 9370 ins_pipe( pipe_slow ); 9371 %} 9372 9373 // ---------------------------------- Masked Operations ------------------------------------ 9374 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{ 9375 predicate(!n->in(3)->bottom_type()->isa_vectmask()); 9376 match(Set dst (LoadVectorMasked mem mask)); 9377 format %{ "vector_masked_load $dst, $mem, $mask \t! 
vector masked copy" %} 9378 ins_encode %{ 9379 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9380 int vlen_enc = vector_length_encoding(this); 9381 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc); 9382 %} 9383 ins_pipe( pipe_slow ); 9384 %} 9385 9386 9387 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{ 9388 predicate(n->in(3)->bottom_type()->isa_vectmask()); 9389 match(Set dst (LoadVectorMasked mem mask)); 9390 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 9391 ins_encode %{ 9392 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9393 int vector_len = vector_length_encoding(this); 9394 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len); 9395 %} 9396 ins_pipe( pipe_slow ); 9397 %} 9398 9399 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{ 9400 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9401 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9402 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9403 ins_encode %{ 9404 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9405 int vlen_enc = vector_length_encoding(src_node); 9406 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9407 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc); 9408 %} 9409 ins_pipe( pipe_slow ); 9410 %} 9411 9412 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{ 9413 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9414 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9415 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9416 ins_encode %{ 9417 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9418 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9419 int vlen_enc = vector_length_encoding(src_node); 9420 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc); 9421 %} 9422 ins_pipe( pipe_slow ); 9423 %} 9424 9425 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{ 9426 match(Set addr (VerifyVectorAlignment addr mask)); 9427 effect(KILL cr); 9428 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %} 9429 ins_encode %{ 9430 Label Lskip; 9431 // check if masked bits of addr are zero 9432 __ testq($addr$$Register, $mask$$constant); 9433 __ jccb(Assembler::equal, Lskip); 9434 __ stop("verify_vector_alignment found a misaligned vector memory access"); 9435 __ bind(Lskip); 9436 %} 9437 ins_pipe(pipe_slow); 9438 %} 9439 9440 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 9441 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 9442 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 9443 format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
vector mask comparison" %} 9444 ins_encode %{ 9445 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 9446 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 9447 9448 Label DONE; 9449 int vlen_enc = vector_length_encoding(this, $src1); 9450 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 9451 9452 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 9453 __ mov64($dst$$Register, -1L); 9454 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 9455 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 9456 __ jccb(Assembler::carrySet, DONE); 9457 __ kmovql($dst$$Register, $ktmp1$$KRegister); 9458 __ notq($dst$$Register); 9459 __ tzcntq($dst$$Register, $dst$$Register); 9460 __ bind(DONE); 9461 %} 9462 ins_pipe( pipe_slow ); 9463 %} 9464 9465 9466 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{ 9467 match(Set dst (VectorMaskGen len)); 9468 effect(TEMP temp, KILL cr); 9469 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9470 ins_encode %{ 9471 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9472 %} 9473 ins_pipe( pipe_slow ); 9474 %} 9475 9476 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9477 match(Set dst (VectorMaskGen len)); 9478 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9479 effect(TEMP temp); 9480 ins_encode %{ 9481 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9482 __ kmovql($dst$$KRegister, $temp$$Register); 9483 %} 9484 ins_pipe( pipe_slow ); 9485 %} 9486 9487 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9488 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9489 match(Set dst (VectorMaskToLong mask)); 9490 effect(TEMP dst, KILL cr); 9491 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9492 ins_encode %{ 9493 int opcode = this->ideal_Opcode(); 9494 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9495 int mask_len = Matcher::vector_length(this, $mask); 9496 int mask_size = mask_len * type2aelembytes(mbt); 9497 int vlen_enc = vector_length_encoding(this, $mask); 9498 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9499 $dst$$Register, mask_len, mask_size, vlen_enc); 9500 %} 9501 ins_pipe( pipe_slow ); 9502 %} 9503 9504 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9505 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9506 match(Set dst (VectorMaskToLong mask)); 9507 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9508 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9509 ins_encode %{ 9510 int opcode = this->ideal_Opcode(); 9511 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9512 int mask_len = Matcher::vector_length(this, $mask); 9513 int vlen_enc = vector_length_encoding(this, $mask); 9514 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9515 $dst$$Register, mask_len, mbt, vlen_enc); 9516 %} 9517 ins_pipe( pipe_slow ); 9518 %} 9519 9520 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9521 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9522 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9523 format %{ "vector_tolong_avx $dst, $mask \t! 
using $xtmp as TEMP" %} 9524 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9525 ins_encode %{ 9526 int opcode = this->ideal_Opcode(); 9527 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9528 int mask_len = Matcher::vector_length(this, $mask); 9529 int vlen_enc = vector_length_encoding(this, $mask); 9530 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9531 $dst$$Register, mask_len, mbt, vlen_enc); 9532 %} 9533 ins_pipe( pipe_slow ); 9534 %} 9535 9536 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9537 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9538 match(Set dst (VectorMaskTrueCount mask)); 9539 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9540 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9541 ins_encode %{ 9542 int opcode = this->ideal_Opcode(); 9543 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9544 int mask_len = Matcher::vector_length(this, $mask); 9545 int mask_size = mask_len * type2aelembytes(mbt); 9546 int vlen_enc = vector_length_encoding(this, $mask); 9547 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9548 $tmp$$Register, mask_len, mask_size, vlen_enc); 9549 %} 9550 ins_pipe( pipe_slow ); 9551 %} 9552 9553 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9554 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9555 match(Set dst (VectorMaskTrueCount mask)); 9556 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9557 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9558 ins_encode %{ 9559 int opcode = this->ideal_Opcode(); 9560 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9561 int mask_len = Matcher::vector_length(this, $mask); 9562 int vlen_enc = vector_length_encoding(this, $mask); 9563 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9564 $tmp$$Register, mask_len, mbt, vlen_enc); 9565 %} 9566 ins_pipe( pipe_slow ); 9567 %} 9568 9569 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9570 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9571 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9572 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9573 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9574 ins_encode %{ 9575 int opcode = this->ideal_Opcode(); 9576 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9577 int mask_len = Matcher::vector_length(this, $mask); 9578 int vlen_enc = vector_length_encoding(this, $mask); 9579 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9580 $tmp$$Register, mask_len, mbt, vlen_enc); 9581 %} 9582 ins_pipe( pipe_slow ); 9583 %} 9584 9585 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9586 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9587 match(Set dst (VectorMaskFirstTrue mask)); 9588 match(Set dst (VectorMaskLastTrue mask)); 9589 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9590 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! 
using $tmp as TEMP" %} 9591 ins_encode %{ 9592 int opcode = this->ideal_Opcode(); 9593 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9594 int mask_len = Matcher::vector_length(this, $mask); 9595 int mask_size = mask_len * type2aelembytes(mbt); 9596 int vlen_enc = vector_length_encoding(this, $mask); 9597 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9598 $tmp$$Register, mask_len, mask_size, vlen_enc); 9599 %} 9600 ins_pipe( pipe_slow ); 9601 %} 9602 9603 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9604 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9605 match(Set dst (VectorMaskFirstTrue mask)); 9606 match(Set dst (VectorMaskLastTrue mask)); 9607 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9608 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9609 ins_encode %{ 9610 int opcode = this->ideal_Opcode(); 9611 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9612 int mask_len = Matcher::vector_length(this, $mask); 9613 int vlen_enc = vector_length_encoding(this, $mask); 9614 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9615 $tmp$$Register, mask_len, mbt, vlen_enc); 9616 %} 9617 ins_pipe( pipe_slow ); 9618 %} 9619 9620 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9621 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9622 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9623 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9624 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9625 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! 
using $tmp, $xtmp as TEMP" %} 9626 ins_encode %{ 9627 int opcode = this->ideal_Opcode(); 9628 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9629 int mask_len = Matcher::vector_length(this, $mask); 9630 int vlen_enc = vector_length_encoding(this, $mask); 9631 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9632 $tmp$$Register, mask_len, mbt, vlen_enc); 9633 %} 9634 ins_pipe( pipe_slow ); 9635 %} 9636 9637 // --------------------------------- Compress/Expand Operations --------------------------- 9638 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{ 9639 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 9640 match(Set dst (CompressV src mask)); 9641 match(Set dst (ExpandV src mask)); 9642 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr); 9643 format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %} 9644 ins_encode %{ 9645 int opcode = this->ideal_Opcode(); 9646 int vlen_enc = vector_length_encoding(this); 9647 BasicType bt = Matcher::vector_element_basic_type(this); 9648 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register, 9649 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc); 9650 %} 9651 ins_pipe( pipe_slow ); 9652 %} 9653 9654 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9655 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 9656 match(Set dst (CompressV src mask)); 9657 match(Set dst (ExpandV src mask)); 9658 format %{ "vector_compress_expand $dst, $src, $mask" %} 9659 ins_encode %{ 9660 int opcode = this->ideal_Opcode(); 9661 int vector_len = vector_length_encoding(this); 9662 BasicType bt = Matcher::vector_element_basic_type(this); 9663 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9664 %} 9665 ins_pipe( pipe_slow ); 9666 %} 9667 9668 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9669 match(Set dst (CompressM mask)); 9670 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9671 format %{ "mask_compress_evex $dst, $mask\t! 
using $rtmp1 and $rtmp2 as TEMP" %} 9672 ins_encode %{ 9673 assert(this->in(1)->bottom_type()->isa_vectmask(), ""); 9674 int mask_len = Matcher::vector_length(this); 9675 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len); 9676 %} 9677 ins_pipe( pipe_slow ); 9678 %} 9679 9680 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ 9681 9682 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9683 predicate(!VM_Version::supports_gfni()); 9684 match(Set dst (ReverseV src)); 9685 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9686 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9687 ins_encode %{ 9688 int vec_enc = vector_length_encoding(this); 9689 BasicType bt = Matcher::vector_element_basic_type(this); 9690 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9691 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9692 %} 9693 ins_pipe( pipe_slow ); 9694 %} 9695 9696 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{ 9697 predicate(VM_Version::supports_gfni()); 9698 match(Set dst (ReverseV src)); 9699 effect(TEMP dst, TEMP xtmp); 9700 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %} 9701 ins_encode %{ 9702 int vec_enc = vector_length_encoding(this); 9703 BasicType bt = Matcher::vector_element_basic_type(this); 9704 InternalAddress addr = $constantaddress(jlong(0x8040201008040201)); 9705 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc, 9706 $xtmp$$XMMRegister); 9707 %} 9708 ins_pipe( pipe_slow ); 9709 %} 9710 9711 instruct vreverse_byte_reg(vec dst, vec src) %{ 9712 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); 9713 match(Set dst (ReverseBytesV src)); 9714 effect(TEMP dst); 9715 format %{ "vector_reverse_byte $dst, $src" %} 9716 ins_encode %{ 9717 int vec_enc = vector_length_encoding(this); 9718 BasicType bt = Matcher::vector_element_basic_type(this); 9719 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc); 9720 %} 9721 ins_pipe( pipe_slow ); 9722 %} 9723 9724 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9725 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64); 9726 match(Set dst (ReverseBytesV src)); 9727 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9728 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9729 ins_encode %{ 9730 int vec_enc = vector_length_encoding(this); 9731 BasicType bt = Matcher::vector_element_basic_type(this); 9732 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9733 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9734 %} 9735 ins_pipe( pipe_slow ); 9736 %} 9737 9738 // ---------------------------------- Vector Count Leading Zeros ----------------------------------- 9739 9740 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ 9741 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9742 Matcher::vector_length_in_bytes(n->in(1)))); 9743 match(Set dst (CountLeadingZerosV src)); 9744 format %{ "vector_count_leading_zeros $dst, $src" %} 9745 ins_encode %{ 9746 int vlen_enc = vector_length_encoding(this, $src); 9747 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9748 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, 
$src$$XMMRegister, xnoreg, 9749 xnoreg, xnoreg, k0, noreg, true, vlen_enc); 9750 %} 9751 ins_pipe( pipe_slow ); 9752 %} 9753 9754 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9755 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9756 Matcher::vector_length_in_bytes(n->in(1)))); 9757 match(Set dst (CountLeadingZerosV src mask)); 9758 format %{ "vector_count_leading_zeros $dst, $src, $mask" %} 9759 ins_encode %{ 9760 int vlen_enc = vector_length_encoding(this, $src); 9761 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9762 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9763 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, 9764 xnoreg, $mask$$KRegister, noreg, true, vlen_enc); 9765 %} 9766 ins_pipe( pipe_slow ); 9767 %} 9768 9769 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{ 9770 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9771 VM_Version::supports_avx512cd() && 9772 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9773 match(Set dst (CountLeadingZerosV src)); 9774 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 9775 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %} 9776 ins_encode %{ 9777 int vlen_enc = vector_length_encoding(this, $src); 9778 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9779 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9780 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc); 9781 %} 9782 ins_pipe( pipe_slow ); 9783 %} 9784 9785 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{ 9786 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9787 match(Set dst (CountLeadingZerosV src)); 9788 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 9789 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %} 9790 ins_encode %{ 9791 int vlen_enc = vector_length_encoding(this, $src); 9792 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9793 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9794 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister, 9795 $rtmp$$Register, true, vlen_enc); 9796 %} 9797 ins_pipe( pipe_slow ); 9798 %} 9799 9800 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{ 9801 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT && 9802 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9803 match(Set dst (CountLeadingZerosV src)); 9804 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 9805 format %{ "vector_count_leading_zeros $dst, $src\t! 
using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 9806 ins_encode %{ 9807 int vlen_enc = vector_length_encoding(this, $src); 9808 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9809 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9810 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc); 9811 %} 9812 ins_pipe( pipe_slow ); 9813 %} 9814 9815 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9816 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT && 9817 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9818 match(Set dst (CountLeadingZerosV src)); 9819 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9820 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9821 ins_encode %{ 9822 int vlen_enc = vector_length_encoding(this, $src); 9823 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9824 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9825 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9826 %} 9827 ins_pipe( pipe_slow ); 9828 %} 9829 9830 // ---------------------------------- Vector Masked Operations ------------------------------------ 9831 9832 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ 9833 match(Set dst (AddVB (Binary dst src2) mask)); 9834 match(Set dst (AddVS (Binary dst src2) mask)); 9835 match(Set dst (AddVI (Binary dst src2) mask)); 9836 match(Set dst (AddVL (Binary dst src2) mask)); 9837 match(Set dst (AddVF (Binary dst src2) mask)); 9838 match(Set dst (AddVD (Binary dst src2) mask)); 9839 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9840 ins_encode %{ 9841 int vlen_enc = vector_length_encoding(this); 9842 BasicType bt = Matcher::vector_element_basic_type(this); 9843 int opc = this->ideal_Opcode(); 9844 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9845 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9846 %} 9847 ins_pipe( pipe_slow ); 9848 %} 9849 9850 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ 9851 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); 9852 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); 9853 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); 9854 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); 9855 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); 9856 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); 9857 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9858 ins_encode %{ 9859 int vlen_enc = vector_length_encoding(this); 9860 BasicType bt = Matcher::vector_element_basic_type(this); 9861 int opc = this->ideal_Opcode(); 9862 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9863 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9864 %} 9865 ins_pipe( pipe_slow ); 9866 %} 9867 9868 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ 9869 match(Set dst (XorV (Binary dst src2) mask)); 9870 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! 
xor masked operation" %} 9871 ins_encode %{ 9872 int vlen_enc = vector_length_encoding(this); 9873 BasicType bt = Matcher::vector_element_basic_type(this); 9874 int opc = this->ideal_Opcode(); 9875 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9876 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9877 %} 9878 ins_pipe( pipe_slow ); 9879 %} 9880 9881 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9882 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9883 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9884 ins_encode %{ 9885 int vlen_enc = vector_length_encoding(this); 9886 BasicType bt = Matcher::vector_element_basic_type(this); 9887 int opc = this->ideal_Opcode(); 9888 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9889 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9890 %} 9891 ins_pipe( pipe_slow ); 9892 %} 9893 9894 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9895 match(Set dst (OrV (Binary dst src2) mask)); 9896 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9897 ins_encode %{ 9898 int vlen_enc = vector_length_encoding(this); 9899 BasicType bt = Matcher::vector_element_basic_type(this); 9900 int opc = this->ideal_Opcode(); 9901 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9902 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9903 %} 9904 ins_pipe( pipe_slow ); 9905 %} 9906 9907 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9908 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9909 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9910 ins_encode %{ 9911 int vlen_enc = vector_length_encoding(this); 9912 BasicType bt = Matcher::vector_element_basic_type(this); 9913 int opc = this->ideal_Opcode(); 9914 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9915 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9916 %} 9917 ins_pipe( pipe_slow ); 9918 %} 9919 9920 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9921 match(Set dst (AndV (Binary dst src2) mask)); 9922 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9923 ins_encode %{ 9924 int vlen_enc = vector_length_encoding(this); 9925 BasicType bt = Matcher::vector_element_basic_type(this); 9926 int opc = this->ideal_Opcode(); 9927 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9928 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9929 %} 9930 ins_pipe( pipe_slow ); 9931 %} 9932 9933 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 9934 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 9935 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9936 ins_encode %{ 9937 int vlen_enc = vector_length_encoding(this); 9938 BasicType bt = Matcher::vector_element_basic_type(this); 9939 int opc = this->ideal_Opcode(); 9940 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9941 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9942 %} 9943 ins_pipe( pipe_slow ); 9944 %} 9945 9946 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 9947 match(Set dst (SubVB (Binary dst src2) mask)); 9948 match(Set dst (SubVS (Binary dst src2) mask)); 9949 match(Set dst (SubVI (Binary dst src2) mask)); 9950 match(Set dst (SubVL (Binary dst src2) mask)); 9951 match(Set dst (SubVF (Binary dst src2) mask)); 9952 match(Set dst (SubVD (Binary dst src2) mask)); 9953 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! 
sub masked operation" %} 9954 ins_encode %{ 9955 int vlen_enc = vector_length_encoding(this); 9956 BasicType bt = Matcher::vector_element_basic_type(this); 9957 int opc = this->ideal_Opcode(); 9958 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9959 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9960 %} 9961 ins_pipe( pipe_slow ); 9962 %} 9963 9964 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{ 9965 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask)); 9966 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask)); 9967 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask)); 9968 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask)); 9969 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask)); 9970 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask)); 9971 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 9972 ins_encode %{ 9973 int vlen_enc = vector_length_encoding(this); 9974 BasicType bt = Matcher::vector_element_basic_type(this); 9975 int opc = this->ideal_Opcode(); 9976 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9977 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9978 %} 9979 ins_pipe( pipe_slow ); 9980 %} 9981 9982 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{ 9983 match(Set dst (MulVS (Binary dst src2) mask)); 9984 match(Set dst (MulVI (Binary dst src2) mask)); 9985 match(Set dst (MulVL (Binary dst src2) mask)); 9986 match(Set dst (MulVF (Binary dst src2) mask)); 9987 match(Set dst (MulVD (Binary dst src2) mask)); 9988 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9989 ins_encode %{ 9990 int vlen_enc = vector_length_encoding(this); 9991 BasicType bt = Matcher::vector_element_basic_type(this); 9992 int opc = this->ideal_Opcode(); 9993 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9994 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9995 %} 9996 ins_pipe( pipe_slow ); 9997 %} 9998 9999 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{ 10000 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask)); 10001 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask)); 10002 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask)); 10003 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask)); 10004 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask)); 10005 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 10006 ins_encode %{ 10007 int vlen_enc = vector_length_encoding(this); 10008 BasicType bt = Matcher::vector_element_basic_type(this); 10009 int opc = this->ideal_Opcode(); 10010 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10011 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10012 %} 10013 ins_pipe( pipe_slow ); 10014 %} 10015 10016 instruct vsqrt_reg_masked(vec dst, kReg mask) %{ 10017 match(Set dst (SqrtVF dst mask)); 10018 match(Set dst (SqrtVD dst mask)); 10019 format %{ "vpsqrt_masked $dst, $mask\t! 
sqrt masked operation" %} 10020 ins_encode %{ 10021 int vlen_enc = vector_length_encoding(this); 10022 BasicType bt = Matcher::vector_element_basic_type(this); 10023 int opc = this->ideal_Opcode(); 10024 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10025 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 10026 %} 10027 ins_pipe( pipe_slow ); 10028 %} 10029 10030 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{ 10031 match(Set dst (DivVF (Binary dst src2) mask)); 10032 match(Set dst (DivVD (Binary dst src2) mask)); 10033 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 10034 ins_encode %{ 10035 int vlen_enc = vector_length_encoding(this); 10036 BasicType bt = Matcher::vector_element_basic_type(this); 10037 int opc = this->ideal_Opcode(); 10038 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10039 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10040 %} 10041 ins_pipe( pipe_slow ); 10042 %} 10043 10044 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{ 10045 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask)); 10046 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask)); 10047 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 10048 ins_encode %{ 10049 int vlen_enc = vector_length_encoding(this); 10050 BasicType bt = Matcher::vector_element_basic_type(this); 10051 int opc = this->ideal_Opcode(); 10052 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10053 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10054 %} 10055 ins_pipe( pipe_slow ); 10056 %} 10057 10058 10059 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10060 match(Set dst (RotateLeftV (Binary dst shift) mask)); 10061 match(Set dst (RotateRightV (Binary dst shift) mask)); 10062 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %} 10063 ins_encode %{ 10064 int vlen_enc = vector_length_encoding(this); 10065 BasicType bt = Matcher::vector_element_basic_type(this); 10066 int opc = this->ideal_Opcode(); 10067 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10068 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10069 %} 10070 ins_pipe( pipe_slow ); 10071 %} 10072 10073 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{ 10074 match(Set dst (RotateLeftV (Binary dst src2) mask)); 10075 match(Set dst (RotateRightV (Binary dst src2) mask)); 10076 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %} 10077 ins_encode %{ 10078 int vlen_enc = vector_length_encoding(this); 10079 BasicType bt = Matcher::vector_element_basic_type(this); 10080 int opc = this->ideal_Opcode(); 10081 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10082 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10083 %} 10084 ins_pipe( pipe_slow ); 10085 %} 10086 10087 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10088 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask)); 10089 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask)); 10090 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask)); 10091 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! 
lshift masked operation" %} 10092 ins_encode %{ 10093 int vlen_enc = vector_length_encoding(this); 10094 BasicType bt = Matcher::vector_element_basic_type(this); 10095 int opc = this->ideal_Opcode(); 10096 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10097 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10098 %} 10099 ins_pipe( pipe_slow ); 10100 %} 10101 10102 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10103 predicate(!n->as_ShiftV()->is_var_shift()); 10104 match(Set dst (LShiftVS (Binary dst src2) mask)); 10105 match(Set dst (LShiftVI (Binary dst src2) mask)); 10106 match(Set dst (LShiftVL (Binary dst src2) mask)); 10107 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 10108 ins_encode %{ 10109 int vlen_enc = vector_length_encoding(this); 10110 BasicType bt = Matcher::vector_element_basic_type(this); 10111 int opc = this->ideal_Opcode(); 10112 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10113 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10114 %} 10115 ins_pipe( pipe_slow ); 10116 %} 10117 10118 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10119 predicate(n->as_ShiftV()->is_var_shift()); 10120 match(Set dst (LShiftVS (Binary dst src2) mask)); 10121 match(Set dst (LShiftVI (Binary dst src2) mask)); 10122 match(Set dst (LShiftVL (Binary dst src2) mask)); 10123 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 10124 ins_encode %{ 10125 int vlen_enc = vector_length_encoding(this); 10126 BasicType bt = Matcher::vector_element_basic_type(this); 10127 int opc = this->ideal_Opcode(); 10128 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10129 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10130 %} 10131 ins_pipe( pipe_slow ); 10132 %} 10133 10134 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10135 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask)); 10136 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask)); 10137 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask)); 10138 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %} 10139 ins_encode %{ 10140 int vlen_enc = vector_length_encoding(this); 10141 BasicType bt = Matcher::vector_element_basic_type(this); 10142 int opc = this->ideal_Opcode(); 10143 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10144 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10145 %} 10146 ins_pipe( pipe_slow ); 10147 %} 10148 10149 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10150 predicate(!n->as_ShiftV()->is_var_shift()); 10151 match(Set dst (RShiftVS (Binary dst src2) mask)); 10152 match(Set dst (RShiftVI (Binary dst src2) mask)); 10153 match(Set dst (RShiftVL (Binary dst src2) mask)); 10154 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! 
rshift masked operation" %} 10155 ins_encode %{ 10156 int vlen_enc = vector_length_encoding(this); 10157 BasicType bt = Matcher::vector_element_basic_type(this); 10158 int opc = this->ideal_Opcode(); 10159 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10160 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10161 %} 10162 ins_pipe( pipe_slow ); 10163 %} 10164 10165 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10166 predicate(n->as_ShiftV()->is_var_shift()); 10167 match(Set dst (RShiftVS (Binary dst src2) mask)); 10168 match(Set dst (RShiftVI (Binary dst src2) mask)); 10169 match(Set dst (RShiftVL (Binary dst src2) mask)); 10170 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 10171 ins_encode %{ 10172 int vlen_enc = vector_length_encoding(this); 10173 BasicType bt = Matcher::vector_element_basic_type(this); 10174 int opc = this->ideal_Opcode(); 10175 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10176 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10177 %} 10178 ins_pipe( pipe_slow ); 10179 %} 10180 10181 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10182 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask)); 10183 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask)); 10184 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask)); 10185 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %} 10186 ins_encode %{ 10187 int vlen_enc = vector_length_encoding(this); 10188 BasicType bt = Matcher::vector_element_basic_type(this); 10189 int opc = this->ideal_Opcode(); 10190 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10191 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10192 %} 10193 ins_pipe( pipe_slow ); 10194 %} 10195 10196 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10197 predicate(!n->as_ShiftV()->is_var_shift()); 10198 match(Set dst (URShiftVS (Binary dst src2) mask)); 10199 match(Set dst (URShiftVI (Binary dst src2) mask)); 10200 match(Set dst (URShiftVL (Binary dst src2) mask)); 10201 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 10202 ins_encode %{ 10203 int vlen_enc = vector_length_encoding(this); 10204 BasicType bt = Matcher::vector_element_basic_type(this); 10205 int opc = this->ideal_Opcode(); 10206 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10207 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10208 %} 10209 ins_pipe( pipe_slow ); 10210 %} 10211 10212 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10213 predicate(n->as_ShiftV()->is_var_shift()); 10214 match(Set dst (URShiftVS (Binary dst src2) mask)); 10215 match(Set dst (URShiftVI (Binary dst src2) mask)); 10216 match(Set dst (URShiftVL (Binary dst src2) mask)); 10217 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 10218 ins_encode %{ 10219 int vlen_enc = vector_length_encoding(this); 10220 BasicType bt = Matcher::vector_element_basic_type(this); 10221 int opc = this->ideal_Opcode(); 10222 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10223 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10224 %} 10225 ins_pipe( pipe_slow ); 10226 %} 10227 10228 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{ 10229 match(Set dst (MaxV (Binary dst src2) mask)); 10230 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! 
max masked operation" %} 10231 ins_encode %{ 10232 int vlen_enc = vector_length_encoding(this); 10233 BasicType bt = Matcher::vector_element_basic_type(this); 10234 int opc = this->ideal_Opcode(); 10235 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10236 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10237 %} 10238 ins_pipe( pipe_slow ); 10239 %} 10240 10241 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{ 10242 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask)); 10243 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} 10244 ins_encode %{ 10245 int vlen_enc = vector_length_encoding(this); 10246 BasicType bt = Matcher::vector_element_basic_type(this); 10247 int opc = this->ideal_Opcode(); 10248 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10249 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10250 %} 10251 ins_pipe( pipe_slow ); 10252 %} 10253 10254 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{ 10255 match(Set dst (MinV (Binary dst src2) mask)); 10256 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 10257 ins_encode %{ 10258 int vlen_enc = vector_length_encoding(this); 10259 BasicType bt = Matcher::vector_element_basic_type(this); 10260 int opc = this->ideal_Opcode(); 10261 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10262 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10263 %} 10264 ins_pipe( pipe_slow ); 10265 %} 10266 10267 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{ 10268 match(Set dst (MinV (Binary dst (LoadVector src2)) mask)); 10269 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 10270 ins_encode %{ 10271 int vlen_enc = vector_length_encoding(this); 10272 BasicType bt = Matcher::vector_element_basic_type(this); 10273 int opc = this->ideal_Opcode(); 10274 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10275 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10276 %} 10277 ins_pipe( pipe_slow ); 10278 %} 10279 10280 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{ 10281 match(Set dst (VectorRearrange (Binary dst src2) mask)); 10282 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %} 10283 ins_encode %{ 10284 int vlen_enc = vector_length_encoding(this); 10285 BasicType bt = Matcher::vector_element_basic_type(this); 10286 int opc = this->ideal_Opcode(); 10287 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10288 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 10289 %} 10290 ins_pipe( pipe_slow ); 10291 %} 10292 10293 instruct vabs_masked(vec dst, kReg mask) %{ 10294 match(Set dst (AbsVB dst mask)); 10295 match(Set dst (AbsVS dst mask)); 10296 match(Set dst (AbsVI dst mask)); 10297 match(Set dst (AbsVL dst mask)); 10298 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %} 10299 ins_encode %{ 10300 int vlen_enc = vector_length_encoding(this); 10301 BasicType bt = Matcher::vector_element_basic_type(this); 10302 int opc = this->ideal_Opcode(); 10303 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10304 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 10305 %} 10306 ins_pipe( pipe_slow ); 10307 %} 10308 10309 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{ 10310 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask))); 10311 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask))); 10312 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! 
vfma masked operation" %} 10313 ins_encode %{ 10314 assert(UseFMA, "Needs FMA instructions support."); 10315 int vlen_enc = vector_length_encoding(this); 10316 BasicType bt = Matcher::vector_element_basic_type(this); 10317 int opc = this->ideal_Opcode(); 10318 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10319 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc); 10320 %} 10321 ins_pipe( pipe_slow ); 10322 %} 10323 10324 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{ 10325 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask))); 10326 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask))); 10327 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %} 10328 ins_encode %{ 10329 assert(UseFMA, "Needs FMA instructions support."); 10330 int vlen_enc = vector_length_encoding(this); 10331 BasicType bt = Matcher::vector_element_basic_type(this); 10332 int opc = this->ideal_Opcode(); 10333 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10334 $src2$$XMMRegister, $src3$$Address, true, vlen_enc); 10335 %} 10336 ins_pipe( pipe_slow ); 10337 %} 10338 10339 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{ 10340 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask))); 10341 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %} 10342 ins_encode %{ 10343 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 10344 int vlen_enc = vector_length_encoding(this, $src1); 10345 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 10346 10347 // Comparison i 10348 switch (src1_elem_bt) { 10349 case T_BYTE: { 10350 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10351 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10352 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10353 break; 10354 } 10355 case T_SHORT: { 10356 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10357 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10358 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10359 break; 10360 } 10361 case T_INT: { 10362 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10363 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10364 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10365 break; 10366 } 10367 case T_LONG: { 10368 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10369 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10370 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10371 break; 10372 } 10373 case T_FLOAT: { 10374 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 10375 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 10376 break; 10377 } 10378 case T_DOUBLE: { 10379 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 10380 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 10381 break; 10382 } 10383 default: assert(false, 
"%s", type2name(src1_elem_bt)); break; 10384 } 10385 %} 10386 ins_pipe( pipe_slow ); 10387 %} 10388 10389 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{ 10390 predicate(Matcher::vector_length(n) <= 32); 10391 match(Set dst (MaskAll src)); 10392 format %{ "mask_all_evexI_LE32 $dst, $src \t" %} 10393 ins_encode %{ 10394 int mask_len = Matcher::vector_length(this); 10395 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); 10396 %} 10397 ins_pipe( pipe_slow ); 10398 %} 10399 10400 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ 10401 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); 10402 match(Set dst (XorVMask src (MaskAll cnt))); 10403 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp); 10404 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %} 10405 ins_encode %{ 10406 uint masklen = Matcher::vector_length(this); 10407 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register); 10408 %} 10409 ins_pipe( pipe_slow ); 10410 %} 10411 10412 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{ 10413 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) || 10414 (Matcher::vector_length(n) == 16) || 10415 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw())); 10416 match(Set dst (XorVMask src (MaskAll cnt))); 10417 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %} 10418 ins_encode %{ 10419 uint masklen = Matcher::vector_length(this); 10420 __ knot(masklen, $dst$$KRegister, $src$$KRegister); 10421 %} 10422 ins_pipe( pipe_slow ); 10423 %} 10424 10425 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{ 10426 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8); 10427 match(Set dst (VectorLongToMask src)); 10428 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp); 10429 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %} 10430 ins_encode %{ 10431 int mask_len = Matcher::vector_length(this); 10432 int vec_enc = vector_length_encoding(mask_len); 10433 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10434 $rtmp2$$Register, xnoreg, mask_len, vec_enc); 10435 %} 10436 ins_pipe( pipe_slow ); 10437 %} 10438 10439 10440 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{ 10441 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8); 10442 match(Set dst (VectorLongToMask src)); 10443 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr); 10444 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %} 10445 ins_encode %{ 10446 int mask_len = Matcher::vector_length(this); 10447 assert(mask_len <= 32, "invalid mask length"); 10448 int vec_enc = vector_length_encoding(mask_len); 10449 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10450 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc); 10451 %} 10452 ins_pipe( pipe_slow ); 10453 %} 10454 10455 instruct long_to_mask_evex(kReg dst, rRegL src) %{ 10456 predicate(n->bottom_type()->isa_vectmask()); 10457 match(Set dst (VectorLongToMask src)); 10458 format %{ "long_to_mask_evex $dst, $src\t!" 
%} 10459 ins_encode %{ 10460 __ kmov($dst$$KRegister, $src$$Register); 10461 %} 10462 ins_pipe( pipe_slow ); 10463 %} 10464 10465 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{ 10466 match(Set dst (AndVMask src1 src2)); 10467 match(Set dst (OrVMask src1 src2)); 10468 match(Set dst (XorVMask src1 src2)); 10469 effect(TEMP kscratch); 10470 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %} 10471 ins_encode %{ 10472 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 10473 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 10474 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal"); 10475 uint masklen = Matcher::vector_length(this); 10476 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen; 10477 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister); 10478 %} 10479 ins_pipe( pipe_slow ); 10480 %} 10481 10482 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{ 10483 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 10484 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %} 10485 ins_encode %{ 10486 int vlen_enc = vector_length_encoding(this); 10487 BasicType bt = Matcher::vector_element_basic_type(this); 10488 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 10489 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc); 10490 %} 10491 ins_pipe( pipe_slow ); 10492 %} 10493 10494 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{ 10495 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 10496 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! 
vternlog masked operation" %} 10497 ins_encode %{ 10498 int vlen_enc = vector_length_encoding(this); 10499 BasicType bt = Matcher::vector_element_basic_type(this); 10500 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 10501 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc); 10502 %} 10503 ins_pipe( pipe_slow ); 10504 %} 10505 10506 instruct castMM(kReg dst) 10507 %{ 10508 match(Set dst (CastVV dst)); 10509 10510 size(0); 10511 format %{ "# castVV of $dst" %} 10512 ins_encode(/* empty encoding */); 10513 ins_cost(0); 10514 ins_pipe(empty); 10515 %} 10516 10517 instruct castVV(vec dst) 10518 %{ 10519 match(Set dst (CastVV dst)); 10520 10521 size(0); 10522 format %{ "# castVV of $dst" %} 10523 ins_encode(/* empty encoding */); 10524 ins_cost(0); 10525 ins_pipe(empty); 10526 %} 10527 10528 instruct castVVLeg(legVec dst) 10529 %{ 10530 match(Set dst (CastVV dst)); 10531 10532 size(0); 10533 format %{ "# castVV of $dst" %} 10534 ins_encode(/* empty encoding */); 10535 ins_cost(0); 10536 ins_pipe(empty); 10537 %} 10538 10539 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr) 10540 %{ 10541 match(Set dst (IsInfiniteF src)); 10542 effect(TEMP ktmp, KILL cr); 10543 format %{ "float_class_check $dst, $src" %} 10544 ins_encode %{ 10545 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18); 10546 __ kmovbl($dst$$Register, $ktmp$$KRegister); 10547 %} 10548 ins_pipe(pipe_slow); 10549 %} 10550 10551 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr) 10552 %{ 10553 match(Set dst (IsInfiniteD src)); 10554 effect(TEMP ktmp, KILL cr); 10555 format %{ "double_class_check $dst, $src" %} 10556 ins_encode %{ 10557 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18); 10558 __ kmovbl($dst$$Register, $ktmp$$KRegister); 10559 %} 10560 ins_pipe(pipe_slow); 10561 %} 10562 10563 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2) 10564 %{ 10565 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10566 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10567 match(Set dst (SaturatingAddV src1 src2)); 10568 match(Set dst (SaturatingSubV src1 src2)); 10569 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %} 10570 ins_encode %{ 10571 int vlen_enc = vector_length_encoding(this); 10572 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10573 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10574 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 10575 %} 10576 ins_pipe(pipe_slow); 10577 %} 10578 10579 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2) 10580 %{ 10581 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10582 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10583 match(Set dst (SaturatingAddV src1 src2)); 10584 match(Set dst (SaturatingSubV src1 src2)); 10585 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %} 10586 ins_encode %{ 10587 int vlen_enc = vector_length_encoding(this); 10588 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10589 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10590 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10591 %} 10592 ins_pipe(pipe_slow); 10593 %} 10594 10595 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2) 10596 %{ 10597 
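  // x86 has no saturating add/sub instruction for int/long lanes, so this EVEX rule (and
  // the AVX fallback below) emulates it: vector_addsub_dq_saturating_evex computes the
  // wrapping result and then, roughly, uses the ktmp opmasks to detect overflowed lanes
  // and clamp them to MIN_VALUE/MAX_VALUE.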
predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10598 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() && 10599 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10600 match(Set dst (SaturatingAddV src1 src2)); 10601 match(Set dst (SaturatingSubV src1 src2)); 10602 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2); 10603 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 10604 ins_encode %{ 10605 int vlen_enc = vector_length_encoding(this); 10606 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10607 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10608 $src1$$XMMRegister, $src2$$XMMRegister, 10609 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 10610 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc); 10611 %} 10612 ins_pipe(pipe_slow); 10613 %} 10614 10615 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4) 10616 %{ 10617 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10618 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() && 10619 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10620 match(Set dst (SaturatingAddV src1 src2)); 10621 match(Set dst (SaturatingSubV src1 src2)); 10622 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4); 10623 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 10624 ins_encode %{ 10625 int vlen_enc = vector_length_encoding(this); 10626 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10627 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, 10628 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 10629 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc); 10630 %} 10631 ins_pipe(pipe_slow); 10632 %} 10633 10634 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp) 10635 %{ 10636 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10637 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10638 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10639 match(Set dst (SaturatingAddV src1 src2)); 10640 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp); 10641 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! 
using $xtmp1, $xtmp2 and $ktmp as TEMP" %} 10642 ins_encode %{ 10643 int vlen_enc = vector_length_encoding(this); 10644 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10645 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10646 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc); 10647 %} 10648 ins_pipe(pipe_slow); 10649 %} 10650 10651 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3) 10652 %{ 10653 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10654 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10655 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10656 match(Set dst (SaturatingAddV src1 src2)); 10657 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 10658 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 10659 ins_encode %{ 10660 int vlen_enc = vector_length_encoding(this); 10661 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10662 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10663 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc); 10664 %} 10665 ins_pipe(pipe_slow); 10666 %} 10667 10668 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp) 10669 %{ 10670 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10671 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10672 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10673 match(Set dst (SaturatingSubV src1 src2)); 10674 effect(TEMP ktmp); 10675 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %} 10676 ins_encode %{ 10677 int vlen_enc = vector_length_encoding(this); 10678 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10679 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, 10680 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc); 10681 %} 10682 ins_pipe(pipe_slow); 10683 %} 10684 10685 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) 10686 %{ 10687 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10688 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10689 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10690 match(Set dst (SaturatingSubV src1 src2)); 10691 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 10692 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! 
using $xtmp1 and $xtmp2 as TEMP" %} 10693 ins_encode %{ 10694 int vlen_enc = vector_length_encoding(this); 10695 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10696 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10697 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 10698 %} 10699 ins_pipe(pipe_slow); 10700 %} 10701 10702 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2) 10703 %{ 10704 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10705 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10706 match(Set dst (SaturatingAddV src1 (LoadVector src2))); 10707 match(Set dst (SaturatingSubV src1 (LoadVector src2))); 10708 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %} 10709 ins_encode %{ 10710 int vlen_enc = vector_length_encoding(this); 10711 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10712 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10713 $src1$$XMMRegister, $src2$$Address, false, vlen_enc); 10714 %} 10715 ins_pipe(pipe_slow); 10716 %} 10717 10718 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2) 10719 %{ 10720 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10721 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10722 match(Set dst (SaturatingAddV src1 (LoadVector src2))); 10723 match(Set dst (SaturatingSubV src1 (LoadVector src2))); 10724 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %} 10725 ins_encode %{ 10726 int vlen_enc = vector_length_encoding(this); 10727 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10728 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10729 $src1$$XMMRegister, $src2$$Address, true, vlen_enc); 10730 %} 10731 ins_pipe(pipe_slow); 10732 %} 10733 10734 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{ 10735 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10736 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10737 match(Set dst (SaturatingAddV (Binary dst src) mask)); 10738 match(Set dst (SaturatingSubV (Binary dst src) mask)); 10739 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %} 10740 ins_encode %{ 10741 int vlen_enc = vector_length_encoding(this); 10742 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10743 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10744 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc); 10745 %} 10746 ins_pipe( pipe_slow ); 10747 %} 10748 10749 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{ 10750 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10751 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10752 match(Set dst (SaturatingAddV (Binary dst src) mask)); 10753 match(Set dst (SaturatingSubV (Binary dst src) mask)); 10754 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %} 10755 ins_encode %{ 10756 int vlen_enc = vector_length_encoding(this); 10757 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10758 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10759 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc); 10760 %} 10761 ins_pipe( 
pipe_slow ); 10762 %} 10763 10764 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{ 10765 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10766 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10767 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask)); 10768 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask)); 10769 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %} 10770 ins_encode %{ 10771 int vlen_enc = vector_length_encoding(this); 10772 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10773 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10774 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc); 10775 %} 10776 ins_pipe( pipe_slow ); 10777 %} 10778 10779 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{ 10780 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10781 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10782 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask)); 10783 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask)); 10784 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %} 10785 ins_encode %{ 10786 int vlen_enc = vector_length_encoding(this); 10787 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10788 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10789 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc); 10790 %} 10791 ins_pipe( pipe_slow ); 10792 %} 10793 10794 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2) 10795 %{ 10796 match(Set index (SelectFromTwoVector (Binary index src1) src2)); 10797 format %{ "select_from_two_vector $index, $src1, $src2 \t!" 
%} 10798 ins_encode %{ 10799 int vlen_enc = vector_length_encoding(this); 10800 BasicType bt = Matcher::vector_element_basic_type(this); 10801 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 10802 %} 10803 ins_pipe(pipe_slow); 10804 %} 10805 10806 instruct reinterpretS2HF(regF dst, rRegI src) 10807 %{ 10808 match(Set dst (ReinterpretS2HF src)); 10809 format %{ "vmovw $dst, $src" %} 10810 ins_encode %{ 10811 __ vmovw($dst$$XMMRegister, $src$$Register); 10812 %} 10813 ins_pipe(pipe_slow); 10814 %} 10815 10816 instruct reinterpretHF2S(rRegI dst, regF src) 10817 %{ 10818 match(Set dst (ReinterpretHF2S src)); 10819 format %{ "vmovw $dst, $src" %} 10820 ins_encode %{ 10821 __ vmovw($dst$$Register, $src$$XMMRegister); 10822 %} 10823 ins_pipe(pipe_slow); 10824 %} 10825 10826 instruct convF2HFAndS2HF(regF dst, regF src) 10827 %{ 10828 match(Set dst (ReinterpretS2HF (ConvF2HF src))); 10829 format %{ "convF2HFAndS2HF $dst, $src" %} 10830 ins_encode %{ 10831 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 10832 %} 10833 ins_pipe(pipe_slow); 10834 %} 10835 10836 instruct convHF2SAndHF2F(regF dst, regF src) 10837 %{ 10838 match(Set dst (ConvHF2F (ReinterpretHF2S src))); 10839 format %{ "convHF2SAndHF2F $dst, $src" %} 10840 ins_encode %{ 10841 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit); 10842 %} 10843 ins_pipe(pipe_slow); 10844 %} 10845 10846 instruct scalar_sqrt_HF_reg(regF dst, regF src) 10847 %{ 10848 match(Set dst (SqrtHF src)); 10849 format %{ "scalar_sqrt_fp16 $dst, $src" %} 10850 ins_encode %{ 10851 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister); 10852 %} 10853 ins_pipe(pipe_slow); 10854 %} 10855 10856 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2) 10857 %{ 10858 match(Set dst (AddHF src1 src2)); 10859 match(Set dst (DivHF src1 src2)); 10860 match(Set dst (MulHF src1 src2)); 10861 match(Set dst (SubHF src1 src2)); 10862 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %} 10863 ins_encode %{ 10864 int opcode = this->ideal_Opcode(); 10865 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 10866 %} 10867 ins_pipe(pipe_slow); 10868 %} 10869 10870 instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2) 10871 %{ 10872 predicate(VM_Version::supports_avx10_2()); 10873 match(Set dst (MaxHF src1 src2)); 10874 match(Set dst (MinHF src1 src2)); 10875 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %} 10876 ins_encode %{ 10877 int function = this->ideal_Opcode() == Op_MinHF ? 
AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN; 10878 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function); 10879 %} 10880 ins_pipe( pipe_slow ); 10881 %} 10882 10883 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2) 10884 %{ 10885 predicate(!VM_Version::supports_avx10_2()); 10886 match(Set dst (MaxHF src1 src2)); 10887 match(Set dst (MinHF src1 src2)); 10888 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2); 10889 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %} 10890 ins_encode %{ 10891 int opcode = this->ideal_Opcode(); 10892 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister, 10893 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 10894 %} 10895 ins_pipe( pipe_slow ); 10896 %} 10897 10898 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2) 10899 %{ 10900 match(Set dst (FmaHF src2 (Binary dst src1))); 10901 effect(DEF dst); 10902 format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %} 10903 ins_encode %{ 10904 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister); 10905 %} 10906 ins_pipe( pipe_slow ); 10907 %} 10908 10909 10910 instruct vector_sqrt_HF_reg(vec dst, vec src) 10911 %{ 10912 match(Set dst (SqrtVHF src)); 10913 format %{ "vector_sqrt_fp16 $dst, $src" %} 10914 ins_encode %{ 10915 int vlen_enc = vector_length_encoding(this); 10916 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 10917 %} 10918 ins_pipe(pipe_slow); 10919 %} 10920 10921 instruct vector_sqrt_HF_mem(vec dst, memory src) 10922 %{ 10923 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src)))); 10924 format %{ "vector_sqrt_fp16_mem $dst, $src" %} 10925 ins_encode %{ 10926 int vlen_enc = vector_length_encoding(this); 10927 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc); 10928 %} 10929 ins_pipe(pipe_slow); 10930 %} 10931 10932 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2) 10933 %{ 10934 match(Set dst (AddVHF src1 src2)); 10935 match(Set dst (DivVHF src1 src2)); 10936 match(Set dst (MulVHF src1 src2)); 10937 match(Set dst (SubVHF src1 src2)); 10938 format %{ "vector_binop_fp16 $dst, $src1, $src2" %} 10939 ins_encode %{ 10940 int vlen_enc = vector_length_encoding(this); 10941 int opcode = this->ideal_Opcode(); 10942 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 10943 %} 10944 ins_pipe(pipe_slow); 10945 %} 10946 10947 10948 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2) 10949 %{ 10950 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2)))); 10951 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2)))); 10952 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2)))); 10953 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2)))); 10954 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %} 10955 ins_encode %{ 10956 int vlen_enc = vector_length_encoding(this); 10957 int opcode = this->ideal_Opcode(); 10958 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc); 10959 %} 10960 ins_pipe(pipe_slow); 10961 %} 10962 10963 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2) 10964 %{ 10965 match(Set dst (FmaVHF src2 (Binary dst src1))); 10966 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %} 10967 ins_encode %{ 10968 int vlen_enc = vector_length_encoding(this); 10969 
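    // VFMADD132 computes dst = dst * operand3 + operand2, so passing ($dst, $src2, $src1)
    // below yields dst = dst * src1 + src2, matching the node shape and the format string above.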
__ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc); 10970 %} 10971 ins_pipe( pipe_slow ); 10972 %} 10973 10974 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2) 10975 %{ 10976 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1))))); 10977 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %} 10978 ins_encode %{ 10979 int vlen_enc = vector_length_encoding(this); 10980 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc); 10981 %} 10982 ins_pipe( pipe_slow ); 10983 %} 10984 10985 instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2) 10986 %{ 10987 predicate(VM_Version::supports_avx10_2()); 10988 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2)))); 10989 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2)))); 10990 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %} 10991 ins_encode %{ 10992 int vlen_enc = vector_length_encoding(this); 10993 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN; 10994 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc); 10995 %} 10996 ins_pipe( pipe_slow ); 10997 %} 10998 10999 instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2) 11000 %{ 11001 predicate(VM_Version::supports_avx10_2()); 11002 match(Set dst (MinVHF src1 src2)); 11003 match(Set dst (MaxVHF src1 src2)); 11004 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %} 11005 ins_encode %{ 11006 int vlen_enc = vector_length_encoding(this); 11007 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN; 11008 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc); 11009 %} 11010 ins_pipe( pipe_slow ); 11011 %} 11012 11013 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2) 11014 %{ 11015 predicate(!VM_Version::supports_avx10_2()); 11016 match(Set dst (MinVHF src1 src2)); 11017 match(Set dst (MaxVHF src1 src2)); 11018 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2); 11019 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %} 11020 ins_encode %{ 11021 int vlen_enc = vector_length_encoding(this); 11022 int opcode = this->ideal_Opcode(); 11023 __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister, 11024 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 11025 %} 11026 ins_pipe( pipe_slow ); 11027 %}
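// Note on the FP16 min/max rules above: AVX10.2 targets use the native VMINMAX encoding,
// with the immediate operand selecting min or max, while pre-AVX10.2 FP16 targets fall
// back to vector_max_min_fp16 / scalar_max_min_fp16, which (like the existing float and
// double rules) have to honor Math.min/max semantics for NaN and signed zero with extra
// compares and blends, hence the additional ktmp/xtmp temporaries.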