//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers, or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre-EVEX-enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
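
// Illustrative sketch only (not an additional register definition): a reg_def
// entry ties a symbolic name to the four attributes described above plus the
// backing VMReg.  For example, a hypothetical save-on-entry float slot could
// be written as
//
//   reg_def XMM_EXAMPLE( SOE, SOE, Op_RegF, 0, xmm0->as_VMReg());
//
// XMM_EXAMPLE is a made-up name used here only to show the syntax; the actual
// XMM definitions below all use SOC for both save-type columns.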
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM15 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, 
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 214 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 215 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 216 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 217 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 218 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 219 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 220 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 221 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 222 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 223 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 224 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 225 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 226 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 227 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 228 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 229 230 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 231 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 232 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 233 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 234 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 235 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 236 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 237 reg_def XMM9h( SOC, SOC, Op_RegF, 9, 
xmm9->as_VMReg()->next(7)); 238 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 239 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 240 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 241 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 242 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 243 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 244 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 245 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 246 247 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 248 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 249 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 250 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 251 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 252 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 253 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 254 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 255 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 256 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 257 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 258 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 259 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 260 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 261 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 262 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 263 264 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 265 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 266 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 267 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 268 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 269 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 270 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 271 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 272 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 273 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 274 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 275 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 276 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 277 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 278 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 279 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 280 281 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 282 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 283 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 284 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 285 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 286 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 287 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 288 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 289 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 290 reg_def XMM12j( 
SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 291 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 292 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 293 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 294 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 295 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 296 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 297 298 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 299 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 300 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 301 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 302 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 303 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 304 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 305 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 306 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 307 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 308 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 309 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 310 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 311 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 312 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 313 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 314 315 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 316 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 317 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 318 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 319 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 320 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 321 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 322 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 323 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 324 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 325 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 326 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 327 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 328 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 329 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 330 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 331 332 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 333 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 334 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 335 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 336 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 337 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 338 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 339 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 340 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 341 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 342 reg_def XMM15k( SOC, SOC, Op_RegF, 15, 
xmm15->as_VMReg()->next(10)); 343 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 344 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 345 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 346 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 347 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 348 349 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 350 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 351 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 352 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 353 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 354 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 355 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 356 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 357 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 358 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 359 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 360 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 361 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 362 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 363 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 364 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 365 366 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 367 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 368 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 369 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 370 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 371 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 372 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 373 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 374 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 375 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 376 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 377 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 378 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 379 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 380 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 381 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 382 383 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 384 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 385 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 386 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 387 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 388 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 389 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 390 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 391 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 392 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 393 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 394 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 395 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 396 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 397 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 398 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 399 400 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 401 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 402 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 403 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 404 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 405 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 406 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 407 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 408 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 409 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 410 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 411 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 412 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 413 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 414 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 415 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 416 417 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 418 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 419 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 420 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 421 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 422 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 423 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 424 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 425 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 426 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 427 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 428 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 429 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 430 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 431 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 432 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 433 434 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 435 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 436 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 437 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 438 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 439 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 440 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 441 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 442 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 443 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 444 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 445 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 446 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 447 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 448 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 449 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 450 451 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 452 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 453 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 454 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 455 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 456 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 457 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 458 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 459 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 460 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 461 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 462 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 463 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 464 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 465 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 466 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 467 468 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 469 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 470 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 471 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 472 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 473 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 474 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 475 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 476 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 477 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 478 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 479 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 480 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 481 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 482 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 483 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 484 485 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 486 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 487 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 488 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 489 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 490 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 491 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 492 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 493 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 494 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 495 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 496 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 497 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 498 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 499 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 500 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 501 502 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 503 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 504 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 505 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 506 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 507 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 508 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 509 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 510 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 511 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 512 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 513 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 514 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 515 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 516 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 517 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 518 519 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 520 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 521 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 522 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 523 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 524 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 525 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 526 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 527 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 528 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 529 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 530 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 531 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 532 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 533 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 534 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 535 536 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 537 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 538 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 539 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 540 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 541 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 542 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 543 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 544 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 545 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 546 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 547 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 548 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 549 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 550 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 551 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 552 553 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 554 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 555 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 556 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 557 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 558 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 559 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 560 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 561 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 562 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 563 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 564 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 565 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 566 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 567 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 568 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 569 570 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 571 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 572 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 573 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 574 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 575 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 576 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 577 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 578 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 579 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 580 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 581 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 582 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 583 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 584 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 585 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 586 587 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 588 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 589 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 590 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 591 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 592 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 593 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 594 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 595 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 596 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 597 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 598 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 599 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 600 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 601 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 602 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(15)); 603 604 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 605 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 606 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 607 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 608 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 609 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 610 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 611 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 612 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 613 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 614 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 615 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 616 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 617 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 618 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 619 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 620 621 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 622 623 // AVX3 Mask Registers. 624 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg()); 625 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next()); 626 627 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg()); 628 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next()); 629 630 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg()); 631 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next()); 632 633 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg()); 634 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next()); 635 636 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg()); 637 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next()); 638 639 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg()); 640 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next()); 641 642 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg()); 643 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next()); 644 645 646 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 647 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 648 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 649 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 650 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 651 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 652 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 653 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 654 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 655 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 656 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 657 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, 
XMM11n, XMM11o, XMM11p, 658 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 659 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 660 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 661 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p, 662 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 663 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 664 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 665 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 666 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 667 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 668 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 669 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 670 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 671 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 672 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 673 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 674 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 675 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 676 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 677 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); 678 679 alloc_class chunk2(K7, K7_H, 680 K6, K6_H, 681 K5, K5_H, 682 K4, K4_H, 683 K3, K3_H, 684 K2, K2_H, 685 K1, K1_H); 686 687 reg_class vectmask_reg(K1, K1_H, 688 K2, K2_H, 689 K3, K3_H, 690 K4, K4_H, 691 K5, K5_H, 692 K6, K6_H, 693 K7, K7_H); 694 695 reg_class vectmask_reg_K1(K1, K1_H); 696 reg_class vectmask_reg_K2(K2, K2_H); 697 reg_class vectmask_reg_K3(K3, K3_H); 698 reg_class vectmask_reg_K4(K4, K4_H); 699 reg_class vectmask_reg_K5(K5, K5_H); 700 reg_class vectmask_reg_K6(K6, K6_H); 701 reg_class vectmask_reg_K7(K7, K7_H); 702 703 // flags allocation class should be last. 
704 alloc_class chunk3(RFLAGS); 705 706 707 // Singleton class for condition codes 708 reg_class int_flags(RFLAGS); 709 710 // Class for pre evex float registers 711 reg_class float_reg_legacy(XMM0, 712 XMM1, 713 XMM2, 714 XMM3, 715 XMM4, 716 XMM5, 717 XMM6, 718 XMM7, 719 XMM8, 720 XMM9, 721 XMM10, 722 XMM11, 723 XMM12, 724 XMM13, 725 XMM14, 726 XMM15); 727 728 // Class for evex float registers 729 reg_class float_reg_evex(XMM0, 730 XMM1, 731 XMM2, 732 XMM3, 733 XMM4, 734 XMM5, 735 XMM6, 736 XMM7, 737 XMM8, 738 XMM9, 739 XMM10, 740 XMM11, 741 XMM12, 742 XMM13, 743 XMM14, 744 XMM15, 745 XMM16, 746 XMM17, 747 XMM18, 748 XMM19, 749 XMM20, 750 XMM21, 751 XMM22, 752 XMM23, 753 XMM24, 754 XMM25, 755 XMM26, 756 XMM27, 757 XMM28, 758 XMM29, 759 XMM30, 760 XMM31); 761 762 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 763 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 764 765 // Class for pre evex double registers 766 reg_class double_reg_legacy(XMM0, XMM0b, 767 XMM1, XMM1b, 768 XMM2, XMM2b, 769 XMM3, XMM3b, 770 XMM4, XMM4b, 771 XMM5, XMM5b, 772 XMM6, XMM6b, 773 XMM7, XMM7b, 774 XMM8, XMM8b, 775 XMM9, XMM9b, 776 XMM10, XMM10b, 777 XMM11, XMM11b, 778 XMM12, XMM12b, 779 XMM13, XMM13b, 780 XMM14, XMM14b, 781 XMM15, XMM15b); 782 783 // Class for evex double registers 784 reg_class double_reg_evex(XMM0, XMM0b, 785 XMM1, XMM1b, 786 XMM2, XMM2b, 787 XMM3, XMM3b, 788 XMM4, XMM4b, 789 XMM5, XMM5b, 790 XMM6, XMM6b, 791 XMM7, XMM7b, 792 XMM8, XMM8b, 793 XMM9, XMM9b, 794 XMM10, XMM10b, 795 XMM11, XMM11b, 796 XMM12, XMM12b, 797 XMM13, XMM13b, 798 XMM14, XMM14b, 799 XMM15, XMM15b, 800 XMM16, XMM16b, 801 XMM17, XMM17b, 802 XMM18, XMM18b, 803 XMM19, XMM19b, 804 XMM20, XMM20b, 805 XMM21, XMM21b, 806 XMM22, XMM22b, 807 XMM23, XMM23b, 808 XMM24, XMM24b, 809 XMM25, XMM25b, 810 XMM26, XMM26b, 811 XMM27, XMM27b, 812 XMM28, XMM28b, 813 XMM29, XMM29b, 814 XMM30, XMM30b, 815 XMM31, XMM31b); 816 817 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 818 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 819 820 // Class for pre evex 32bit vector registers 821 reg_class vectors_reg_legacy(XMM0, 822 XMM1, 823 XMM2, 824 XMM3, 825 XMM4, 826 XMM5, 827 XMM6, 828 XMM7, 829 XMM8, 830 XMM9, 831 XMM10, 832 XMM11, 833 XMM12, 834 XMM13, 835 XMM14, 836 XMM15); 837 838 // Class for evex 32bit vector registers 839 reg_class vectors_reg_evex(XMM0, 840 XMM1, 841 XMM2, 842 XMM3, 843 XMM4, 844 XMM5, 845 XMM6, 846 XMM7, 847 XMM8, 848 XMM9, 849 XMM10, 850 XMM11, 851 XMM12, 852 XMM13, 853 XMM14, 854 XMM15, 855 XMM16, 856 XMM17, 857 XMM18, 858 XMM19, 859 XMM20, 860 XMM21, 861 XMM22, 862 XMM23, 863 XMM24, 864 XMM25, 865 XMM26, 866 XMM27, 867 XMM28, 868 XMM29, 869 XMM30, 870 XMM31); 871 872 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 873 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 874 875 // Class for all 64bit vector registers 876 reg_class vectord_reg_legacy(XMM0, XMM0b, 877 XMM1, XMM1b, 878 XMM2, XMM2b, 879 XMM3, XMM3b, 880 XMM4, XMM4b, 881 XMM5, XMM5b, 882 XMM6, XMM6b, 883 XMM7, XMM7b, 884 XMM8, XMM8b, 885 XMM9, XMM9b, 886 XMM10, XMM10b, 887 XMM11, XMM11b, 888 XMM12, XMM12b, 889 XMM13, XMM13b, 890 XMM14, XMM14b, 891 XMM15, XMM15b); 
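
// Note (illustrative, added for clarity): each legacy/evex pair of register
// classes in this block is folded into a single allocator-visible class via a
// reg_class_dynamic declaration, which selects one of the two statically
// defined classes based on a C++ predicate evaluated at runtime, e.g.
//
//   reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy,
//                                 %{ VM_Version::supports_evex() %} );
//
// The actual declarations of this form appear throughout this block, after
// each evex class.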
892 893 // Class for all 64bit vector registers 894 reg_class vectord_reg_evex(XMM0, XMM0b, 895 XMM1, XMM1b, 896 XMM2, XMM2b, 897 XMM3, XMM3b, 898 XMM4, XMM4b, 899 XMM5, XMM5b, 900 XMM6, XMM6b, 901 XMM7, XMM7b, 902 XMM8, XMM8b, 903 XMM9, XMM9b, 904 XMM10, XMM10b, 905 XMM11, XMM11b, 906 XMM12, XMM12b, 907 XMM13, XMM13b, 908 XMM14, XMM14b, 909 XMM15, XMM15b, 910 XMM16, XMM16b, 911 XMM17, XMM17b, 912 XMM18, XMM18b, 913 XMM19, XMM19b, 914 XMM20, XMM20b, 915 XMM21, XMM21b, 916 XMM22, XMM22b, 917 XMM23, XMM23b, 918 XMM24, XMM24b, 919 XMM25, XMM25b, 920 XMM26, XMM26b, 921 XMM27, XMM27b, 922 XMM28, XMM28b, 923 XMM29, XMM29b, 924 XMM30, XMM30b, 925 XMM31, XMM31b); 926 927 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 928 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 929 930 // Class for all 128bit vector registers 931 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 932 XMM1, XMM1b, XMM1c, XMM1d, 933 XMM2, XMM2b, XMM2c, XMM2d, 934 XMM3, XMM3b, XMM3c, XMM3d, 935 XMM4, XMM4b, XMM4c, XMM4d, 936 XMM5, XMM5b, XMM5c, XMM5d, 937 XMM6, XMM6b, XMM6c, XMM6d, 938 XMM7, XMM7b, XMM7c, XMM7d, 939 XMM8, XMM8b, XMM8c, XMM8d, 940 XMM9, XMM9b, XMM9c, XMM9d, 941 XMM10, XMM10b, XMM10c, XMM10d, 942 XMM11, XMM11b, XMM11c, XMM11d, 943 XMM12, XMM12b, XMM12c, XMM12d, 944 XMM13, XMM13b, XMM13c, XMM13d, 945 XMM14, XMM14b, XMM14c, XMM14d, 946 XMM15, XMM15b, XMM15c, XMM15d); 947 948 // Class for all 128bit vector registers 949 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 950 XMM1, XMM1b, XMM1c, XMM1d, 951 XMM2, XMM2b, XMM2c, XMM2d, 952 XMM3, XMM3b, XMM3c, XMM3d, 953 XMM4, XMM4b, XMM4c, XMM4d, 954 XMM5, XMM5b, XMM5c, XMM5d, 955 XMM6, XMM6b, XMM6c, XMM6d, 956 XMM7, XMM7b, XMM7c, XMM7d, 957 XMM8, XMM8b, XMM8c, XMM8d, 958 XMM9, XMM9b, XMM9c, XMM9d, 959 XMM10, XMM10b, XMM10c, XMM10d, 960 XMM11, XMM11b, XMM11c, XMM11d, 961 XMM12, XMM12b, XMM12c, XMM12d, 962 XMM13, XMM13b, XMM13c, XMM13d, 963 XMM14, XMM14b, XMM14c, XMM14d, 964 XMM15, XMM15b, XMM15c, XMM15d, 965 XMM16, XMM16b, XMM16c, XMM16d, 966 XMM17, XMM17b, XMM17c, XMM17d, 967 XMM18, XMM18b, XMM18c, XMM18d, 968 XMM19, XMM19b, XMM19c, XMM19d, 969 XMM20, XMM20b, XMM20c, XMM20d, 970 XMM21, XMM21b, XMM21c, XMM21d, 971 XMM22, XMM22b, XMM22c, XMM22d, 972 XMM23, XMM23b, XMM23c, XMM23d, 973 XMM24, XMM24b, XMM24c, XMM24d, 974 XMM25, XMM25b, XMM25c, XMM25d, 975 XMM26, XMM26b, XMM26c, XMM26d, 976 XMM27, XMM27b, XMM27c, XMM27d, 977 XMM28, XMM28b, XMM28c, XMM28d, 978 XMM29, XMM29b, XMM29c, XMM29d, 979 XMM30, XMM30b, XMM30c, XMM30d, 980 XMM31, XMM31b, XMM31c, XMM31d); 981 982 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 983 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 984 985 // Class for all 256bit vector registers 986 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 987 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 988 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 989 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 990 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 991 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 992 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 993 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, 994 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 995 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, 
XMM9h, 996 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 997 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 998 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 999 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1000 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1001 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); 1002 1003 // Class for all 256bit vector registers 1004 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1005 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1006 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1007 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1008 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1009 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1010 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1011 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, 1012 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1013 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1014 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1015 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1016 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1017 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1018 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1019 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1020 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1021 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1022 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1023 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1024 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1025 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1026 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1027 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1028 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1029 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1030 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1031 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1032 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1033 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1034 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1035 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h); 1036 1037 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1038 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1039 1040 // Class for all 512bit vector registers 1041 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1042 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1043 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1044 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1045 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1046 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, 
XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1047 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1048 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 1049 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1050 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1051 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1052 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1053 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1054 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1055 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1056 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p, 1057 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1058 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1059 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1060 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1061 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1062 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1063 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1064 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1065 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1066 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1067 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1068 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1069 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1070 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); 1073 1074 // Class for restricted 512bit vector registers 1075 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, 
XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1076 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1077 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1078 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1079 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1080 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1081 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1082 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 1083 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1084 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1085 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1086 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1087 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1088 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1089 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1090 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1091 1092 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1093 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1094 1095 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1096 %} 1097 1098 1099 //----------SOURCE BLOCK------------------------------------------------------- 1100 // This is a block of C++ code which provides values, functions, and 1101 // definitions necessary in the rest of the architecture description 1102 1103 source_hpp %{ 1104 // Header information of the source block. 1105 // Method declarations/definitions which are used outside 1106 // the ad-scope can conveniently be defined here. 1107 // 1108 // To keep related declarations/definitions/uses close together, 1109 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1110 1111 #include "runtime/vm_version.hpp" 1112 1113 class NativeJump; 1114 1115 class CallStubImpl { 1116 1117 //-------------------------------------------------------------- 1118 //---< Used for optimization in Compile::shorten_branches >--- 1119 //-------------------------------------------------------------- 1120 1121 public: 1122 // Size of call trampoline stub. 
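  // (Rationale, roughly: x86 direct calls use rel32 displacements that are
  // expected to reach any target in the code cache, so no trampoline stubs
  // are needed and both queries below simply report zero.)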
1123 static uint size_call_trampoline() { 1124 return 0; // no call trampolines on this platform 1125 } 1126 1127 // number of relocations needed by a call trampoline stub 1128 static uint reloc_call_trampoline() { 1129 return 0; // no call trampolines on this platform 1130 } 1131 }; 1132 1133 class HandlerImpl { 1134 1135 public: 1136 1137 static int emit_exception_handler(C2_MacroAssembler *masm); 1138 static int emit_deopt_handler(C2_MacroAssembler* masm); 1139 1140 static uint size_exception_handler() { 1141 // NativeCall instruction size is the same as NativeJump. 1142 // exception handler starts out as jump and can be patched to 1143 // a call by deoptimization. (4932387) 1144 // Note that this value is also credited (in output.cpp) to 1145 // the size of the code section. 1146 return NativeJump::instruction_size; 1147 } 1148 1149 static uint size_deopt_handler() { 1150 // three 5 byte instructions plus one move for unreachable address. 1151 return 15+3; 1152 } 1153 }; 1154 1155 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { 1156 switch(bytes) { 1157 case 4: // fall-through 1158 case 8: // fall-through 1159 case 16: return Assembler::AVX_128bit; 1160 case 32: return Assembler::AVX_256bit; 1161 case 64: return Assembler::AVX_512bit; 1162 1163 default: { 1164 ShouldNotReachHere(); 1165 return Assembler::AVX_NoVec; 1166 } 1167 } 1168 } 1169 1170 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) { 1171 return vector_length_encoding(Matcher::vector_length_in_bytes(n)); 1172 } 1173 1174 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) { 1175 uint def_idx = use->operand_index(opnd); 1176 Node* def = use->in(def_idx); 1177 return vector_length_encoding(def); 1178 } 1179 1180 static inline bool is_vector_popcount_predicate(BasicType bt) { 1181 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1182 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1183 } 1184 1185 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) { 1186 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() && 1187 (VM_Version::supports_avx512vl() || vlen_bytes == 64); 1188 } 1189 1190 class Node::PD { 1191 public: 1192 enum NodeFlags { 1193 Flag_intel_jcc_erratum = Node::_last_flag << 1, 1194 Flag_sets_carry_flag = Node::_last_flag << 2, 1195 Flag_sets_parity_flag = Node::_last_flag << 3, 1196 Flag_sets_zero_flag = Node::_last_flag << 4, 1197 Flag_sets_overflow_flag = Node::_last_flag << 5, 1198 Flag_sets_sign_flag = Node::_last_flag << 6, 1199 Flag_clears_carry_flag = Node::_last_flag << 7, 1200 Flag_clears_parity_flag = Node::_last_flag << 8, 1201 Flag_clears_zero_flag = Node::_last_flag << 9, 1202 Flag_clears_overflow_flag = Node::_last_flag << 10, 1203 Flag_clears_sign_flag = Node::_last_flag << 11, 1204 _last_flag = Flag_clears_sign_flag 1205 }; 1206 }; 1207 1208 %} // end source_hpp 1209 1210 source %{ 1211 1212 #include "opto/addnode.hpp" 1213 #include "c2_intelJccErratum_x86.hpp" 1214 1215 void PhaseOutput::pd_perform_mach_node_analysis() { 1216 if (VM_Version::has_intel_jcc_erratum()) { 1217 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1218 _buf_sizes._code += extra_padding; 1219 } 1220 } 1221 1222 int MachNode::pd_alignment_required() const { 1223 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1224 // Conservatively add worst case
padding. We assume that relocInfo::addr_unit() is 1 on x86. 1225 return IntelJccErratum::largest_jcc_size() + 1; 1226 } else { 1227 return 1; 1228 } 1229 } 1230 1231 int MachNode::compute_padding(int current_offset) const { 1232 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1233 Compile* C = Compile::current(); 1234 PhaseOutput* output = C->output(); 1235 Block* block = output->block(); 1236 int index = output->index(); 1237 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1238 } else { 1239 return 0; 1240 } 1241 } 1242 1243 // Emit exception handler code. 1244 // Stuff framesize into a register and call a VM stub routine. 1245 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) { 1246 1247 // Note that the code buffer's insts_mark is always relative to insts. 1248 // That's why we must use the macroassembler to generate a handler. 1249 address base = __ start_a_stub(size_exception_handler()); 1250 if (base == nullptr) { 1251 ciEnv::current()->record_failure("CodeCache is full"); 1252 return 0; // CodeBuffer::expand failed 1253 } 1254 int offset = __ offset(); 1255 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1256 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1257 __ end_a_stub(); 1258 return offset; 1259 } 1260 1261 // Emit deopt handler code. 1262 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { 1263 1264 // Note that the code buffer's insts_mark is always relative to insts. 1265 // That's why we must use the macroassembler to generate a handler. 1266 address base = __ start_a_stub(size_deopt_handler()); 1267 if (base == nullptr) { 1268 ciEnv::current()->record_failure("CodeCache is full"); 1269 return 0; // CodeBuffer::expand failed 1270 } 1271 int offset = __ offset(); 1272 1273 address the_pc = (address) __ pc(); 1274 Label next; 1275 // push a "the_pc" on the stack without destroying any registers 1276 // as they all may be live. 1277 1278 // push address of "next" 1279 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1280 __ bind(next); 1281 // adjust it so it matches "the_pc" 1282 __ subptr(Address(rsp, 0), __ offset() - offset); 1283 1284 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1285 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1286 __ end_a_stub(); 1287 return offset; 1288 } 1289 1290 static Assembler::Width widthForType(BasicType bt) { 1291 if (bt == T_BYTE) { 1292 return Assembler::B; 1293 } else if (bt == T_SHORT) { 1294 return Assembler::W; 1295 } else if (bt == T_INT) { 1296 return Assembler::D; 1297 } else { 1298 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1299 return Assembler::Q; 1300 } 1301 } 1302 1303 //============================================================================= 1304 1305 // Float masks come from different places depending on platform. 
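// For instance, scalar abs/negate are typically built from these constants
// (a rough sketch of the SSE idiom, float masks shown):
//   andps xmm, [float_signmask()]   // abs:    clear the sign bit (0x7FFFFFFF per lane)
//   xorps xmm, [float_signflip()]   // negate: flip the sign bit  (0x80000000 per lane)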
1306 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1307 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1308 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1309 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1310 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1311 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1312 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1313 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1314 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1315 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1316 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1317 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1318 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1319 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1320 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1321 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1322 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1323 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1324 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1325 1326 //============================================================================= 1327 bool Matcher::match_rule_supported(int opcode) { 1328 if (!has_match_rule(opcode)) { 1329 return false; // no match rule present 1330 } 1331 switch (opcode) { 1332 case Op_AbsVL: 1333 case Op_StoreVectorScatter: 1334 if (UseAVX < 3) { 1335 return false; 1336 } 1337 break; 1338 case Op_PopCountI: 1339 case Op_PopCountL: 1340 if (!UsePopCountInstruction) { 1341 return false; 1342 } 1343 break; 1344 case Op_PopCountVI: 1345 if (UseAVX < 2) { 1346 return false; 1347 } 1348 break; 1349 case Op_CompressV: 1350 case Op_ExpandV: 1351 case Op_PopCountVL: 1352 if (UseAVX < 2) { 1353 return false; 1354 } 1355 break; 1356 case Op_MulVI: 1357 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1358 return false; 1359 } 1360 break; 1361 case Op_MulVL: 1362 if (UseSSE < 4) { // only with SSE4_1 or AVX 1363 return false; 1364 } 1365 break; 1366 case Op_MulReductionVL: 1367 if (VM_Version::supports_avx512dq() == false) { 1368 return false; 1369 } 1370 break; 1371 case Op_AbsVB: 1372 case Op_AbsVS: 1373 case Op_AbsVI: 1374 case Op_AddReductionVI: 1375 case Op_AndReductionV: 1376 case Op_OrReductionV: 1377 case Op_XorReductionV: 1378 if (UseSSE < 3) { // requires at least SSSE3 1379 return false; 1380 } 1381 break; 1382 case Op_MaxHF: 1383 case Op_MinHF: 1384 if (!VM_Version::supports_avx512vlbw()) { 1385 return false; 1386 } // fallthrough 1387 case Op_AddHF: 1388 case Op_DivHF: 1389 case Op_FmaHF: 1390 case Op_MulHF: 1391 case Op_ReinterpretS2HF: 1392 case Op_ReinterpretHF2S: 1393 case Op_SubHF: 1394 case Op_SqrtHF: 1395 if (!VM_Version::supports_avx512_fp16()) { 1396 return 
false; 1397 } 1398 break; 1399 case Op_VectorLoadShuffle: 1400 case Op_VectorRearrange: 1401 case Op_MulReductionVI: 1402 if (UseSSE < 4) { // requires at least SSE4 1403 return false; 1404 } 1405 break; 1406 case Op_IsInfiniteF: 1407 case Op_IsInfiniteD: 1408 if (!VM_Version::supports_avx512dq()) { 1409 return false; 1410 } 1411 break; 1412 case Op_SqrtVD: 1413 case Op_SqrtVF: 1414 case Op_VectorMaskCmp: 1415 case Op_VectorCastB2X: 1416 case Op_VectorCastS2X: 1417 case Op_VectorCastI2X: 1418 case Op_VectorCastL2X: 1419 case Op_VectorCastF2X: 1420 case Op_VectorCastD2X: 1421 case Op_VectorUCastB2X: 1422 case Op_VectorUCastS2X: 1423 case Op_VectorUCastI2X: 1424 case Op_VectorMaskCast: 1425 if (UseAVX < 1) { // enabled for AVX only 1426 return false; 1427 } 1428 break; 1429 case Op_PopulateIndex: 1430 if (UseAVX < 2) { 1431 return false; 1432 } 1433 break; 1434 case Op_RoundVF: 1435 if (UseAVX < 2) { // enabled for AVX2 only 1436 return false; 1437 } 1438 break; 1439 case Op_RoundVD: 1440 if (UseAVX < 3) { 1441 return false; // enabled for AVX3 only 1442 } 1443 break; 1444 case Op_CompareAndSwapL: 1445 case Op_CompareAndSwapP: 1446 break; 1447 case Op_StrIndexOf: 1448 if (!UseSSE42Intrinsics) { 1449 return false; 1450 } 1451 break; 1452 case Op_StrIndexOfChar: 1453 if (!UseSSE42Intrinsics) { 1454 return false; 1455 } 1456 break; 1457 case Op_OnSpinWait: 1458 if (VM_Version::supports_on_spin_wait() == false) { 1459 return false; 1460 } 1461 break; 1462 case Op_MulVB: 1463 case Op_LShiftVB: 1464 case Op_RShiftVB: 1465 case Op_URShiftVB: 1466 case Op_VectorInsert: 1467 case Op_VectorLoadMask: 1468 case Op_VectorStoreMask: 1469 case Op_VectorBlend: 1470 if (UseSSE < 4) { 1471 return false; 1472 } 1473 break; 1474 case Op_MaxD: 1475 case Op_MaxF: 1476 case Op_MinD: 1477 case Op_MinF: 1478 if (UseAVX < 1) { // enabled for AVX only 1479 return false; 1480 } 1481 break; 1482 case Op_CacheWB: 1483 case Op_CacheWBPreSync: 1484 case Op_CacheWBPostSync: 1485 if (!VM_Version::supports_data_cache_line_flush()) { 1486 return false; 1487 } 1488 break; 1489 case Op_ExtractB: 1490 case Op_ExtractL: 1491 case Op_ExtractI: 1492 case Op_RoundDoubleMode: 1493 if (UseSSE < 4) { 1494 return false; 1495 } 1496 break; 1497 case Op_RoundDoubleModeV: 1498 if (VM_Version::supports_avx() == false) { 1499 return false; // 128bit vroundpd is not available 1500 } 1501 break; 1502 case Op_LoadVectorGather: 1503 case Op_LoadVectorGatherMasked: 1504 if (UseAVX < 2) { 1505 return false; 1506 } 1507 break; 1508 case Op_FmaF: 1509 case Op_FmaD: 1510 case Op_FmaVD: 1511 case Op_FmaVF: 1512 if (!UseFMA) { 1513 return false; 1514 } 1515 break; 1516 case Op_MacroLogicV: 1517 if (UseAVX < 3 || !UseVectorMacroLogic) { 1518 return false; 1519 } 1520 break; 1521 1522 case Op_VectorCmpMasked: 1523 case Op_VectorMaskGen: 1524 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1525 return false; 1526 } 1527 break; 1528 case Op_VectorMaskFirstTrue: 1529 case Op_VectorMaskLastTrue: 1530 case Op_VectorMaskTrueCount: 1531 case Op_VectorMaskToLong: 1532 if (UseAVX < 1) { 1533 return false; 1534 } 1535 break; 1536 case Op_RoundF: 1537 case Op_RoundD: 1538 break; 1539 case Op_CopySignD: 1540 case Op_CopySignF: 1541 if (UseAVX < 3) { 1542 return false; 1543 } 1544 if (!VM_Version::supports_avx512vl()) { 1545 return false; 1546 } 1547 break; 1548 case Op_CompressBits: 1549 case Op_ExpandBits: 1550 if (!VM_Version::supports_bmi2()) { 1551 return false; 1552 } 1553 break; 1554 case Op_CompressM: 1555 if (!VM_Version::supports_avx512vl() || 
!VM_Version::supports_bmi2()) { 1556 return false; 1557 } 1558 break; 1559 case Op_ConvF2HF: 1560 case Op_ConvHF2F: 1561 if (!VM_Version::supports_float16()) { 1562 return false; 1563 } 1564 break; 1565 case Op_VectorCastF2HF: 1566 case Op_VectorCastHF2F: 1567 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1568 return false; 1569 } 1570 break; 1571 } 1572 return true; // Match rules are supported by default. 1573 } 1574 1575 //------------------------------------------------------------------------ 1576 1577 static inline bool is_pop_count_instr_target(BasicType bt) { 1578 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1579 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1580 } 1581 1582 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) { 1583 return match_rule_supported_vector(opcode, vlen, bt); 1584 } 1585 1586 // Identify extra cases that we might want to provide match rules for vector nodes and 1587 // other intrinsics guarded with vector length (vlen) and element type (bt). 1588 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1589 if (!match_rule_supported(opcode)) { 1590 return false; 1591 } 1592 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1593 // * SSE2 supports 128bit vectors for all types; 1594 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1595 // * AVX2 supports 256bit vectors for all types; 1596 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1597 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1598 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1599 // And MaxVectorSize is taken into account as well. 
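  // Worked example under these rules: with UseAVX == 2 and MaxVectorSize == 32,
  // a T_INT vector may have 2, 4 or 8 elements (8 to 32 bytes), while a
  // 16-element T_INT vector (64 bytes) fails this check.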
1600 if (!vector_size_supported(bt, vlen)) { 1601 return false; 1602 } 1603 // Special cases which require vector length follow: 1604 // * implementation limitations 1605 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1606 // * 128bit vroundpd instruction is present only in AVX1 1607 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1608 switch (opcode) { 1609 case Op_MaxVHF: 1610 case Op_MinVHF: 1611 if (!VM_Version::supports_avx512bw()) { 1612 return false; 1613 } 1614 case Op_AddVHF: 1615 case Op_DivVHF: 1616 case Op_FmaVHF: 1617 case Op_MulVHF: 1618 case Op_SubVHF: 1619 case Op_SqrtVHF: 1620 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1621 return false; 1622 } 1623 if (!VM_Version::supports_avx512_fp16()) { 1624 return false; 1625 } 1626 break; 1627 case Op_AbsVF: 1628 case Op_NegVF: 1629 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1630 return false; // 512bit vandps and vxorps are not available 1631 } 1632 break; 1633 case Op_AbsVD: 1634 case Op_NegVD: 1635 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1636 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1637 } 1638 break; 1639 case Op_RotateRightV: 1640 case Op_RotateLeftV: 1641 if (bt != T_INT && bt != T_LONG) { 1642 return false; 1643 } // fallthrough 1644 case Op_MacroLogicV: 1645 if (!VM_Version::supports_evex() || 1646 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1647 return false; 1648 } 1649 break; 1650 case Op_ClearArray: 1651 case Op_VectorMaskGen: 1652 case Op_VectorCmpMasked: 1653 if (!VM_Version::supports_avx512bw()) { 1654 return false; 1655 } 1656 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1657 return false; 1658 } 1659 break; 1660 case Op_LoadVectorMasked: 1661 case Op_StoreVectorMasked: 1662 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1663 return false; 1664 } 1665 break; 1666 case Op_UMinV: 1667 case Op_UMaxV: 1668 if (UseAVX == 0) { 1669 return false; 1670 } 1671 break; 1672 case Op_MaxV: 1673 case Op_MinV: 1674 if (UseSSE < 4 && is_integral_type(bt)) { 1675 return false; 1676 } 1677 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1678 // Float/Double intrinsics are enabled for AVX family currently. 1679 if (UseAVX == 0) { 1680 return false; 1681 } 1682 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1683 return false; 1684 } 1685 } 1686 break; 1687 case Op_CallLeafVector: 1688 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1689 return false; 1690 } 1691 break; 1692 case Op_AddReductionVI: 1693 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1694 return false; 1695 } 1696 // fallthrough 1697 case Op_AndReductionV: 1698 case Op_OrReductionV: 1699 case Op_XorReductionV: 1700 if (is_subword_type(bt) && (UseSSE < 4)) { 1701 return false; 1702 } 1703 break; 1704 case Op_MinReductionV: 1705 case Op_MaxReductionV: 1706 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1707 return false; 1708 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1709 return false; 1710 } 1711 // Float/Double intrinsics enabled for AVX family. 
1712 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1713 return false; 1714 } 1715 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1716 return false; 1717 } 1718 break; 1719 case Op_VectorTest: 1720 if (UseSSE < 4) { 1721 return false; // Implementation limitation 1722 } else if (size_in_bits < 32) { 1723 return false; // Implementation limitation 1724 } 1725 break; 1726 case Op_VectorLoadShuffle: 1727 case Op_VectorRearrange: 1728 if(vlen == 2) { 1729 return false; // Implementation limitation due to how shuffle is loaded 1730 } else if (size_in_bits == 256 && UseAVX < 2) { 1731 return false; // Implementation limitation 1732 } 1733 break; 1734 case Op_VectorLoadMask: 1735 case Op_VectorMaskCast: 1736 if (size_in_bits == 256 && UseAVX < 2) { 1737 return false; // Implementation limitation 1738 } 1739 // fallthrough 1740 case Op_VectorStoreMask: 1741 if (vlen == 2) { 1742 return false; // Implementation limitation 1743 } 1744 break; 1745 case Op_PopulateIndex: 1746 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1747 return false; 1748 } 1749 break; 1750 case Op_VectorCastB2X: 1751 case Op_VectorCastS2X: 1752 case Op_VectorCastI2X: 1753 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1754 return false; 1755 } 1756 break; 1757 case Op_VectorCastL2X: 1758 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1759 return false; 1760 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1761 return false; 1762 } 1763 break; 1764 case Op_VectorCastF2X: { 1765 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1766 // happen after intermediate conversion to integer and special handling 1767 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 
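      // Concretely, in Java (byte)1.0e10f first saturates to Integer.MAX_VALUE
      // (0x7fffffff) as an int and then keeps only the low 8 bits, giving -1;
      // the vector cast has to reproduce that two-step behaviour.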
1768 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1769 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1770 return false; 1771 } 1772 } 1773 // fallthrough 1774 case Op_VectorCastD2X: 1775 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1776 return false; 1777 } 1778 break; 1779 case Op_VectorCastF2HF: 1780 case Op_VectorCastHF2F: 1781 if (!VM_Version::supports_f16c() && 1782 ((!VM_Version::supports_evex() || 1783 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1784 return false; 1785 } 1786 break; 1787 case Op_RoundVD: 1788 if (!VM_Version::supports_avx512dq()) { 1789 return false; 1790 } 1791 break; 1792 case Op_MulReductionVI: 1793 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1794 return false; 1795 } 1796 break; 1797 case Op_LoadVectorGatherMasked: 1798 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1799 return false; 1800 } 1801 if (is_subword_type(bt) && 1802 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) || 1803 (size_in_bits < 64) || 1804 (bt == T_SHORT && !VM_Version::supports_bmi2()))) { 1805 return false; 1806 } 1807 break; 1808 case Op_StoreVectorScatterMasked: 1809 case Op_StoreVectorScatter: 1810 if (is_subword_type(bt)) { 1811 return false; 1812 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1813 return false; 1814 } 1815 // fallthrough 1816 case Op_LoadVectorGather: 1817 if (!is_subword_type(bt) && size_in_bits == 64) { 1818 return false; 1819 } 1820 if (is_subword_type(bt) && size_in_bits < 64) { 1821 return false; 1822 } 1823 break; 1824 case Op_SaturatingAddV: 1825 case Op_SaturatingSubV: 1826 if (UseAVX < 1) { 1827 return false; // Implementation limitation 1828 } 1829 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1830 return false; 1831 } 1832 break; 1833 case Op_SelectFromTwoVector: 1834 if (size_in_bits < 128 || (size_in_bits < 512 && !VM_Version::supports_avx512vl())) { 1835 return false; 1836 } 1837 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 1838 return false; 1839 } 1840 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 1841 return false; 1842 } 1843 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) { 1844 return false; 1845 } 1846 break; 1847 case Op_MaskAll: 1848 if (!VM_Version::supports_evex()) { 1849 return false; 1850 } 1851 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1852 return false; 1853 } 1854 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1855 return false; 1856 } 1857 break; 1858 case Op_VectorMaskCmp: 1859 if (vlen < 2 || size_in_bits < 32) { 1860 return false; 1861 } 1862 break; 1863 case Op_CompressM: 1864 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1865 return false; 1866 } 1867 break; 1868 case Op_CompressV: 1869 case Op_ExpandV: 1870 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1871 return false; 1872 } 1873 if (size_in_bits < 128 ) { 1874 return false; 1875 } 1876 case Op_VectorLongToMask: 1877 if (UseAVX < 1) { 1878 return false; 1879 } 1880 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1881 return false; 1882 } 1883 break; 1884 case Op_SignumVD: 1885 case Op_SignumVF: 1886 if (UseAVX < 1) { 1887 return false; 1888 } 1889 break; 1890 case Op_PopCountVI: 1891 case Op_PopCountVL: { 1892 if (!is_pop_count_instr_target(bt) && 1893 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 1894 
return false; 1895 } 1896 } 1897 break; 1898 case Op_ReverseV: 1899 case Op_ReverseBytesV: 1900 if (UseAVX < 2) { 1901 return false; 1902 } 1903 break; 1904 case Op_CountTrailingZerosV: 1905 case Op_CountLeadingZerosV: 1906 if (UseAVX < 2) { 1907 return false; 1908 } 1909 break; 1910 } 1911 return true; // Per default match rules are supported. 1912 } 1913 1914 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 1915 // ADLC based match_rule_supported routine checks for the existence of pattern based 1916 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 1917 // of their non-masked counterpart with mask edge being the differentiator. 1918 // This routine does a strict check on the existence of masked operation patterns 1919 // by returning a default false value for all the other opcodes apart from the 1920 // ones whose masked instruction patterns are defined in this file. 1921 if (!match_rule_supported_vector(opcode, vlen, bt)) { 1922 return false; 1923 } 1924 1925 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1926 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 1927 return false; 1928 } 1929 switch(opcode) { 1930 // Unary masked operations 1931 case Op_AbsVB: 1932 case Op_AbsVS: 1933 if(!VM_Version::supports_avx512bw()) { 1934 return false; // Implementation limitation 1935 } 1936 case Op_AbsVI: 1937 case Op_AbsVL: 1938 return true; 1939 1940 // Ternary masked operations 1941 case Op_FmaVF: 1942 case Op_FmaVD: 1943 return true; 1944 1945 case Op_MacroLogicV: 1946 if(bt != T_INT && bt != T_LONG) { 1947 return false; 1948 } 1949 return true; 1950 1951 // Binary masked operations 1952 case Op_AddVB: 1953 case Op_AddVS: 1954 case Op_SubVB: 1955 case Op_SubVS: 1956 case Op_MulVS: 1957 case Op_LShiftVS: 1958 case Op_RShiftVS: 1959 case Op_URShiftVS: 1960 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1961 if (!VM_Version::supports_avx512bw()) { 1962 return false; // Implementation limitation 1963 } 1964 return true; 1965 1966 case Op_MulVL: 1967 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1968 if (!VM_Version::supports_avx512dq()) { 1969 return false; // Implementation limitation 1970 } 1971 return true; 1972 1973 case Op_AndV: 1974 case Op_OrV: 1975 case Op_XorV: 1976 case Op_RotateRightV: 1977 case Op_RotateLeftV: 1978 if (bt != T_INT && bt != T_LONG) { 1979 return false; // Implementation limitation 1980 } 1981 return true; 1982 1983 case Op_VectorLoadMask: 1984 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1985 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 1986 return false; 1987 } 1988 return true; 1989 1990 case Op_AddVI: 1991 case Op_AddVL: 1992 case Op_AddVF: 1993 case Op_AddVD: 1994 case Op_SubVI: 1995 case Op_SubVL: 1996 case Op_SubVF: 1997 case Op_SubVD: 1998 case Op_MulVI: 1999 case Op_MulVF: 2000 case Op_MulVD: 2001 case Op_DivVF: 2002 case Op_DivVD: 2003 case Op_SqrtVF: 2004 case Op_SqrtVD: 2005 case Op_LShiftVI: 2006 case Op_LShiftVL: 2007 case Op_RShiftVI: 2008 case Op_RShiftVL: 2009 case Op_URShiftVI: 2010 case Op_URShiftVL: 2011 case Op_LoadVectorMasked: 2012 case Op_StoreVectorMasked: 2013 case Op_LoadVectorGatherMasked: 2014 case Op_StoreVectorScatterMasked: 2015 return true; 2016 2017 case Op_UMinV: 2018 case Op_UMaxV: 2019 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 2020 return false; 2021 } // fallthrough 2022 case Op_MaxV: 2023 case Op_MinV: 2024 if (is_subword_type(bt) 
&& !VM_Version::supports_avx512bw()) { 2025 return false; // Implementation limitation 2026 } 2027 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) { 2028 return false; // Implementation limitation 2029 } 2030 return true; 2031 case Op_SaturatingAddV: 2032 case Op_SaturatingSubV: 2033 if (!is_subword_type(bt)) { 2034 return false; 2035 } 2036 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) { 2037 return false; // Implementation limitation 2038 } 2039 return true; 2040 2041 case Op_VectorMaskCmp: 2042 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2043 return false; // Implementation limitation 2044 } 2045 return true; 2046 2047 case Op_VectorRearrange: 2048 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2049 return false; // Implementation limitation 2050 } 2051 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2052 return false; // Implementation limitation 2053 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2054 return false; // Implementation limitation 2055 } 2056 return true; 2057 2058 // Binary Logical operations 2059 case Op_AndVMask: 2060 case Op_OrVMask: 2061 case Op_XorVMask: 2062 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2063 return false; // Implementation limitation 2064 } 2065 return true; 2066 2067 case Op_PopCountVI: 2068 case Op_PopCountVL: 2069 if (!is_pop_count_instr_target(bt)) { 2070 return false; 2071 } 2072 return true; 2073 2074 case Op_MaskAll: 2075 return true; 2076 2077 case Op_CountLeadingZerosV: 2078 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2079 return true; 2080 } 2081 default: 2082 return false; 2083 } 2084 } 2085 2086 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2087 return false; 2088 } 2089 2090 // Return true if Vector::rearrange needs preparation of the shuffle argument 2091 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) { 2092 switch (elem_bt) { 2093 case T_BYTE: return false; 2094 case T_SHORT: return !VM_Version::supports_avx512bw(); 2095 case T_INT: return !VM_Version::supports_avx(); 2096 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl(); 2097 default: 2098 ShouldNotReachHere(); 2099 return false; 2100 } 2101 } 2102 2103 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2104 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2105 bool legacy = (generic_opnd->opcode() == LEGVEC); 2106 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2107 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2108 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 
2109 return new legVecZOper(); 2110 } 2111 if (legacy) { 2112 switch (ideal_reg) { 2113 case Op_VecS: return new legVecSOper(); 2114 case Op_VecD: return new legVecDOper(); 2115 case Op_VecX: return new legVecXOper(); 2116 case Op_VecY: return new legVecYOper(); 2117 case Op_VecZ: return new legVecZOper(); 2118 } 2119 } else { 2120 switch (ideal_reg) { 2121 case Op_VecS: return new vecSOper(); 2122 case Op_VecD: return new vecDOper(); 2123 case Op_VecX: return new vecXOper(); 2124 case Op_VecY: return new vecYOper(); 2125 case Op_VecZ: return new vecZOper(); 2126 } 2127 } 2128 ShouldNotReachHere(); 2129 return nullptr; 2130 } 2131 2132 bool Matcher::is_reg2reg_move(MachNode* m) { 2133 switch (m->rule()) { 2134 case MoveVec2Leg_rule: 2135 case MoveLeg2Vec_rule: 2136 case MoveF2VL_rule: 2137 case MoveF2LEG_rule: 2138 case MoveVL2F_rule: 2139 case MoveLEG2F_rule: 2140 case MoveD2VL_rule: 2141 case MoveD2LEG_rule: 2142 case MoveVL2D_rule: 2143 case MoveLEG2D_rule: 2144 return true; 2145 default: 2146 return false; 2147 } 2148 } 2149 2150 bool Matcher::is_generic_vector(MachOper* opnd) { 2151 switch (opnd->opcode()) { 2152 case VEC: 2153 case LEGVEC: 2154 return true; 2155 default: 2156 return false; 2157 } 2158 } 2159 2160 //------------------------------------------------------------------------ 2161 2162 const RegMask* Matcher::predicate_reg_mask(void) { 2163 return &_VECTMASK_REG_mask; 2164 } 2165 2166 // Max vector size in bytes. 0 if not supported. 2167 int Matcher::vector_width_in_bytes(BasicType bt) { 2168 assert(is_java_primitive(bt), "only primitive type vectors"); 2169 // SSE2 supports 128bit vectors for all types. 2170 // AVX2 supports 256bit vectors for all types. 2171 // AVX2/EVEX supports 512bit vectors for all types. 2172 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2173 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2174 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2175 size = (UseAVX > 2) ? 64 : 32; 2176 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2177 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2178 // Use flag to limit vector size. 2179 size = MIN2(size,(int)MaxVectorSize); 2180 // Minimum 2 values in vector (or 4 for bytes). 2181 switch (bt) { 2182 case T_DOUBLE: 2183 case T_LONG: 2184 if (size < 16) return 0; 2185 break; 2186 case T_FLOAT: 2187 case T_INT: 2188 if (size < 8) return 0; 2189 break; 2190 case T_BOOLEAN: 2191 if (size < 4) return 0; 2192 break; 2193 case T_CHAR: 2194 if (size < 4) return 0; 2195 break; 2196 case T_BYTE: 2197 if (size < 4) return 0; 2198 break; 2199 case T_SHORT: 2200 if (size < 4) return 0; 2201 break; 2202 default: 2203 ShouldNotReachHere(); 2204 } 2205 return size; 2206 } 2207 2208 // Limits on vector size (number of elements) loaded into vector. 2209 int Matcher::max_vector_size(const BasicType bt) { 2210 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2211 } 2212 int Matcher::min_vector_size(const BasicType bt) { 2213 int max_size = max_vector_size(bt); 2214 // Min size which can be loaded into vector is 4 bytes. 2215 int size = (type2aelembytes(bt) == 1) ? 
4 : 2; 2216 // Support for calling svml double64 vectors 2217 if (bt == T_DOUBLE) { 2218 size = 1; 2219 } 2220 return MIN2(size,max_size); 2221 } 2222 2223 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) { 2224 // Limit the max vector size for auto vectorization to 256 bits (32 bytes) 2225 // by default on Cascade Lake 2226 if (VM_Version::is_default_intel_cascade_lake()) { 2227 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt)); 2228 } 2229 return Matcher::max_vector_size(bt); 2230 } 2231 2232 int Matcher::scalable_vector_reg_size(const BasicType bt) { 2233 return -1; 2234 } 2235 2236 // Vector ideal reg corresponding to specified size in bytes 2237 uint Matcher::vector_ideal_reg(int size) { 2238 assert(MaxVectorSize >= size, ""); 2239 switch(size) { 2240 case 4: return Op_VecS; 2241 case 8: return Op_VecD; 2242 case 16: return Op_VecX; 2243 case 32: return Op_VecY; 2244 case 64: return Op_VecZ; 2245 } 2246 ShouldNotReachHere(); 2247 return 0; 2248 } 2249 2250 // Check for shift by small constant as well 2251 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2252 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2253 shift->in(2)->get_int() <= 3 && 2254 // Are there other uses besides address expressions? 2255 !matcher->is_visited(shift)) { 2256 address_visited.set(shift->_idx); // Flag as address_visited 2257 mstack.push(shift->in(2), Matcher::Visit); 2258 Node *conv = shift->in(1); 2259 // Allow Matcher to match the rule which bypass 2260 // ConvI2L operation for an array index on LP64 2261 // if the index value is positive. 2262 if (conv->Opcode() == Op_ConvI2L && 2263 conv->as_Type()->type()->is_long()->_lo >= 0 && 2264 // Are there other uses besides address expressions? 2265 !matcher->is_visited(conv)) { 2266 address_visited.set(conv->_idx); // Flag as address_visited 2267 mstack.push(conv->in(1), Matcher::Pre_Visit); 2268 } else { 2269 mstack.push(conv, Matcher::Pre_Visit); 2270 } 2271 return true; 2272 } 2273 return false; 2274 } 2275 2276 // This function identifies sub-graphs in which a 'load' node is 2277 // input to two different nodes, and such that it can be matched 2278 // with BMI instructions like blsi, blsr, etc. 2279 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2280 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2281 // refers to the same node. 2282 // 2283 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2284 // This is a temporary solution until we make DAGs expressible in ADL. 
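// For reference, the BMI1 idioms the concrete matches below look for
// (int flavour shown; operand order may be swapped since the outer ops are
// commutative -- see is_bmi_pattern() further down):
//   blsi   x & -x        ~  (AndI (SubI ConI(0)  LoadI) LoadI)
//   blsr   x & (x - 1)   ~  (AndI (AddI LoadI ConI(-1)) LoadI)
//   blsmsk x ^ (x - 1)   ~  (XorI (AddI LoadI ConI(-1)) LoadI)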
2285 template<typename ConType> 2286 class FusedPatternMatcher { 2287 Node* _op1_node; 2288 Node* _mop_node; 2289 int _con_op; 2290 2291 static int match_next(Node* n, int next_op, int next_op_idx) { 2292 if (n->in(1) == nullptr || n->in(2) == nullptr) { 2293 return -1; 2294 } 2295 2296 if (next_op_idx == -1) { // n is commutative, try rotations 2297 if (n->in(1)->Opcode() == next_op) { 2298 return 1; 2299 } else if (n->in(2)->Opcode() == next_op) { 2300 return 2; 2301 } 2302 } else { 2303 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2304 if (n->in(next_op_idx)->Opcode() == next_op) { 2305 return next_op_idx; 2306 } 2307 } 2308 return -1; 2309 } 2310 2311 public: 2312 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2313 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2314 2315 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2316 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2317 typename ConType::NativeType con_value) { 2318 if (_op1_node->Opcode() != op1) { 2319 return false; 2320 } 2321 if (_mop_node->outcnt() > 2) { 2322 return false; 2323 } 2324 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2325 if (op1_op2_idx == -1) { 2326 return false; 2327 } 2328 // Memory operation must be the other edge 2329 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2330 2331 // Check that the mop node is really what we want 2332 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2333 Node* op2_node = _op1_node->in(op1_op2_idx); 2334 if (op2_node->outcnt() > 1) { 2335 return false; 2336 } 2337 assert(op2_node->Opcode() == op2, "Should be"); 2338 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2339 if (op2_con_idx == -1) { 2340 return false; 2341 } 2342 // Memory operation must be the other edge 2343 int op2_mop_idx = (op2_con_idx & 1) + 1; 2344 // Check that the memory operation is the same node 2345 if (op2_node->in(op2_mop_idx) == _mop_node) { 2346 // Now check the constant 2347 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2348 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2349 return true; 2350 } 2351 } 2352 } 2353 return false; 2354 } 2355 }; 2356 2357 static bool is_bmi_pattern(Node* n, Node* m) { 2358 assert(UseBMI1Instructions, "sanity"); 2359 if (n != nullptr && m != nullptr) { 2360 if (m->Opcode() == Op_LoadI) { 2361 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2362 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2363 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2364 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2365 } else if (m->Opcode() == Op_LoadL) { 2366 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2367 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2368 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2369 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2370 } 2371 } 2372 return false; 2373 } 2374 2375 // Should the matcher clone input 'm' of node 'n'? 2376 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2377 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
2378 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2379 mstack.push(m, Visit); 2380 return true; 2381 } 2382 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2383 mstack.push(m, Visit); // m = ShiftCntV 2384 return true; 2385 } 2386 if (is_encode_and_store_pattern(n, m)) { 2387 mstack.push(m, Visit); 2388 return true; 2389 } 2390 return false; 2391 } 2392 2393 // Should the Matcher clone shifts on addressing modes, expecting them 2394 // to be subsumed into complex addressing expressions or compute them 2395 // into registers? 2396 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2397 Node *off = m->in(AddPNode::Offset); 2398 if (off->is_Con()) { 2399 address_visited.test_set(m->_idx); // Flag as address_visited 2400 Node *adr = m->in(AddPNode::Address); 2401 2402 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset. 2403 // AtomicAdd is not an addressing expression. 2404 // Cheap to find it by looking for screwy base. 2405 if (adr->is_AddP() && 2406 !adr->in(AddPNode::Base)->is_top() && 2407 !adr->in(AddPNode::Offset)->is_Con() && 2408 off->get_long() == (int) (off->get_long()) && // immL32 2409 // Are there other uses besides address expressions? 2410 !is_visited(adr)) { 2411 address_visited.set(adr->_idx); // Flag as address_visited 2412 Node *shift = adr->in(AddPNode::Offset); 2413 if (!clone_shift(shift, this, mstack, address_visited)) { 2414 mstack.push(shift, Pre_Visit); 2415 } 2416 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2417 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2418 } else { 2419 mstack.push(adr, Pre_Visit); 2420 } 2421 2422 // Clone X+offset as it also folds into most addressing expressions 2423 mstack.push(off, Visit); 2424 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2425 return true; 2426 } else if (clone_shift(off, this, mstack, address_visited)) { 2427 address_visited.test_set(m->_idx); // Flag as address_visited 2428 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2429 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2430 return true; 2431 } 2432 return false; 2433 } 2434 2435 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2436 switch (bt) { 2437 case BoolTest::eq: 2438 return Assembler::eq; 2439 case BoolTest::ne: 2440 return Assembler::neq; 2441 case BoolTest::le: 2442 case BoolTest::ule: 2443 return Assembler::le; 2444 case BoolTest::ge: 2445 case BoolTest::uge: 2446 return Assembler::nlt; 2447 case BoolTest::lt: 2448 case BoolTest::ult: 2449 return Assembler::lt; 2450 case BoolTest::gt: 2451 case BoolTest::ugt: 2452 return Assembler::nle; 2453 default : ShouldNotReachHere(); return Assembler::_false; 2454 } 2455 } 2456 2457 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2458 switch (bt) { 2459 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2460 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
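  // Concretely: in Java a comparison with NaN makes == false and != true, so
  // the NE case below uses the unordered predicate NEQ_UQ (true when either
  // lane is NaN), while the ordered EQ_OQ above yields false on NaN inputs.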
2461 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2462 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2463 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2464 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2465 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2466 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2467 } 2468 } 2469 2470 // Helper methods for MachSpillCopyNode::implementation(). 2471 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 2472 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2473 assert(ireg == Op_VecS || // 32bit vector 2474 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2475 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi), 2476 "no non-adjacent vector moves" ); 2477 if (masm) { 2478 switch (ireg) { 2479 case Op_VecS: // copy whole register 2480 case Op_VecD: 2481 case Op_VecX: 2482 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2483 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2484 } else { 2485 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2486 } 2487 break; 2488 case Op_VecY: 2489 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2490 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2491 } else { 2492 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2493 } 2494 break; 2495 case Op_VecZ: 2496 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2497 break; 2498 default: 2499 ShouldNotReachHere(); 2500 } 2501 #ifndef PRODUCT 2502 } else { 2503 switch (ireg) { 2504 case Op_VecS: 2505 case Op_VecD: 2506 case Op_VecX: 2507 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2508 break; 2509 case Op_VecY: 2510 case Op_VecZ: 2511 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2512 break; 2513 default: 2514 ShouldNotReachHere(); 2515 } 2516 #endif 2517 } 2518 } 2519 2520 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 2521 int stack_offset, int reg, uint ireg, outputStream* st) { 2522 if (masm) { 2523 if (is_load) { 2524 switch (ireg) { 2525 case Op_VecS: 2526 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2527 break; 2528 case Op_VecD: 2529 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2530 break; 2531 case Op_VecX: 2532 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2533 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2534 } else { 2535 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2536 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2537 } 2538 break; 2539 case Op_VecY: 2540 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2541 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2542 } else { 2543 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2544 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), 
as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2545 } 2546 break; 2547 case Op_VecZ: 2548 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2549 break; 2550 default: 2551 ShouldNotReachHere(); 2552 } 2553 } else { // store 2554 switch (ireg) { 2555 case Op_VecS: 2556 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2557 break; 2558 case Op_VecD: 2559 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2560 break; 2561 case Op_VecX: 2562 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2563 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2564 } 2565 else { 2566 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2567 } 2568 break; 2569 case Op_VecY: 2570 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2571 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2572 } 2573 else { 2574 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2575 } 2576 break; 2577 case Op_VecZ: 2578 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2579 break; 2580 default: 2581 ShouldNotReachHere(); 2582 } 2583 } 2584 #ifndef PRODUCT 2585 } else { 2586 if (is_load) { 2587 switch (ireg) { 2588 case Op_VecS: 2589 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2590 break; 2591 case Op_VecD: 2592 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2593 break; 2594 case Op_VecX: 2595 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2596 break; 2597 case Op_VecY: 2598 case Op_VecZ: 2599 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2600 break; 2601 default: 2602 ShouldNotReachHere(); 2603 } 2604 } else { // store 2605 switch (ireg) { 2606 case Op_VecS: 2607 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2608 break; 2609 case Op_VecD: 2610 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2611 break; 2612 case Op_VecX: 2613 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2614 break; 2615 case Op_VecY: 2616 case Op_VecZ: 2617 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2618 break; 2619 default: 2620 ShouldNotReachHere(); 2621 } 2622 } 2623 #endif 2624 } 2625 } 2626 2627 template <class T> 2628 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) { 2629 int size = type2aelembytes(bt) * len; 2630 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0); 2631 for (int i = 0; i < len; i++) { 2632 int offset = i * type2aelembytes(bt); 2633 switch (bt) { 2634 case T_BYTE: val->at(i) = con; break; 2635 case T_SHORT: { 2636 jshort c = con; 2637 memcpy(val->adr_at(offset), &c, sizeof(jshort)); 2638 break; 2639 } 2640 case T_INT: { 2641 jint c = con; 2642 memcpy(val->adr_at(offset), &c, sizeof(jint)); 2643 break; 2644 } 2645 case T_LONG: { 2646 jlong c = con; 2647 memcpy(val->adr_at(offset), &c, sizeof(jlong)); 2648 break; 2649 } 2650 case T_FLOAT: { 2651 jfloat c = con; 2652 memcpy(val->adr_at(offset), &c, sizeof(jfloat)); 2653 break; 2654 } 2655 case T_DOUBLE: { 2656 jdouble c = con; 2657 memcpy(val->adr_at(offset), &c, sizeof(jdouble)); 2658 break; 2659 } 2660 default: assert(false, "%s", type2name(bt)); 2661 } 2662 } 2663 
return val; 2664 } 2665 2666 static inline jlong high_bit_set(BasicType bt) { 2667 switch (bt) { 2668 case T_BYTE: return 0x8080808080808080; 2669 case T_SHORT: return 0x8000800080008000; 2670 case T_INT: return 0x8000000080000000; 2671 case T_LONG: return 0x8000000000000000; 2672 default: 2673 ShouldNotReachHere(); 2674 return 0; 2675 } 2676 } 2677 2678 #ifndef PRODUCT 2679 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2680 st->print("nop \t# %d bytes pad for loops and calls", _count); 2681 } 2682 #endif 2683 2684 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const { 2685 __ nop(_count); 2686 } 2687 2688 uint MachNopNode::size(PhaseRegAlloc*) const { 2689 return _count; 2690 } 2691 2692 #ifndef PRODUCT 2693 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2694 st->print("# breakpoint"); 2695 } 2696 #endif 2697 2698 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const { 2699 __ int3(); 2700 } 2701 2702 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2703 return MachNode::size(ra_); 2704 } 2705 2706 %} 2707 2708 encode %{ 2709 2710 enc_class call_epilog %{ 2711 if (VerifyStackAtCalls) { 2712 // Check that stack depth is unchanged: find majik cookie on stack 2713 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2714 Label L; 2715 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2716 __ jccb(Assembler::equal, L); 2717 // Die if stack mismatch 2718 __ int3(); 2719 __ bind(L); 2720 } 2721 %} 2722 2723 %} 2724 2725 // Operands for bound floating pointer register arguments 2726 operand rxmm0() %{ 2727 constraint(ALLOC_IN_RC(xmm0_reg)); 2728 match(VecX); 2729 format%{%} 2730 interface(REG_INTER); 2731 %} 2732 2733 //----------OPERANDS----------------------------------------------------------- 2734 // Operand definitions must precede instruction definitions for correct parsing 2735 // in the ADLC because operands constitute user defined types which are used in 2736 // instruction definitions. 2737 2738 // Vectors 2739 2740 // Dummy generic vector class. Should be used for all vector operands. 2741 // Replaced with vec[SDXYZ] during post-selection pass. 2742 operand vec() %{ 2743 constraint(ALLOC_IN_RC(dynamic)); 2744 match(VecX); 2745 match(VecY); 2746 match(VecZ); 2747 match(VecS); 2748 match(VecD); 2749 2750 format %{ %} 2751 interface(REG_INTER); 2752 %} 2753 2754 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2755 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2756 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2757 // runtime code generation via reg_class_dynamic. 2758 operand legVec() %{ 2759 constraint(ALLOC_IN_RC(dynamic)); 2760 match(VecX); 2761 match(VecY); 2762 match(VecZ); 2763 match(VecS); 2764 match(VecD); 2765 2766 format %{ %} 2767 interface(REG_INTER); 2768 %} 2769 2770 // Replaces vec during post-selection cleanup. See above. 2771 operand vecS() %{ 2772 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2773 match(VecS); 2774 2775 format %{ %} 2776 interface(REG_INTER); 2777 %} 2778 2779 // Replaces legVec during post-selection cleanup. See above. 2780 operand legVecS() %{ 2781 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2782 match(VecS); 2783 2784 format %{ %} 2785 interface(REG_INTER); 2786 %} 2787 2788 // Replaces vec during post-selection cleanup. See above. 
2789 operand vecD() %{ 2790 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2791 match(VecD); 2792 2793 format %{ %} 2794 interface(REG_INTER); 2795 %} 2796 2797 // Replaces legVec during post-selection cleanup. See above. 2798 operand legVecD() %{ 2799 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2800 match(VecD); 2801 2802 format %{ %} 2803 interface(REG_INTER); 2804 %} 2805 2806 // Replaces vec during post-selection cleanup. See above. 2807 operand vecX() %{ 2808 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2809 match(VecX); 2810 2811 format %{ %} 2812 interface(REG_INTER); 2813 %} 2814 2815 // Replaces legVec during post-selection cleanup. See above. 2816 operand legVecX() %{ 2817 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2818 match(VecX); 2819 2820 format %{ %} 2821 interface(REG_INTER); 2822 %} 2823 2824 // Replaces vec during post-selection cleanup. See above. 2825 operand vecY() %{ 2826 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2827 match(VecY); 2828 2829 format %{ %} 2830 interface(REG_INTER); 2831 %} 2832 2833 // Replaces legVec during post-selection cleanup. See above. 2834 operand legVecY() %{ 2835 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2836 match(VecY); 2837 2838 format %{ %} 2839 interface(REG_INTER); 2840 %} 2841 2842 // Replaces vec during post-selection cleanup. See above. 2843 operand vecZ() %{ 2844 constraint(ALLOC_IN_RC(vectorz_reg)); 2845 match(VecZ); 2846 2847 format %{ %} 2848 interface(REG_INTER); 2849 %} 2850 2851 // Replaces legVec during post-selection cleanup. See above. 2852 operand legVecZ() %{ 2853 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2854 match(VecZ); 2855 2856 format %{ %} 2857 interface(REG_INTER); 2858 %} 2859 2860 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2861 2862 // ============================================================================ 2863 2864 instruct ShouldNotReachHere() %{ 2865 match(Halt); 2866 format %{ "stop\t# ShouldNotReachHere" %} 2867 ins_encode %{ 2868 if (is_reachable()) { 2869 const char* str = __ code_string(_halt_reason); 2870 __ stop(str); 2871 } 2872 %} 2873 ins_pipe(pipe_slow); 2874 %} 2875 2876 // ============================================================================ 2877 2878 instruct addF_reg(regF dst, regF src) %{ 2879 predicate(UseAVX == 0); 2880 match(Set dst (AddF dst src)); 2881 2882 format %{ "addss $dst, $src" %} 2883 ins_cost(150); 2884 ins_encode %{ 2885 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2886 %} 2887 ins_pipe(pipe_slow); 2888 %} 2889 2890 instruct addF_mem(regF dst, memory src) %{ 2891 predicate(UseAVX == 0); 2892 match(Set dst (AddF dst (LoadF src))); 2893 2894 format %{ "addss $dst, $src" %} 2895 ins_cost(150); 2896 ins_encode %{ 2897 __ addss($dst$$XMMRegister, $src$$Address); 2898 %} 2899 ins_pipe(pipe_slow); 2900 %} 2901 2902 instruct addF_imm(regF dst, immF con) %{ 2903 predicate(UseAVX == 0); 2904 match(Set dst (AddF dst con)); 2905 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2906 ins_cost(150); 2907 ins_encode %{ 2908 __ addss($dst$$XMMRegister, $constantaddress($con)); 2909 %} 2910 ins_pipe(pipe_slow); 2911 %} 2912 2913 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2914 predicate(UseAVX > 0); 2915 match(Set dst (AddF src1 src2)); 2916 2917 format %{ "vaddss $dst, $src1, $src2" %} 2918 ins_cost(150); 2919 ins_encode %{ 2920 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2921 %} 2922 ins_pipe(pipe_slow); 2923 %} 2924 2925 instruct addF_reg_mem(regF dst, regF 
src1, memory src2) %{ 2926 predicate(UseAVX > 0); 2927 match(Set dst (AddF src1 (LoadF src2))); 2928 2929 format %{ "vaddss $dst, $src1, $src2" %} 2930 ins_cost(150); 2931 ins_encode %{ 2932 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2933 %} 2934 ins_pipe(pipe_slow); 2935 %} 2936 2937 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2938 predicate(UseAVX > 0); 2939 match(Set dst (AddF src con)); 2940 2941 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2942 ins_cost(150); 2943 ins_encode %{ 2944 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2945 %} 2946 ins_pipe(pipe_slow); 2947 %} 2948 2949 instruct addD_reg(regD dst, regD src) %{ 2950 predicate(UseAVX == 0); 2951 match(Set dst (AddD dst src)); 2952 2953 format %{ "addsd $dst, $src" %} 2954 ins_cost(150); 2955 ins_encode %{ 2956 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2957 %} 2958 ins_pipe(pipe_slow); 2959 %} 2960 2961 instruct addD_mem(regD dst, memory src) %{ 2962 predicate(UseAVX == 0); 2963 match(Set dst (AddD dst (LoadD src))); 2964 2965 format %{ "addsd $dst, $src" %} 2966 ins_cost(150); 2967 ins_encode %{ 2968 __ addsd($dst$$XMMRegister, $src$$Address); 2969 %} 2970 ins_pipe(pipe_slow); 2971 %} 2972 2973 instruct addD_imm(regD dst, immD con) %{ 2974 predicate(UseAVX == 0); 2975 match(Set dst (AddD dst con)); 2976 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2977 ins_cost(150); 2978 ins_encode %{ 2979 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2980 %} 2981 ins_pipe(pipe_slow); 2982 %} 2983 2984 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2985 predicate(UseAVX > 0); 2986 match(Set dst (AddD src1 src2)); 2987 2988 format %{ "vaddsd $dst, $src1, $src2" %} 2989 ins_cost(150); 2990 ins_encode %{ 2991 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2992 %} 2993 ins_pipe(pipe_slow); 2994 %} 2995 2996 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2997 predicate(UseAVX > 0); 2998 match(Set dst (AddD src1 (LoadD src2))); 2999 3000 format %{ "vaddsd $dst, $src1, $src2" %} 3001 ins_cost(150); 3002 ins_encode %{ 3003 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3004 %} 3005 ins_pipe(pipe_slow); 3006 %} 3007 3008 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3009 predicate(UseAVX > 0); 3010 match(Set dst (AddD src con)); 3011 3012 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3013 ins_cost(150); 3014 ins_encode %{ 3015 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3016 %} 3017 ins_pipe(pipe_slow); 3018 %} 3019 3020 instruct subF_reg(regF dst, regF src) %{ 3021 predicate(UseAVX == 0); 3022 match(Set dst (SubF dst src)); 3023 3024 format %{ "subss $dst, $src" %} 3025 ins_cost(150); 3026 ins_encode %{ 3027 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3028 %} 3029 ins_pipe(pipe_slow); 3030 %} 3031 3032 instruct subF_mem(regF dst, memory src) %{ 3033 predicate(UseAVX == 0); 3034 match(Set dst (SubF dst (LoadF src))); 3035 3036 format %{ "subss $dst, $src" %} 3037 ins_cost(150); 3038 ins_encode %{ 3039 __ subss($dst$$XMMRegister, $src$$Address); 3040 %} 3041 ins_pipe(pipe_slow); 3042 %} 3043 3044 instruct subF_imm(regF dst, immF con) %{ 3045 predicate(UseAVX == 0); 3046 match(Set dst (SubF dst con)); 3047 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3048 ins_cost(150); 3049 ins_encode %{ 3050 __ 
subss($dst$$XMMRegister, $constantaddress($con)); 3051 %} 3052 ins_pipe(pipe_slow); 3053 %} 3054 3055 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3056 predicate(UseAVX > 0); 3057 match(Set dst (SubF src1 src2)); 3058 3059 format %{ "vsubss $dst, $src1, $src2" %} 3060 ins_cost(150); 3061 ins_encode %{ 3062 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3063 %} 3064 ins_pipe(pipe_slow); 3065 %} 3066 3067 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3068 predicate(UseAVX > 0); 3069 match(Set dst (SubF src1 (LoadF src2))); 3070 3071 format %{ "vsubss $dst, $src1, $src2" %} 3072 ins_cost(150); 3073 ins_encode %{ 3074 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3075 %} 3076 ins_pipe(pipe_slow); 3077 %} 3078 3079 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3080 predicate(UseAVX > 0); 3081 match(Set dst (SubF src con)); 3082 3083 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3084 ins_cost(150); 3085 ins_encode %{ 3086 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3087 %} 3088 ins_pipe(pipe_slow); 3089 %} 3090 3091 instruct subD_reg(regD dst, regD src) %{ 3092 predicate(UseAVX == 0); 3093 match(Set dst (SubD dst src)); 3094 3095 format %{ "subsd $dst, $src" %} 3096 ins_cost(150); 3097 ins_encode %{ 3098 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3099 %} 3100 ins_pipe(pipe_slow); 3101 %} 3102 3103 instruct subD_mem(regD dst, memory src) %{ 3104 predicate(UseAVX == 0); 3105 match(Set dst (SubD dst (LoadD src))); 3106 3107 format %{ "subsd $dst, $src" %} 3108 ins_cost(150); 3109 ins_encode %{ 3110 __ subsd($dst$$XMMRegister, $src$$Address); 3111 %} 3112 ins_pipe(pipe_slow); 3113 %} 3114 3115 instruct subD_imm(regD dst, immD con) %{ 3116 predicate(UseAVX == 0); 3117 match(Set dst (SubD dst con)); 3118 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3119 ins_cost(150); 3120 ins_encode %{ 3121 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3122 %} 3123 ins_pipe(pipe_slow); 3124 %} 3125 3126 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3127 predicate(UseAVX > 0); 3128 match(Set dst (SubD src1 src2)); 3129 3130 format %{ "vsubsd $dst, $src1, $src2" %} 3131 ins_cost(150); 3132 ins_encode %{ 3133 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3134 %} 3135 ins_pipe(pipe_slow); 3136 %} 3137 3138 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3139 predicate(UseAVX > 0); 3140 match(Set dst (SubD src1 (LoadD src2))); 3141 3142 format %{ "vsubsd $dst, $src1, $src2" %} 3143 ins_cost(150); 3144 ins_encode %{ 3145 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3146 %} 3147 ins_pipe(pipe_slow); 3148 %} 3149 3150 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3151 predicate(UseAVX > 0); 3152 match(Set dst (SubD src con)); 3153 3154 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3155 ins_cost(150); 3156 ins_encode %{ 3157 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3158 %} 3159 ins_pipe(pipe_slow); 3160 %} 3161 3162 instruct mulF_reg(regF dst, regF src) %{ 3163 predicate(UseAVX == 0); 3164 match(Set dst (MulF dst src)); 3165 3166 format %{ "mulss $dst, $src" %} 3167 ins_cost(150); 3168 ins_encode %{ 3169 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3170 %} 3171 ins_pipe(pipe_slow); 3172 %} 3173 3174 instruct mulF_mem(regF dst, memory src) %{ 3175 predicate(UseAVX == 
0); 3176 match(Set dst (MulF dst (LoadF src))); 3177 3178 format %{ "mulss $dst, $src" %} 3179 ins_cost(150); 3180 ins_encode %{ 3181 __ mulss($dst$$XMMRegister, $src$$Address); 3182 %} 3183 ins_pipe(pipe_slow); 3184 %} 3185 3186 instruct mulF_imm(regF dst, immF con) %{ 3187 predicate(UseAVX == 0); 3188 match(Set dst (MulF dst con)); 3189 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3190 ins_cost(150); 3191 ins_encode %{ 3192 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3193 %} 3194 ins_pipe(pipe_slow); 3195 %} 3196 3197 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3198 predicate(UseAVX > 0); 3199 match(Set dst (MulF src1 src2)); 3200 3201 format %{ "vmulss $dst, $src1, $src2" %} 3202 ins_cost(150); 3203 ins_encode %{ 3204 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3205 %} 3206 ins_pipe(pipe_slow); 3207 %} 3208 3209 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3210 predicate(UseAVX > 0); 3211 match(Set dst (MulF src1 (LoadF src2))); 3212 3213 format %{ "vmulss $dst, $src1, $src2" %} 3214 ins_cost(150); 3215 ins_encode %{ 3216 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3217 %} 3218 ins_pipe(pipe_slow); 3219 %} 3220 3221 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3222 predicate(UseAVX > 0); 3223 match(Set dst (MulF src con)); 3224 3225 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3226 ins_cost(150); 3227 ins_encode %{ 3228 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3229 %} 3230 ins_pipe(pipe_slow); 3231 %} 3232 3233 instruct mulD_reg(regD dst, regD src) %{ 3234 predicate(UseAVX == 0); 3235 match(Set dst (MulD dst src)); 3236 3237 format %{ "mulsd $dst, $src" %} 3238 ins_cost(150); 3239 ins_encode %{ 3240 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3241 %} 3242 ins_pipe(pipe_slow); 3243 %} 3244 3245 instruct mulD_mem(regD dst, memory src) %{ 3246 predicate(UseAVX == 0); 3247 match(Set dst (MulD dst (LoadD src))); 3248 3249 format %{ "mulsd $dst, $src" %} 3250 ins_cost(150); 3251 ins_encode %{ 3252 __ mulsd($dst$$XMMRegister, $src$$Address); 3253 %} 3254 ins_pipe(pipe_slow); 3255 %} 3256 3257 instruct mulD_imm(regD dst, immD con) %{ 3258 predicate(UseAVX == 0); 3259 match(Set dst (MulD dst con)); 3260 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3261 ins_cost(150); 3262 ins_encode %{ 3263 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3264 %} 3265 ins_pipe(pipe_slow); 3266 %} 3267 3268 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3269 predicate(UseAVX > 0); 3270 match(Set dst (MulD src1 src2)); 3271 3272 format %{ "vmulsd $dst, $src1, $src2" %} 3273 ins_cost(150); 3274 ins_encode %{ 3275 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3276 %} 3277 ins_pipe(pipe_slow); 3278 %} 3279 3280 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3281 predicate(UseAVX > 0); 3282 match(Set dst (MulD src1 (LoadD src2))); 3283 3284 format %{ "vmulsd $dst, $src1, $src2" %} 3285 ins_cost(150); 3286 ins_encode %{ 3287 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3288 %} 3289 ins_pipe(pipe_slow); 3290 %} 3291 3292 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3293 predicate(UseAVX > 0); 3294 match(Set dst (MulD src con)); 3295 3296 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3297 ins_cost(150); 3298 ins_encode %{ 3299 __ 
vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3300 %} 3301 ins_pipe(pipe_slow); 3302 %} 3303 3304 instruct divF_reg(regF dst, regF src) %{ 3305 predicate(UseAVX == 0); 3306 match(Set dst (DivF dst src)); 3307 3308 format %{ "divss $dst, $src" %} 3309 ins_cost(150); 3310 ins_encode %{ 3311 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3312 %} 3313 ins_pipe(pipe_slow); 3314 %} 3315 3316 instruct divF_mem(regF dst, memory src) %{ 3317 predicate(UseAVX == 0); 3318 match(Set dst (DivF dst (LoadF src))); 3319 3320 format %{ "divss $dst, $src" %} 3321 ins_cost(150); 3322 ins_encode %{ 3323 __ divss($dst$$XMMRegister, $src$$Address); 3324 %} 3325 ins_pipe(pipe_slow); 3326 %} 3327 3328 instruct divF_imm(regF dst, immF con) %{ 3329 predicate(UseAVX == 0); 3330 match(Set dst (DivF dst con)); 3331 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3332 ins_cost(150); 3333 ins_encode %{ 3334 __ divss($dst$$XMMRegister, $constantaddress($con)); 3335 %} 3336 ins_pipe(pipe_slow); 3337 %} 3338 3339 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3340 predicate(UseAVX > 0); 3341 match(Set dst (DivF src1 src2)); 3342 3343 format %{ "vdivss $dst, $src1, $src2" %} 3344 ins_cost(150); 3345 ins_encode %{ 3346 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3347 %} 3348 ins_pipe(pipe_slow); 3349 %} 3350 3351 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3352 predicate(UseAVX > 0); 3353 match(Set dst (DivF src1 (LoadF src2))); 3354 3355 format %{ "vdivss $dst, $src1, $src2" %} 3356 ins_cost(150); 3357 ins_encode %{ 3358 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3359 %} 3360 ins_pipe(pipe_slow); 3361 %} 3362 3363 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3364 predicate(UseAVX > 0); 3365 match(Set dst (DivF src con)); 3366 3367 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3368 ins_cost(150); 3369 ins_encode %{ 3370 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3371 %} 3372 ins_pipe(pipe_slow); 3373 %} 3374 3375 instruct divD_reg(regD dst, regD src) %{ 3376 predicate(UseAVX == 0); 3377 match(Set dst (DivD dst src)); 3378 3379 format %{ "divsd $dst, $src" %} 3380 ins_cost(150); 3381 ins_encode %{ 3382 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3383 %} 3384 ins_pipe(pipe_slow); 3385 %} 3386 3387 instruct divD_mem(regD dst, memory src) %{ 3388 predicate(UseAVX == 0); 3389 match(Set dst (DivD dst (LoadD src))); 3390 3391 format %{ "divsd $dst, $src" %} 3392 ins_cost(150); 3393 ins_encode %{ 3394 __ divsd($dst$$XMMRegister, $src$$Address); 3395 %} 3396 ins_pipe(pipe_slow); 3397 %} 3398 3399 instruct divD_imm(regD dst, immD con) %{ 3400 predicate(UseAVX == 0); 3401 match(Set dst (DivD dst con)); 3402 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3403 ins_cost(150); 3404 ins_encode %{ 3405 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3406 %} 3407 ins_pipe(pipe_slow); 3408 %} 3409 3410 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3411 predicate(UseAVX > 0); 3412 match(Set dst (DivD src1 src2)); 3413 3414 format %{ "vdivsd $dst, $src1, $src2" %} 3415 ins_cost(150); 3416 ins_encode %{ 3417 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3418 %} 3419 ins_pipe(pipe_slow); 3420 %} 3421 3422 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3423 predicate(UseAVX > 0); 3424 match(Set dst (DivD src1 (LoadD src2))); 3425 3426 
format %{ "vdivsd $dst, $src1, $src2" %} 3427 ins_cost(150); 3428 ins_encode %{ 3429 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3430 %} 3431 ins_pipe(pipe_slow); 3432 %} 3433 3434 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3435 predicate(UseAVX > 0); 3436 match(Set dst (DivD src con)); 3437 3438 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3439 ins_cost(150); 3440 ins_encode %{ 3441 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3442 %} 3443 ins_pipe(pipe_slow); 3444 %} 3445 3446 instruct absF_reg(regF dst) %{ 3447 predicate(UseAVX == 0); 3448 match(Set dst (AbsF dst)); 3449 ins_cost(150); 3450 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3451 ins_encode %{ 3452 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3453 %} 3454 ins_pipe(pipe_slow); 3455 %} 3456 3457 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3458 predicate(UseAVX > 0); 3459 match(Set dst (AbsF src)); 3460 ins_cost(150); 3461 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3462 ins_encode %{ 3463 int vlen_enc = Assembler::AVX_128bit; 3464 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3465 ExternalAddress(float_signmask()), vlen_enc); 3466 %} 3467 ins_pipe(pipe_slow); 3468 %} 3469 3470 instruct absD_reg(regD dst) %{ 3471 predicate(UseAVX == 0); 3472 match(Set dst (AbsD dst)); 3473 ins_cost(150); 3474 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3475 "# abs double by sign masking" %} 3476 ins_encode %{ 3477 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3478 %} 3479 ins_pipe(pipe_slow); 3480 %} 3481 3482 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3483 predicate(UseAVX > 0); 3484 match(Set dst (AbsD src)); 3485 ins_cost(150); 3486 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3487 "# abs double by sign masking" %} 3488 ins_encode %{ 3489 int vlen_enc = Assembler::AVX_128bit; 3490 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3491 ExternalAddress(double_signmask()), vlen_enc); 3492 %} 3493 ins_pipe(pipe_slow); 3494 %} 3495 3496 instruct negF_reg(regF dst) %{ 3497 predicate(UseAVX == 0); 3498 match(Set dst (NegF dst)); 3499 ins_cost(150); 3500 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3501 ins_encode %{ 3502 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3503 %} 3504 ins_pipe(pipe_slow); 3505 %} 3506 3507 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3508 predicate(UseAVX > 0); 3509 match(Set dst (NegF src)); 3510 ins_cost(150); 3511 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3512 ins_encode %{ 3513 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3514 ExternalAddress(float_signflip())); 3515 %} 3516 ins_pipe(pipe_slow); 3517 %} 3518 3519 instruct negD_reg(regD dst) %{ 3520 predicate(UseAVX == 0); 3521 match(Set dst (NegD dst)); 3522 ins_cost(150); 3523 format %{ "xorpd $dst, [0x8000000000000000]\t" 3524 "# neg double by sign flipping" %} 3525 ins_encode %{ 3526 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3527 %} 3528 ins_pipe(pipe_slow); 3529 %} 3530 3531 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3532 predicate(UseAVX > 0); 3533 match(Set dst (NegD src)); 3534 ins_cost(150); 3535 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3536 "# neg double by sign flipping" %} 3537 ins_encode %{ 3538 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3539 ExternalAddress(double_signflip())); 3540 %} 3541 
ins_pipe(pipe_slow); 3542 %} 3543 3544 // sqrtss instruction needs destination register to be pre initialized for best performance 3545 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3546 instruct sqrtF_reg(regF dst) %{ 3547 match(Set dst (SqrtF dst)); 3548 format %{ "sqrtss $dst, $dst" %} 3549 ins_encode %{ 3550 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3551 %} 3552 ins_pipe(pipe_slow); 3553 %} 3554 3555 // sqrtsd instruction needs destination register to be pre initialized for best performance 3556 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3557 instruct sqrtD_reg(regD dst) %{ 3558 match(Set dst (SqrtD dst)); 3559 format %{ "sqrtsd $dst, $dst" %} 3560 ins_encode %{ 3561 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3562 %} 3563 ins_pipe(pipe_slow); 3564 %} 3565 3566 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{ 3567 effect(TEMP tmp); 3568 match(Set dst (ConvF2HF src)); 3569 ins_cost(125); 3570 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%} 3571 ins_encode %{ 3572 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister); 3573 %} 3574 ins_pipe( pipe_slow ); 3575 %} 3576 3577 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{ 3578 predicate((UseAVX > 2) && VM_Version::supports_avx512vl()); 3579 effect(TEMP ktmp, TEMP rtmp); 3580 match(Set mem (StoreC mem (ConvF2HF src))); 3581 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %} 3582 ins_encode %{ 3583 __ movl($rtmp$$Register, 0x1); 3584 __ kmovwl($ktmp$$KRegister, $rtmp$$Register); 3585 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 3586 %} 3587 ins_pipe( pipe_slow ); 3588 %} 3589 3590 instruct vconvF2HF(vec dst, vec src) %{ 3591 match(Set dst (VectorCastF2HF src)); 3592 format %{ "vector_conv_F2HF $dst $src" %} 3593 ins_encode %{ 3594 int vlen_enc = vector_length_encoding(this, $src); 3595 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc); 3596 %} 3597 ins_pipe( pipe_slow ); 3598 %} 3599 3600 instruct vconvF2HF_mem_reg(memory mem, vec src) %{ 3601 predicate(n->as_StoreVector()->memory_size() >= 16); 3602 match(Set mem (StoreVector mem (VectorCastF2HF src))); 3603 format %{ "vcvtps2ph $mem,$src" %} 3604 ins_encode %{ 3605 int vlen_enc = vector_length_encoding(this, $src); 3606 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc); 3607 %} 3608 ins_pipe( pipe_slow ); 3609 %} 3610 3611 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{ 3612 match(Set dst (ConvHF2F src)); 3613 format %{ "vcvtph2ps $dst,$src" %} 3614 ins_encode %{ 3615 __ flt16_to_flt($dst$$XMMRegister, $src$$Register); 3616 %} 3617 ins_pipe( pipe_slow ); 3618 %} 3619 3620 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{ 3621 match(Set dst (VectorCastHF2F (LoadVector mem))); 3622 format %{ "vcvtph2ps $dst,$mem" %} 3623 ins_encode %{ 3624 int vlen_enc = vector_length_encoding(this); 3625 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc); 3626 %} 3627 ins_pipe( pipe_slow ); 3628 %} 3629 3630 instruct vconvHF2F(vec dst, vec src) %{ 3631 match(Set dst (VectorCastHF2F src)); 3632 ins_cost(125); 3633 format %{ "vector_conv_HF2F $dst,$src" %} 3634 ins_encode %{ 3635 int vlen_enc = vector_length_encoding(this); 3636 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 3637 %} 3638 ins_pipe( pipe_slow ); 3639 %} 3640 3641 // ---------------------------------------- VectorReinterpret 
------------------------------------ 3642 instruct reinterpret_mask(kReg dst) %{ 3643 predicate(n->bottom_type()->isa_vectmask() && 3644 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3645 match(Set dst (VectorReinterpret dst)); 3646 ins_cost(125); 3647 format %{ "vector_reinterpret $dst\t!" %} 3648 ins_encode %{ 3649 // empty 3650 %} 3651 ins_pipe( pipe_slow ); 3652 %} 3653 3654 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3655 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3656 n->bottom_type()->isa_vectmask() && 3657 n->in(1)->bottom_type()->isa_vectmask() && 3658 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3659 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3660 match(Set dst (VectorReinterpret src)); 3661 effect(TEMP xtmp); 3662 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %} 3663 ins_encode %{ 3664 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3665 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3666 assert(src_sz == dst_sz , "src and dst size mismatch"); 3667 int vlen_enc = vector_length_encoding(src_sz); 3668 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3669 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3670 %} 3671 ins_pipe( pipe_slow ); 3672 %} 3673 3674 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3675 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3676 n->bottom_type()->isa_vectmask() && 3677 n->in(1)->bottom_type()->isa_vectmask() && 3678 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3679 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3680 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3681 match(Set dst (VectorReinterpret src)); 3682 effect(TEMP xtmp); 3683 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3684 ins_encode %{ 3685 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3686 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3687 assert(src_sz == dst_sz , "src and dst size mismatch"); 3688 int vlen_enc = vector_length_encoding(src_sz); 3689 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3690 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3691 %} 3692 ins_pipe( pipe_slow ); 3693 %} 3694 3695 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3696 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3697 n->bottom_type()->isa_vectmask() && 3698 n->in(1)->bottom_type()->isa_vectmask() && 3699 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3700 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3701 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3702 match(Set dst (VectorReinterpret src)); 3703 effect(TEMP xtmp); 3704 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" 
%} 3705 ins_encode %{ 3706 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3707 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3708 assert(src_sz == dst_sz , "src and dst size mismatch"); 3709 int vlen_enc = vector_length_encoding(src_sz); 3710 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3711 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3712 %} 3713 ins_pipe( pipe_slow ); 3714 %} 3715 3716 instruct reinterpret(vec dst) %{ 3717 predicate(!n->bottom_type()->isa_vectmask() && 3718 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3719 match(Set dst (VectorReinterpret dst)); 3720 ins_cost(125); 3721 format %{ "vector_reinterpret $dst\t!" %} 3722 ins_encode %{ 3723 // empty 3724 %} 3725 ins_pipe( pipe_slow ); 3726 %} 3727 3728 instruct reinterpret_expand(vec dst, vec src) %{ 3729 predicate(UseAVX == 0 && 3730 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3731 match(Set dst (VectorReinterpret src)); 3732 ins_cost(125); 3733 effect(TEMP dst); 3734 format %{ "vector_reinterpret_expand $dst,$src" %} 3735 ins_encode %{ 3736 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3737 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3738 3739 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3740 if (src_vlen_in_bytes == 4) { 3741 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3742 } else { 3743 assert(src_vlen_in_bytes == 8, ""); 3744 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3745 } 3746 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3747 %} 3748 ins_pipe( pipe_slow ); 3749 %} 3750 3751 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3752 predicate(UseAVX > 0 && 3753 !n->bottom_type()->isa_vectmask() && 3754 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3755 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3756 match(Set dst (VectorReinterpret src)); 3757 ins_cost(125); 3758 format %{ "vector_reinterpret_expand $dst,$src" %} 3759 ins_encode %{ 3760 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3761 %} 3762 ins_pipe( pipe_slow ); 3763 %} 3764 3765 3766 instruct vreinterpret_expand(legVec dst, vec src) %{ 3767 predicate(UseAVX > 0 && 3768 !n->bottom_type()->isa_vectmask() && 3769 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3770 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3771 match(Set dst (VectorReinterpret src)); 3772 ins_cost(125); 3773 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3774 ins_encode %{ 3775 switch (Matcher::vector_length_in_bytes(this, $src)) { 3776 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3777 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3778 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3779 default: ShouldNotReachHere(); 3780 } 3781 %} 3782 ins_pipe( pipe_slow ); 3783 %} 3784 3785 instruct reinterpret_shrink(vec dst, legVec src) %{ 3786 predicate(!n->bottom_type()->isa_vectmask() && 3787 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3788 match(Set dst (VectorReinterpret src)); 3789 ins_cost(125); 3790 format %{ "vector_reinterpret_shrink $dst,$src\t!" 
%} 3791 ins_encode %{ 3792 switch (Matcher::vector_length_in_bytes(this)) { 3793 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3794 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3795 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3796 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3797 default: ShouldNotReachHere(); 3798 } 3799 %} 3800 ins_pipe( pipe_slow ); 3801 %} 3802 3803 // ---------------------------------------------------------------------------------------------------- 3804 3805 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3806 match(Set dst (RoundDoubleMode src rmode)); 3807 format %{ "roundsd $dst,$src" %} 3808 ins_cost(150); 3809 ins_encode %{ 3810 assert(UseSSE >= 4, "required"); 3811 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) { 3812 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3813 } 3814 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3815 %} 3816 ins_pipe(pipe_slow); 3817 %} 3818 3819 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3820 match(Set dst (RoundDoubleMode con rmode)); 3821 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3822 ins_cost(150); 3823 ins_encode %{ 3824 assert(UseSSE >= 4, "required"); 3825 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3826 %} 3827 ins_pipe(pipe_slow); 3828 %} 3829 3830 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3831 predicate(Matcher::vector_length(n) < 8); 3832 match(Set dst (RoundDoubleModeV src rmode)); 3833 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3834 ins_encode %{ 3835 assert(UseAVX > 0, "required"); 3836 int vlen_enc = vector_length_encoding(this); 3837 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3838 %} 3839 ins_pipe( pipe_slow ); 3840 %} 3841 3842 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3843 predicate(Matcher::vector_length(n) == 8); 3844 match(Set dst (RoundDoubleModeV src rmode)); 3845 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3846 ins_encode %{ 3847 assert(UseAVX > 2, "required"); 3848 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3849 %} 3850 ins_pipe( pipe_slow ); 3851 %} 3852 3853 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3854 predicate(Matcher::vector_length(n) < 8); 3855 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3856 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3857 ins_encode %{ 3858 assert(UseAVX > 0, "required"); 3859 int vlen_enc = vector_length_encoding(this); 3860 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3861 %} 3862 ins_pipe( pipe_slow ); 3863 %} 3864 3865 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3866 predicate(Matcher::vector_length(n) == 8); 3867 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3868 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3869 ins_encode %{ 3870 assert(UseAVX > 2, "required"); 3871 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3872 %} 3873 ins_pipe( pipe_slow ); 3874 %} 3875 3876 instruct onspinwait() %{ 3877 match(OnSpinWait); 3878 ins_cost(200); 3879 3880 format %{ 3881 $$template 3882 $$emit$$"pause\t! 
membar_onspinwait" 3883 %} 3884 ins_encode %{ 3885 __ pause(); 3886 %} 3887 ins_pipe(pipe_slow); 3888 %} 3889 3890 // a * b + c 3891 instruct fmaD_reg(regD a, regD b, regD c) %{ 3892 match(Set c (FmaD c (Binary a b))); 3893 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3894 ins_cost(150); 3895 ins_encode %{ 3896 assert(UseFMA, "Needs FMA instructions support."); 3897 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3898 %} 3899 ins_pipe( pipe_slow ); 3900 %} 3901 3902 // a * b + c 3903 instruct fmaF_reg(regF a, regF b, regF c) %{ 3904 match(Set c (FmaF c (Binary a b))); 3905 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3906 ins_cost(150); 3907 ins_encode %{ 3908 assert(UseFMA, "Needs FMA instructions support."); 3909 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3910 %} 3911 ins_pipe( pipe_slow ); 3912 %} 3913 3914 // ====================VECTOR INSTRUCTIONS===================================== 3915 3916 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 3917 instruct MoveVec2Leg(legVec dst, vec src) %{ 3918 match(Set dst src); 3919 format %{ "" %} 3920 ins_encode %{ 3921 ShouldNotReachHere(); 3922 %} 3923 ins_pipe( fpu_reg_reg ); 3924 %} 3925 3926 instruct MoveLeg2Vec(vec dst, legVec src) %{ 3927 match(Set dst src); 3928 format %{ "" %} 3929 ins_encode %{ 3930 ShouldNotReachHere(); 3931 %} 3932 ins_pipe( fpu_reg_reg ); 3933 %} 3934 3935 // ============================================================================ 3936 3937 // Load vectors generic operand pattern 3938 instruct loadV(vec dst, memory mem) %{ 3939 match(Set dst (LoadVector mem)); 3940 ins_cost(125); 3941 format %{ "load_vector $dst,$mem" %} 3942 ins_encode %{ 3943 BasicType bt = Matcher::vector_element_basic_type(this); 3944 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 3945 %} 3946 ins_pipe( pipe_slow ); 3947 %} 3948 3949 // Store vectors generic operand pattern. 3950 instruct storeV(memory mem, vec src) %{ 3951 match(Set mem (StoreVector mem src)); 3952 ins_cost(145); 3953 format %{ "store_vector $mem,$src\n\t" %} 3954 ins_encode %{ 3955 switch (Matcher::vector_length_in_bytes(this, $src)) { 3956 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 3957 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 3958 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 3959 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 3960 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 3961 default: ShouldNotReachHere(); 3962 } 3963 %} 3964 ins_pipe( pipe_slow ); 3965 %} 3966 3967 // ---------------------------------------- Gather ------------------------------------ 3968 3969 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE 3970 3971 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 3972 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) && 3973 Matcher::vector_length_in_bytes(n) <= 32); 3974 match(Set dst (LoadVectorGather mem idx)); 3975 effect(TEMP dst, TEMP tmp, TEMP mask); 3976 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %} 3977 ins_encode %{ 3978 int vlen_enc = vector_length_encoding(this); 3979 BasicType elem_bt = Matcher::vector_element_basic_type(this); 3980 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3981 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc); 3982 __ lea($tmp$$Register, $mem$$Address); 3983 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 3984 %} 3985 ins_pipe( pipe_slow ); 3986 %} 3987 3988 3989 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ 3990 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 3991 !is_subword_type(Matcher::vector_element_basic_type(n))); 3992 match(Set dst (LoadVectorGather mem idx)); 3993 effect(TEMP dst, TEMP tmp, TEMP ktmp); 3994 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %} 3995 ins_encode %{ 3996 int vlen_enc = vector_length_encoding(this); 3997 BasicType elem_bt = Matcher::vector_element_basic_type(this); 3998 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister); 3999 __ lea($tmp$$Register, $mem$$Address); 4000 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4001 %} 4002 ins_pipe( pipe_slow ); 4003 %} 4004 4005 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4006 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4007 !is_subword_type(Matcher::vector_element_basic_type(n))); 4008 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask))); 4009 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp); 4010 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %} 4011 ins_encode %{ 4012 assert(UseAVX > 2, "sanity"); 4013 int vlen_enc = vector_length_encoding(this); 4014 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4015 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4016 // Note: Since gather instruction partially updates the opmask register used 4017 // for predication hense moving mask operand to a temporary. 4018 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4019 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4020 __ lea($tmp$$Register, $mem$$Address); 4021 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4022 %} 4023 ins_pipe( pipe_slow ); 4024 %} 4025 4026 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegI rtmp) %{ 4027 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4028 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4029 effect(TEMP tmp, TEMP rtmp); 4030 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! 
using $tmp and $rtmp as TEMP" %} 4031 ins_encode %{ 4032 int vlen_enc = vector_length_encoding(this); 4033 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4034 __ lea($tmp$$Register, $mem$$Address); 4035 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp$$Register, vlen_enc); 4036 %} 4037 ins_pipe( pipe_slow ); 4038 %} 4039 4040 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegP idx_base_temp, 4041 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4042 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4043 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4044 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4045 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4046 ins_encode %{ 4047 int vlen_enc = vector_length_encoding(this); 4048 int vector_len = Matcher::vector_length(this); 4049 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4050 __ lea($tmp$$Register, $mem$$Address); 4051 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4052 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, noreg, $xtmp1$$XMMRegister, 4053 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4054 %} 4055 ins_pipe( pipe_slow ); 4056 %} 4057 4058 instruct vgather_subwordLE8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegI rtmp, rFlagsReg cr) %{ 4059 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4060 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4061 effect(TEMP tmp, TEMP rtmp, KILL cr); 4062 format %{ "vector_gatherLE8_off $dst, $mem, $idx_base, $offset\t! using $tmp and $rtmp as TEMP" %} 4063 ins_encode %{ 4064 int vlen_enc = vector_length_encoding(this); 4065 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4066 __ lea($tmp$$Register, $mem$$Address); 4067 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, $rtmp$$Register, vlen_enc); 4068 %} 4069 ins_pipe( pipe_slow ); 4070 %} 4071 4072 4073 instruct vgather_subwordGT8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegP idx_base_temp, 4074 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4075 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4076 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4077 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4078 format %{ "vector_gatherGT8_off $dst, $mem, $idx_base, $offset\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4079 ins_encode %{ 4080 int vlen_enc = vector_length_encoding(this); 4081 int vector_len = Matcher::vector_length(this); 4082 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4083 __ lea($tmp$$Register, $mem$$Address); 4084 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4085 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, noreg, $xtmp1$$XMMRegister, 4086 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4087 %} 4088 ins_pipe( pipe_slow ); 4089 %} 4090 4091 4092 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4093 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4094 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4095 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4096 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4097 ins_encode %{ 4098 int vlen_enc = vector_length_encoding(this); 4099 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4100 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4101 __ lea($tmp$$Register, $mem$$Address); 4102 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4103 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4104 %} 4105 ins_pipe( pipe_slow ); 4106 %} 4107 4108 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4109 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4110 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4111 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4112 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4113 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4114 ins_encode %{ 4115 int vlen_enc = vector_length_encoding(this); 4116 int vector_len = Matcher::vector_length(this); 4117 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4118 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4119 __ lea($tmp$$Register, $mem$$Address); 4120 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4121 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4122 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4123 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4124 %} 4125 ins_pipe( pipe_slow ); 4126 %} 4127 4128 instruct vgather_masked_subwordLE8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4129 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4130 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4131 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4132 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4133 ins_encode %{ 4134 int vlen_enc = vector_length_encoding(this); 4135 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4136 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4137 __ lea($tmp$$Register, $mem$$Address); 4138 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4139 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4140 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4141 %} 4142 ins_pipe( pipe_slow ); 4143 %} 4144 4145 instruct vgather_masked_subwordGT8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4146 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4147 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4148 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4149 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4150 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4151 ins_encode %{ 4152 int vlen_enc = vector_length_encoding(this); 4153 int vector_len = Matcher::vector_length(this); 4154 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4155 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4156 __ lea($tmp$$Register, $mem$$Address); 4157 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4158 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4159 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4160 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4161 %} 4162 ins_pipe( pipe_slow ); 4163 %} 4164 4165 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4166 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4167 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4168 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4169 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4170 ins_encode %{ 4171 int vlen_enc = vector_length_encoding(this); 4172 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4173 __ lea($tmp$$Register, $mem$$Address); 4174 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4175 if (elem_bt == T_SHORT) { 4176 __ movl($mask_idx$$Register, 0x55555555); 4177 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4178 } 4179 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4180 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4181 %} 4182 ins_pipe( pipe_slow ); 4183 %} 4184 4185 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4186 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4187 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4188 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4189 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4190 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4191 ins_encode %{ 4192 int vlen_enc = vector_length_encoding(this); 4193 int vector_len = Matcher::vector_length(this); 4194 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4195 __ lea($tmp$$Register, $mem$$Address); 4196 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4197 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4198 if (elem_bt == T_SHORT) { 4199 __ movl($mask_idx$$Register, 0x55555555); 4200 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4201 } 4202 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4203 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4204 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4205 %} 4206 ins_pipe( pipe_slow ); 4207 %} 4208 4209 instruct vgather_masked_subwordLE8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4210 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4211 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4212 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4213 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4214 ins_encode %{ 4215 int vlen_enc = vector_length_encoding(this); 4216 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4217 __ lea($tmp$$Register, $mem$$Address); 4218 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4219 if (elem_bt == T_SHORT) { 4220 __ movl($mask_idx$$Register, 0x55555555); 4221 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4222 } 4223 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4224 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4225 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4226 %} 4227 ins_pipe( pipe_slow ); 4228 %} 4229 4230 instruct vgather_masked_subwordGT8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4231 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4232 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4233 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4234 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4235 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4236 ins_encode %{ 4237 int vlen_enc = vector_length_encoding(this); 4238 int vector_len = Matcher::vector_length(this); 4239 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4240 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4241 __ lea($tmp$$Register, $mem$$Address); 4242 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4243 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4244 if (elem_bt == T_SHORT) { 4245 __ movl($mask_idx$$Register, 0x55555555); 4246 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4247 } 4248 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4249 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4250 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4251 %} 4252 ins_pipe( pipe_slow ); 4253 %} 4254 4255 // ====================Scatter======================================= 4256 4257 // Scatter INT, LONG, FLOAT, DOUBLE 4258 4259 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{ 4260 predicate(UseAVX > 2); 4261 match(Set mem (StoreVectorScatter mem (Binary src idx))); 4262 effect(TEMP tmp, TEMP ktmp); 4263 format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %} 4264 ins_encode %{ 4265 int vlen_enc = vector_length_encoding(this, $src); 4266 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4267 4268 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4269 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4270 4271 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg); 4272 __ lea($tmp$$Register, $mem$$Address); 4273 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4274 %} 4275 ins_pipe( pipe_slow ); 4276 %} 4277 4278 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4279 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask)))); 4280 effect(TEMP tmp, TEMP ktmp); 4281 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %} 4282 ins_encode %{ 4283 int vlen_enc = vector_length_encoding(this, $src); 4284 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4285 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4286 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4287 // Note: Since the scatter instruction partially updates the opmask register used 4288 // for predication, the mask operand is moved to a temporary.
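// The AVX-512 scatter uses the opmask as a completion mask: each bit is cleared
// as the corresponding element is stored and the register is all zero when the
// instruction retires, so scattering through $mask directly would destroy a
// mask value that may still be live.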
4289 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4290 __ lea($tmp$$Register, $mem$$Address); 4291 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4292 %} 4293 ins_pipe( pipe_slow ); 4294 %} 4295 4296 // ====================REPLICATE======================================= 4297 4298 // Replicate byte scalar to be vector 4299 instruct vReplB_reg(vec dst, rRegI src) %{ 4300 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 4301 match(Set dst (Replicate src)); 4302 format %{ "replicateB $dst,$src" %} 4303 ins_encode %{ 4304 uint vlen = Matcher::vector_length(this); 4305 if (UseAVX >= 2) { 4306 int vlen_enc = vector_length_encoding(this); 4307 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4308 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4309 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4310 } else { 4311 __ movdl($dst$$XMMRegister, $src$$Register); 4312 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4313 } 4314 } else { 4315 assert(UseAVX < 2, ""); 4316 __ movdl($dst$$XMMRegister, $src$$Register); 4317 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4318 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4319 if (vlen >= 16) { 4320 assert(vlen == 16, ""); 4321 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4322 } 4323 } 4324 %} 4325 ins_pipe( pipe_slow ); 4326 %} 4327 4328 instruct ReplB_mem(vec dst, memory mem) %{ 4329 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE); 4330 match(Set dst (Replicate (LoadB mem))); 4331 format %{ "replicateB $dst,$mem" %} 4332 ins_encode %{ 4333 int vlen_enc = vector_length_encoding(this); 4334 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4335 %} 4336 ins_pipe( pipe_slow ); 4337 %} 4338 4339 // ====================ReplicateS======================================= 4340 4341 instruct vReplS_reg(vec dst, rRegI src) %{ 4342 predicate(Matcher::vector_element_basic_type(n) == T_SHORT); 4343 match(Set dst (Replicate src)); 4344 format %{ "replicateS $dst,$src" %} 4345 ins_encode %{ 4346 uint vlen = Matcher::vector_length(this); 4347 int vlen_enc = vector_length_encoding(this); 4348 if (UseAVX >= 2) { 4349 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4350 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4351 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4352 } else { 4353 __ movdl($dst$$XMMRegister, $src$$Register); 4354 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4355 } 4356 } else { 4357 assert(UseAVX < 2, ""); 4358 __ movdl($dst$$XMMRegister, $src$$Register); 4359 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4360 if (vlen >= 8) { 4361 assert(vlen == 8, ""); 4362 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4363 } 4364 } 4365 %} 4366 ins_pipe( pipe_slow ); 4367 %} 4368 4369 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{ 4370 match(Set dst (Replicate con)); 4371 effect(TEMP rtmp); 4372 format %{ "replicateHF $dst, $con \t! 
using $rtmp as TEMP" %} 4373 ins_encode %{ 4374 int vlen_enc = vector_length_encoding(this); 4375 BasicType bt = Matcher::vector_element_basic_type(this); 4376 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, ""); 4377 __ movl($rtmp$$Register, $con$$constant); 4378 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4379 %} 4380 ins_pipe( pipe_slow ); 4381 %} 4382 4383 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{ 4384 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT); 4385 match(Set dst (Replicate src)); 4386 effect(TEMP rtmp); 4387 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %} 4388 ins_encode %{ 4389 int vlen_enc = vector_length_encoding(this); 4390 __ vmovw($rtmp$$Register, $src$$XMMRegister); 4391 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4392 %} 4393 ins_pipe( pipe_slow ); 4394 %} 4395 4396 instruct ReplS_mem(vec dst, memory mem) %{ 4397 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4398 match(Set dst (Replicate (LoadS mem))); 4399 format %{ "replicateS $dst,$mem" %} 4400 ins_encode %{ 4401 int vlen_enc = vector_length_encoding(this); 4402 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4403 %} 4404 ins_pipe( pipe_slow ); 4405 %} 4406 4407 // ====================ReplicateI======================================= 4408 4409 instruct ReplI_reg(vec dst, rRegI src) %{ 4410 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4411 match(Set dst (Replicate src)); 4412 format %{ "replicateI $dst,$src" %} 4413 ins_encode %{ 4414 uint vlen = Matcher::vector_length(this); 4415 int vlen_enc = vector_length_encoding(this); 4416 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4417 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4418 } else if (VM_Version::supports_avx2()) { 4419 __ movdl($dst$$XMMRegister, $src$$Register); 4420 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4421 } else { 4422 __ movdl($dst$$XMMRegister, $src$$Register); 4423 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4424 } 4425 %} 4426 ins_pipe( pipe_slow ); 4427 %} 4428 4429 instruct ReplI_mem(vec dst, memory mem) %{ 4430 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4431 match(Set dst (Replicate (LoadI mem))); 4432 format %{ "replicateI $dst,$mem" %} 4433 ins_encode %{ 4434 int vlen_enc = vector_length_encoding(this); 4435 if (VM_Version::supports_avx2()) { 4436 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4437 } else if (VM_Version::supports_avx()) { 4438 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4439 } else { 4440 __ movdl($dst$$XMMRegister, $mem$$Address); 4441 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4442 } 4443 %} 4444 ins_pipe( pipe_slow ); 4445 %} 4446 4447 instruct ReplI_imm(vec dst, immI con) %{ 4448 predicate(Matcher::is_non_long_integral_vector(n)); 4449 match(Set dst (Replicate con)); 4450 format %{ "replicateI $dst,$con" %} 4451 ins_encode %{ 4452 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4453 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 16) / 4454 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4455 BasicType bt = Matcher::vector_element_basic_type(this); 4456 int vlen = Matcher::vector_length_in_bytes(this); 4457 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4458 %} 4459 ins_pipe( pipe_slow ); 4460 %} 4461 4462 // Replicate scalar zero to be vector 4463 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4464 predicate(Matcher::is_non_long_integral_vector(n)); 4465 match(Set dst (Replicate zero)); 4466 format %{ "replicateI $dst,$zero" %} 4467 ins_encode %{ 4468 int vlen_enc = vector_length_encoding(this); 4469 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4470 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4471 } else { 4472 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4473 } 4474 %} 4475 ins_pipe( fpu_reg_reg ); 4476 %} 4477 4478 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4479 predicate(Matcher::is_non_long_integral_vector(n)); 4480 match(Set dst (Replicate con)); 4481 format %{ "vallones $dst" %} 4482 ins_encode %{ 4483 int vector_len = vector_length_encoding(this); 4484 __ vallones($dst$$XMMRegister, vector_len); 4485 %} 4486 ins_pipe( pipe_slow ); 4487 %} 4488 4489 // ====================ReplicateL======================================= 4490 4491 // Replicate long (8 byte) scalar to be vector 4492 instruct ReplL_reg(vec dst, rRegL src) %{ 4493 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4494 match(Set dst (Replicate src)); 4495 format %{ "replicateL $dst,$src" %} 4496 ins_encode %{ 4497 int vlen = Matcher::vector_length(this); 4498 int vlen_enc = vector_length_encoding(this); 4499 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4500 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4501 } else if (VM_Version::supports_avx2()) { 4502 __ movdq($dst$$XMMRegister, $src$$Register); 4503 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4504 } else { 4505 __ movdq($dst$$XMMRegister, $src$$Register); 4506 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4507 } 4508 %} 4509 ins_pipe( pipe_slow ); 4510 %} 4511 4512 instruct ReplL_mem(vec dst, memory mem) %{ 4513 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4514 match(Set dst (Replicate (LoadL mem))); 4515 format %{ "replicateL $dst,$mem" %} 4516 ins_encode %{ 4517 int vlen_enc = vector_length_encoding(this); 4518 if (VM_Version::supports_avx2()) { 4519 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4520 } else if (VM_Version::supports_sse3()) { 4521 __ movddup($dst$$XMMRegister, $mem$$Address); 4522 } else { 4523 __ movq($dst$$XMMRegister, $mem$$Address); 4524 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4525 } 4526 %} 4527 ins_pipe( pipe_slow ); 4528 %} 4529 4530 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4531 instruct ReplL_imm(vec dst, immL con) %{ 4532 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4533 match(Set dst (Replicate con)); 4534 format %{ "replicateL $dst,$con" %} 4535 ins_encode %{ 4536 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4537 int vlen = Matcher::vector_length_in_bytes(this); 4538 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4539 %} 4540 ins_pipe( pipe_slow ); 4541 %} 4542 4543 instruct ReplL_zero(vec dst, immL0 zero) %{ 4544 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4545 match(Set dst (Replicate zero)); 4546 format %{ "replicateL $dst,$zero" %} 4547 ins_encode %{ 4548 int vlen_enc = vector_length_encoding(this); 4549 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4550 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4551 } else { 4552 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4553 } 4554 %} 4555 ins_pipe( fpu_reg_reg ); 4556 %} 4557 4558 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4559 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4560 match(Set dst (Replicate con)); 4561 format %{ "vallones $dst" %} 4562 ins_encode %{ 4563 int vector_len = vector_length_encoding(this); 4564 __ vallones($dst$$XMMRegister, vector_len); 4565 %} 4566 ins_pipe( pipe_slow ); 4567 %} 4568 4569 // ====================ReplicateF======================================= 4570 4571 instruct vReplF_reg(vec dst, vlRegF src) %{ 4572 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4573 match(Set dst (Replicate src)); 4574 format %{ "replicateF $dst,$src" %} 4575 ins_encode %{ 4576 uint vlen = Matcher::vector_length(this); 4577 int vlen_enc = vector_length_encoding(this); 4578 if (vlen <= 4) { 4579 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4580 } else if (VM_Version::supports_avx2()) { 4581 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4582 } else { 4583 assert(vlen == 8, "sanity"); 4584 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4585 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4586 } 4587 %} 4588 ins_pipe( pipe_slow ); 4589 %} 4590 4591 instruct ReplF_reg(vec dst, vlRegF src) %{ 4592 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4593 match(Set dst (Replicate src)); 4594 format %{ "replicateF $dst,$src" %} 4595 ins_encode %{ 4596 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4597 %} 4598 ins_pipe( pipe_slow ); 4599 %} 4600 4601 instruct ReplF_mem(vec dst, memory mem) %{ 4602 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4603 match(Set dst (Replicate (LoadF mem))); 4604 format %{ "replicateF $dst,$mem" %} 4605 ins_encode %{ 4606 int vlen_enc = vector_length_encoding(this); 4607 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4608 %} 4609 ins_pipe( pipe_slow ); 4610 %} 4611 4612 // Replicate float scalar immediate to be vector by loading from const table. 4613 instruct ReplF_imm(vec dst, immF con) %{ 4614 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4615 match(Set dst (Replicate con)); 4616 format %{ "replicateF $dst,$con" %} 4617 ins_encode %{ 4618 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant, 4619 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
1 : 2) : 4)); 4620 int vlen = Matcher::vector_length_in_bytes(this); 4621 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4622 %} 4623 ins_pipe( pipe_slow ); 4624 %} 4625 4626 instruct ReplF_zero(vec dst, immF0 zero) %{ 4627 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4628 match(Set dst (Replicate zero)); 4629 format %{ "replicateF $dst,$zero" %} 4630 ins_encode %{ 4631 int vlen_enc = vector_length_encoding(this); 4632 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4633 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4634 } else { 4635 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4636 } 4637 %} 4638 ins_pipe( fpu_reg_reg ); 4639 %} 4640 4641 // ====================ReplicateD======================================= 4642 4643 // Replicate double (8 bytes) scalar to be vector 4644 instruct vReplD_reg(vec dst, vlRegD src) %{ 4645 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4646 match(Set dst (Replicate src)); 4647 format %{ "replicateD $dst,$src" %} 4648 ins_encode %{ 4649 uint vlen = Matcher::vector_length(this); 4650 int vlen_enc = vector_length_encoding(this); 4651 if (vlen <= 2) { 4652 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4653 } else if (VM_Version::supports_avx2()) { 4654 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4655 } else { 4656 assert(vlen == 4, "sanity"); 4657 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4658 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4659 } 4660 %} 4661 ins_pipe( pipe_slow ); 4662 %} 4663 4664 instruct ReplD_reg(vec dst, vlRegD src) %{ 4665 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4666 match(Set dst (Replicate src)); 4667 format %{ "replicateD $dst,$src" %} 4668 ins_encode %{ 4669 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4670 %} 4671 ins_pipe( pipe_slow ); 4672 %} 4673 4674 instruct ReplD_mem(vec dst, memory mem) %{ 4675 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4676 match(Set dst (Replicate (LoadD mem))); 4677 format %{ "replicateD $dst,$mem" %} 4678 ins_encode %{ 4679 if (Matcher::vector_length(this) >= 4) { 4680 int vlen_enc = vector_length_encoding(this); 4681 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4682 } else { 4683 __ movddup($dst$$XMMRegister, $mem$$Address); 4684 } 4685 %} 4686 ins_pipe( pipe_slow ); 4687 %} 4688 4689 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 4690 instruct ReplD_imm(vec dst, immD con) %{ 4691 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4692 match(Set dst (Replicate con)); 4693 format %{ "replicateD $dst,$con" %} 4694 ins_encode %{ 4695 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4696 int vlen = Matcher::vector_length_in_bytes(this); 4697 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen); 4698 %} 4699 ins_pipe( pipe_slow ); 4700 %} 4701 4702 instruct ReplD_zero(vec dst, immD0 zero) %{ 4703 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4704 match(Set dst (Replicate zero)); 4705 format %{ "replicateD $dst,$zero" %} 4706 ins_encode %{ 4707 int vlen_enc = vector_length_encoding(this); 4708 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4709 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4710 } else { 4711 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4712 } 4713 %} 4714 ins_pipe( fpu_reg_reg ); 4715 %} 4716 4717 // ====================VECTOR INSERT======================================= 4718 4719 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4720 predicate(Matcher::vector_length_in_bytes(n) < 32); 4721 match(Set dst (VectorInsert (Binary dst val) idx)); 4722 format %{ "vector_insert $dst,$val,$idx" %} 4723 ins_encode %{ 4724 assert(UseSSE >= 4, "required"); 4725 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4726 4727 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4728 4729 assert(is_integral_type(elem_bt), ""); 4730 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4731 4732 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4733 %} 4734 ins_pipe( pipe_slow ); 4735 %} 4736 4737 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4738 predicate(Matcher::vector_length_in_bytes(n) == 32); 4739 match(Set dst (VectorInsert (Binary src val) idx)); 4740 effect(TEMP vtmp); 4741 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4742 ins_encode %{ 4743 int vlen_enc = Assembler::AVX_256bit; 4744 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4745 int elem_per_lane = 16/type2aelembytes(elem_bt); 4746 int log2epr = log2(elem_per_lane); 4747 4748 assert(is_integral_type(elem_bt), "sanity"); 4749 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4750 4751 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4752 uint y_idx = ($idx$$constant >> log2epr) & 1; 4753 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4754 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4755 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4756 %} 4757 ins_pipe( pipe_slow ); 4758 %} 4759 4760 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4761 predicate(Matcher::vector_length_in_bytes(n) == 64); 4762 match(Set dst (VectorInsert (Binary src val) idx)); 4763 effect(TEMP vtmp); 4764 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4765 ins_encode %{ 4766 assert(UseAVX > 2, "sanity"); 4767 4768 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4769 int elem_per_lane = 16/type2aelembytes(elem_bt); 4770 int log2epr = log2(elem_per_lane); 4771 4772 assert(is_integral_type(elem_bt), ""); 4773 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4774 4775 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4776 uint y_idx = ($idx$$constant >> log2epr) & 3; 4777 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4778 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4779 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, 
y_idx); 4780 %} 4781 ins_pipe( pipe_slow ); 4782 %} 4783 4784 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4785 predicate(Matcher::vector_length(n) == 2); 4786 match(Set dst (VectorInsert (Binary dst val) idx)); 4787 format %{ "vector_insert $dst,$val,$idx" %} 4788 ins_encode %{ 4789 assert(UseSSE >= 4, "required"); 4790 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4791 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4792 4793 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4794 %} 4795 ins_pipe( pipe_slow ); 4796 %} 4797 4798 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4799 predicate(Matcher::vector_length(n) == 4); 4800 match(Set dst (VectorInsert (Binary src val) idx)); 4801 effect(TEMP vtmp); 4802 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4803 ins_encode %{ 4804 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4805 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4806 4807 uint x_idx = $idx$$constant & right_n_bits(1); 4808 uint y_idx = ($idx$$constant >> 1) & 1; 4809 int vlen_enc = Assembler::AVX_256bit; 4810 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4811 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4812 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4813 %} 4814 ins_pipe( pipe_slow ); 4815 %} 4816 4817 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 4818 predicate(Matcher::vector_length(n) == 8); 4819 match(Set dst (VectorInsert (Binary src val) idx)); 4820 effect(TEMP vtmp); 4821 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4822 ins_encode %{ 4823 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 4824 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4825 4826 uint x_idx = $idx$$constant & right_n_bits(1); 4827 uint y_idx = ($idx$$constant >> 1) & 3; 4828 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4829 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4830 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4831 %} 4832 ins_pipe( pipe_slow ); 4833 %} 4834 4835 instruct insertF(vec dst, regF val, immU8 idx) %{ 4836 predicate(Matcher::vector_length(n) < 8); 4837 match(Set dst (VectorInsert (Binary dst val) idx)); 4838 format %{ "vector_insert $dst,$val,$idx" %} 4839 ins_encode %{ 4840 assert(UseSSE >= 4, "sanity"); 4841 4842 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4843 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4844 4845 uint x_idx = $idx$$constant & right_n_bits(2); 4846 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4847 %} 4848 ins_pipe( pipe_slow ); 4849 %} 4850 4851 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 4852 predicate(Matcher::vector_length(n) >= 8); 4853 match(Set dst (VectorInsert (Binary src val) idx)); 4854 effect(TEMP vtmp); 4855 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4856 ins_encode %{ 4857 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4858 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4859 4860 int vlen = Matcher::vector_length(this); 4861 uint x_idx = $idx$$constant & right_n_bits(2); 4862 if (vlen == 8) { 4863 uint y_idx = ($idx$$constant >> 2) & 1; 4864 
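// 256-bit case: extract the 128-bit lane selected by y_idx, patch the float at
// position x_idx with vinsertps, then write the patched lane back into $dst.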
int vlen_enc = Assembler::AVX_256bit; 4865 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4866 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4867 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4868 } else { 4869 assert(vlen == 16, "sanity"); 4870 uint y_idx = ($idx$$constant >> 2) & 3; 4871 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4872 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4873 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4874 } 4875 %} 4876 ins_pipe( pipe_slow ); 4877 %} 4878 4879 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 4880 predicate(Matcher::vector_length(n) == 2); 4881 match(Set dst (VectorInsert (Binary dst val) idx)); 4882 effect(TEMP tmp); 4883 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 4884 ins_encode %{ 4885 assert(UseSSE >= 4, "sanity"); 4886 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4887 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4888 4889 __ movq($tmp$$Register, $val$$XMMRegister); 4890 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 4891 %} 4892 ins_pipe( pipe_slow ); 4893 %} 4894 4895 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 4896 predicate(Matcher::vector_length(n) == 4); 4897 match(Set dst (VectorInsert (Binary src val) idx)); 4898 effect(TEMP vtmp, TEMP tmp); 4899 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 4900 ins_encode %{ 4901 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4902 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4903 4904 uint x_idx = $idx$$constant & right_n_bits(1); 4905 uint y_idx = ($idx$$constant >> 1) & 1; 4906 int vlen_enc = Assembler::AVX_256bit; 4907 __ movq($tmp$$Register, $val$$XMMRegister); 4908 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4909 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4910 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4911 %} 4912 ins_pipe( pipe_slow ); 4913 %} 4914 4915 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 4916 predicate(Matcher::vector_length(n) == 8); 4917 match(Set dst (VectorInsert (Binary src val) idx)); 4918 effect(TEMP tmp, TEMP vtmp); 4919 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4920 ins_encode %{ 4921 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4922 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4923 4924 uint x_idx = $idx$$constant & right_n_bits(1); 4925 uint y_idx = ($idx$$constant >> 1) & 3; 4926 __ movq($tmp$$Register, $val$$XMMRegister); 4927 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4928 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4929 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4930 %} 4931 ins_pipe( pipe_slow ); 4932 %} 4933 4934 // ====================REDUCTION ARITHMETIC======================================= 4935 4936 // =======================Int Reduction========================================== 4937 4938 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4939 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); 
// src2 4940 match(Set dst (AddReductionVI src1 src2)); 4941 match(Set dst (MulReductionVI src1 src2)); 4942 match(Set dst (AndReductionV src1 src2)); 4943 match(Set dst ( OrReductionV src1 src2)); 4944 match(Set dst (XorReductionV src1 src2)); 4945 match(Set dst (MinReductionV src1 src2)); 4946 match(Set dst (MaxReductionV src1 src2)); 4947 effect(TEMP vtmp1, TEMP vtmp2); 4948 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4949 ins_encode %{ 4950 int opcode = this->ideal_Opcode(); 4951 int vlen = Matcher::vector_length(this, $src2); 4952 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4953 %} 4954 ins_pipe( pipe_slow ); 4955 %} 4956 4957 // =======================Long Reduction========================================== 4958 4959 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4960 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); 4961 match(Set dst (AddReductionVL src1 src2)); 4962 match(Set dst (MulReductionVL src1 src2)); 4963 match(Set dst (AndReductionV src1 src2)); 4964 match(Set dst ( OrReductionV src1 src2)); 4965 match(Set dst (XorReductionV src1 src2)); 4966 match(Set dst (MinReductionV src1 src2)); 4967 match(Set dst (MaxReductionV src1 src2)); 4968 effect(TEMP vtmp1, TEMP vtmp2); 4969 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4970 ins_encode %{ 4971 int opcode = this->ideal_Opcode(); 4972 int vlen = Matcher::vector_length(this, $src2); 4973 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4974 %} 4975 ins_pipe( pipe_slow ); 4976 %} 4977 4978 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 4979 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); 4980 match(Set dst (AddReductionVL src1 src2)); 4981 match(Set dst (MulReductionVL src1 src2)); 4982 match(Set dst (AndReductionV src1 src2)); 4983 match(Set dst ( OrReductionV src1 src2)); 4984 match(Set dst (XorReductionV src1 src2)); 4985 match(Set dst (MinReductionV src1 src2)); 4986 match(Set dst (MaxReductionV src1 src2)); 4987 effect(TEMP vtmp1, TEMP vtmp2); 4988 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4989 ins_encode %{ 4990 int opcode = this->ideal_Opcode(); 4991 int vlen = Matcher::vector_length(this, $src2); 4992 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4993 %} 4994 ins_pipe( pipe_slow ); 4995 %} 4996 4997 // =======================Float Reduction========================================== 4998 4999 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 5000 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src 5001 match(Set dst (AddReductionVF dst src)); 5002 match(Set dst (MulReductionVF dst src)); 5003 effect(TEMP dst, TEMP vtmp); 5004 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 5005 ins_encode %{ 5006 int opcode = this->ideal_Opcode(); 5007 int vlen = Matcher::vector_length(this, $src); 5008 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5009 %} 5010 ins_pipe( pipe_slow ); 5011 %} 5012 5013 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 5014 
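// Strictly ordered 8-lane (256-bit) float add/mul reduction: the lanes of $src are
// folded into $dst in element order, matching Java's sequential floating-point
// semantics (contrast with the unordered_reduction* rules further below).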
predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5015 match(Set dst (AddReductionVF dst src)); 5016 match(Set dst (MulReductionVF dst src)); 5017 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5018 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5019 ins_encode %{ 5020 int opcode = this->ideal_Opcode(); 5021 int vlen = Matcher::vector_length(this, $src); 5022 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5023 %} 5024 ins_pipe( pipe_slow ); 5025 %} 5026 5027 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5028 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src 5029 match(Set dst (AddReductionVF dst src)); 5030 match(Set dst (MulReductionVF dst src)); 5031 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5032 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5033 ins_encode %{ 5034 int opcode = this->ideal_Opcode(); 5035 int vlen = Matcher::vector_length(this, $src); 5036 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5037 %} 5038 ins_pipe( pipe_slow ); 5039 %} 5040 5041 5042 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{ 5043 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5044 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5045 // src1 contains reduction identity 5046 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5047 match(Set dst (AddReductionVF src1 src2)); 5048 match(Set dst (MulReductionVF src1 src2)); 5049 effect(TEMP dst); 5050 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %} 5051 ins_encode %{ 5052 int opcode = this->ideal_Opcode(); 5053 int vlen = Matcher::vector_length(this, $src2); 5054 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5055 %} 5056 ins_pipe( pipe_slow ); 5057 %} 5058 5059 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{ 5060 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5061 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5062 // src1 contains reduction identity 5063 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5064 match(Set dst (AddReductionVF src1 src2)); 5065 match(Set dst (MulReductionVF src1 src2)); 5066 effect(TEMP dst, TEMP vtmp); 5067 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5068 ins_encode %{ 5069 int opcode = this->ideal_Opcode(); 5070 int vlen = Matcher::vector_length(this, $src2); 5071 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5072 %} 5073 ins_pipe( pipe_slow ); 5074 %} 5075 5076 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{ 5077 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5078 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
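// Relaxing the order lets the lanes be combined pairwise (a tree reduction) instead
// of strictly left-to-right, which is cheaper but may produce a different rounding.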
5079 // src1 contains reduction identity 5080 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5081 match(Set dst (AddReductionVF src1 src2)); 5082 match(Set dst (MulReductionVF src1 src2)); 5083 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5084 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5085 ins_encode %{ 5086 int opcode = this->ideal_Opcode(); 5087 int vlen = Matcher::vector_length(this, $src2); 5088 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5089 %} 5090 ins_pipe( pipe_slow ); 5091 %} 5092 5093 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5094 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5095 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5096 // src1 contains reduction identity 5097 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2 5098 match(Set dst (AddReductionVF src1 src2)); 5099 match(Set dst (MulReductionVF src1 src2)); 5100 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5101 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5102 ins_encode %{ 5103 int opcode = this->ideal_Opcode(); 5104 int vlen = Matcher::vector_length(this, $src2); 5105 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5106 %} 5107 ins_pipe( pipe_slow ); 5108 %} 5109 5110 // =======================Double Reduction========================================== 5111 5112 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 5113 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src 5114 match(Set dst (AddReductionVD dst src)); 5115 match(Set dst (MulReductionVD dst src)); 5116 effect(TEMP dst, TEMP vtmp); 5117 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 5118 ins_encode %{ 5119 int opcode = this->ideal_Opcode(); 5120 int vlen = Matcher::vector_length(this, $src); 5121 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5122 %} 5123 ins_pipe( pipe_slow ); 5124 %} 5125 5126 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 5127 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src 5128 match(Set dst (AddReductionVD dst src)); 5129 match(Set dst (MulReductionVD dst src)); 5130 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5131 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5132 ins_encode %{ 5133 int opcode = this->ideal_Opcode(); 5134 int vlen = Matcher::vector_length(this, $src); 5135 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5136 %} 5137 ins_pipe( pipe_slow ); 5138 %} 5139 5140 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5141 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5142 match(Set dst (AddReductionVD dst src)); 5143 match(Set dst (MulReductionVD dst src)); 5144 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5145 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5146 ins_encode %{ 5147 int opcode = this->ideal_Opcode(); 5148 int vlen = Matcher::vector_length(this, $src); 5149 __ 
reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5150 %} 5151 ins_pipe( pipe_slow ); 5152 %} 5153 5154 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{ 5155 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5156 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5157 // src1 contains reduction identity 5158 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5159 match(Set dst (AddReductionVD src1 src2)); 5160 match(Set dst (MulReductionVD src1 src2)); 5161 effect(TEMP dst); 5162 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %} 5163 ins_encode %{ 5164 int opcode = this->ideal_Opcode(); 5165 int vlen = Matcher::vector_length(this, $src2); 5166 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5167 %} 5168 ins_pipe( pipe_slow ); 5169 %} 5170 5171 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{ 5172 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5173 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5174 // src1 contains reduction identity 5175 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5176 match(Set dst (AddReductionVD src1 src2)); 5177 match(Set dst (MulReductionVD src1 src2)); 5178 effect(TEMP dst, TEMP vtmp); 5179 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5180 ins_encode %{ 5181 int opcode = this->ideal_Opcode(); 5182 int vlen = Matcher::vector_length(this, $src2); 5183 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5184 %} 5185 ins_pipe( pipe_slow ); 5186 %} 5187 5188 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5189 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5190 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
5191 // src1 contains reduction identity 5192 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5193 match(Set dst (AddReductionVD src1 src2)); 5194 match(Set dst (MulReductionVD src1 src2)); 5195 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5196 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5197 ins_encode %{ 5198 int opcode = this->ideal_Opcode(); 5199 int vlen = Matcher::vector_length(this, $src2); 5200 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5201 %} 5202 ins_pipe( pipe_slow ); 5203 %} 5204 5205 // =======================Byte Reduction========================================== 5206 5207 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5208 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 5209 match(Set dst (AddReductionVI src1 src2)); 5210 match(Set dst (AndReductionV src1 src2)); 5211 match(Set dst ( OrReductionV src1 src2)); 5212 match(Set dst (XorReductionV src1 src2)); 5213 match(Set dst (MinReductionV src1 src2)); 5214 match(Set dst (MaxReductionV src1 src2)); 5215 effect(TEMP vtmp1, TEMP vtmp2); 5216 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5217 ins_encode %{ 5218 int opcode = this->ideal_Opcode(); 5219 int vlen = Matcher::vector_length(this, $src2); 5220 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5221 %} 5222 ins_pipe( pipe_slow ); 5223 %} 5224 5225 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5226 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 5227 match(Set dst (AddReductionVI src1 src2)); 5228 match(Set dst (AndReductionV src1 src2)); 5229 match(Set dst ( OrReductionV src1 src2)); 5230 match(Set dst (XorReductionV src1 src2)); 5231 match(Set dst (MinReductionV src1 src2)); 5232 match(Set dst (MaxReductionV src1 src2)); 5233 effect(TEMP vtmp1, TEMP vtmp2); 5234 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5235 ins_encode %{ 5236 int opcode = this->ideal_Opcode(); 5237 int vlen = Matcher::vector_length(this, $src2); 5238 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5239 %} 5240 ins_pipe( pipe_slow ); 5241 %} 5242 5243 // =======================Short Reduction========================================== 5244 5245 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5246 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 5247 match(Set dst (AddReductionVI src1 src2)); 5248 match(Set dst (MulReductionVI src1 src2)); 5249 match(Set dst (AndReductionV src1 src2)); 5250 match(Set dst ( OrReductionV src1 src2)); 5251 match(Set dst (XorReductionV src1 src2)); 5252 match(Set dst (MinReductionV src1 src2)); 5253 match(Set dst (MaxReductionV src1 src2)); 5254 effect(TEMP vtmp1, TEMP vtmp2); 5255 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5256 ins_encode %{ 5257 int opcode = this->ideal_Opcode(); 5258 int vlen = Matcher::vector_length(this, $src2); 5259 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5260 %} 5261 ins_pipe( pipe_slow 
); 5262 %} 5263 5264 // =======================Mul Reduction========================================== 5265 5266 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5267 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5268 Matcher::vector_length(n->in(2)) <= 32); // src2 5269 match(Set dst (MulReductionVI src1 src2)); 5270 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5271 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5272 ins_encode %{ 5273 int opcode = this->ideal_Opcode(); 5274 int vlen = Matcher::vector_length(this, $src2); 5275 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5276 %} 5277 ins_pipe( pipe_slow ); 5278 %} 5279 5280 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5281 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5282 Matcher::vector_length(n->in(2)) == 64); // src2 5283 match(Set dst (MulReductionVI src1 src2)); 5284 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5285 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5286 ins_encode %{ 5287 int opcode = this->ideal_Opcode(); 5288 int vlen = Matcher::vector_length(this, $src2); 5289 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5290 %} 5291 ins_pipe( pipe_slow ); 5292 %} 5293 5294 //--------------------Min/Max Float Reduction -------------------- 5295 // Float Min Reduction 5296 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5297 legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5298 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5299 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5300 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5301 Matcher::vector_length(n->in(2)) == 2); 5302 match(Set dst (MinReductionV src1 src2)); 5303 match(Set dst (MaxReductionV src1 src2)); 5304 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5305 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5306 ins_encode %{ 5307 assert(UseAVX > 0, "sanity"); 5308 5309 int opcode = this->ideal_Opcode(); 5310 int vlen = Matcher::vector_length(this, $src2); 5311 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5312 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5313 %} 5314 ins_pipe( pipe_slow ); 5315 %} 5316 5317 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5318 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5319 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5320 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5321 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5322 Matcher::vector_length(n->in(2)) >= 4); 5323 match(Set dst (MinReductionV src1 src2)); 5324 match(Set dst (MaxReductionV src1 src2)); 5325 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5326 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5327 ins_encode %{ 5328 assert(UseAVX > 0, 
"sanity"); 5329 5330 int opcode = this->ideal_Opcode(); 5331 int vlen = Matcher::vector_length(this, $src2); 5332 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5333 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5334 %} 5335 ins_pipe( pipe_slow ); 5336 %} 5337 5338 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp, 5339 legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5340 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5341 Matcher::vector_length(n->in(2)) == 2); 5342 match(Set dst (MinReductionV dst src)); 5343 match(Set dst (MaxReductionV dst src)); 5344 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5345 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5346 ins_encode %{ 5347 assert(UseAVX > 0, "sanity"); 5348 5349 int opcode = this->ideal_Opcode(); 5350 int vlen = Matcher::vector_length(this, $src); 5351 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5352 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5353 %} 5354 ins_pipe( pipe_slow ); 5355 %} 5356 5357 5358 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp, 5359 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5360 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5361 Matcher::vector_length(n->in(2)) >= 4); 5362 match(Set dst (MinReductionV dst src)); 5363 match(Set dst (MaxReductionV dst src)); 5364 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5365 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5366 ins_encode %{ 5367 assert(UseAVX > 0, "sanity"); 5368 5369 int opcode = this->ideal_Opcode(); 5370 int vlen = Matcher::vector_length(this, $src); 5371 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5372 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5373 %} 5374 ins_pipe( pipe_slow ); 5375 %} 5376 5377 instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{ 5378 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5379 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5380 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5381 Matcher::vector_length(n->in(2)) == 2); 5382 match(Set dst (MinReductionV src1 src2)); 5383 match(Set dst (MaxReductionV src1 src2)); 5384 effect(TEMP dst, TEMP xtmp1); 5385 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %} 5386 ins_encode %{ 5387 int opcode = this->ideal_Opcode(); 5388 int vlen = Matcher::vector_length(this, $src2); 5389 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5390 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister); 5391 %} 5392 ins_pipe( pipe_slow ); 5393 %} 5394 5395 instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{ 5396 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5397 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5398 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == 
TypeF::NEG_INF)) && 5399 Matcher::vector_length(n->in(2)) >= 4); 5400 match(Set dst (MinReductionV src1 src2)); 5401 match(Set dst (MaxReductionV src1 src2)); 5402 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 5403 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %} 5404 ins_encode %{ 5405 int opcode = this->ideal_Opcode(); 5406 int vlen = Matcher::vector_length(this, $src2); 5407 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, 5408 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 5409 %} 5410 ins_pipe( pipe_slow ); 5411 %} 5412 5413 instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{ 5414 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5415 Matcher::vector_length(n->in(2)) == 2); 5416 match(Set dst (MinReductionV dst src)); 5417 match(Set dst (MaxReductionV dst src)); 5418 effect(TEMP dst, TEMP xtmp1); 5419 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %} 5420 ins_encode %{ 5421 int opcode = this->ideal_Opcode(); 5422 int vlen = Matcher::vector_length(this, $src); 5423 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, 5424 $xtmp1$$XMMRegister); 5425 %} 5426 ins_pipe( pipe_slow ); 5427 %} 5428 5429 instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{ 5430 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5431 Matcher::vector_length(n->in(2)) >= 4); 5432 match(Set dst (MinReductionV dst src)); 5433 match(Set dst (MaxReductionV dst src)); 5434 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 5435 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %} 5436 ins_encode %{ 5437 int opcode = this->ideal_Opcode(); 5438 int vlen = Matcher::vector_length(this, $src); 5439 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, 5440 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 5441 %} 5442 ins_pipe( pipe_slow ); 5443 %} 5444 5445 //--------------------Min Double Reduction -------------------- 5446 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2, 5447 legVec tmp3, legVec tmp4, rFlagsReg cr) %{ 5448 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5449 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5450 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5451 Matcher::vector_length(n->in(2)) == 2); 5452 match(Set dst (MinReductionV src1 src2)); 5453 match(Set dst (MaxReductionV src1 src2)); 5454 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5455 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5456 ins_encode %{ 5457 assert(UseAVX > 0, "sanity"); 5458 5459 int opcode = this->ideal_Opcode(); 5460 int vlen = Matcher::vector_length(this, $src2); 5461 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5462 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5463 %} 5464 ins_pipe( pipe_slow ); 5465 %} 5466 5467 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2, 5468 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{ 5469 predicate(!VM_Version::supports_avx10_2() && 
Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5470 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5471 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5472 Matcher::vector_length(n->in(2)) >= 4); 5473 match(Set dst (MinReductionV src1 src2)); 5474 match(Set dst (MaxReductionV src1 src2)); 5475 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5476 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5477 ins_encode %{ 5478 assert(UseAVX > 0, "sanity"); 5479 5480 int opcode = this->ideal_Opcode(); 5481 int vlen = Matcher::vector_length(this, $src2); 5482 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5483 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5484 %} 5485 ins_pipe( pipe_slow ); 5486 %} 5487 5488 5489 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, 5490 legVec tmp3, legVec tmp4, rFlagsReg cr) %{ 5491 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5492 Matcher::vector_length(n->in(2)) == 2); 5493 match(Set dst (MinReductionV dst src)); 5494 match(Set dst (MaxReductionV dst src)); 5495 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5496 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5497 ins_encode %{ 5498 assert(UseAVX > 0, "sanity"); 5499 5500 int opcode = this->ideal_Opcode(); 5501 int vlen = Matcher::vector_length(this, $src); 5502 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5503 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5504 %} 5505 ins_pipe( pipe_slow ); 5506 %} 5507 5508 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3, 5509 legVec tmp4, legVec tmp5, rFlagsReg cr) %{ 5510 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5511 Matcher::vector_length(n->in(2)) >= 4); 5512 match(Set dst (MinReductionV dst src)); 5513 match(Set dst (MaxReductionV dst src)); 5514 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5515 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5516 ins_encode %{ 5517 assert(UseAVX > 0, "sanity"); 5518 5519 int opcode = this->ideal_Opcode(); 5520 int vlen = Matcher::vector_length(this, $src); 5521 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5522 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5523 %} 5524 ins_pipe( pipe_slow ); 5525 %} 5526 5527 instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{ 5528 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5529 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5530 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5531 Matcher::vector_length(n->in(2)) == 2); 5532 match(Set dst (MinReductionV src1 src2)); 5533 match(Set dst (MaxReductionV src1 src2)); 5534 effect(TEMP dst, TEMP xtmp1); 5535 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %} 5536 ins_encode %{ 5537 int opcode = 
this->ideal_Opcode(); 5538 int vlen = Matcher::vector_length(this, $src2); 5539 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, 5540 xnoreg, xnoreg, $xtmp1$$XMMRegister); 5541 %} 5542 ins_pipe( pipe_slow ); 5543 %} 5544 5545 instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{ 5546 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5547 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5548 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5549 Matcher::vector_length(n->in(2)) >= 4); 5550 match(Set dst (MinReductionV src1 src2)); 5551 match(Set dst (MaxReductionV src1 src2)); 5552 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 5553 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %} 5554 ins_encode %{ 5555 int opcode = this->ideal_Opcode(); 5556 int vlen = Matcher::vector_length(this, $src2); 5557 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, 5558 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 5559 %} 5560 ins_pipe( pipe_slow ); 5561 %} 5562 5563 5564 instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{ 5565 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5566 Matcher::vector_length(n->in(2)) == 2); 5567 match(Set dst (MinReductionV dst src)); 5568 match(Set dst (MaxReductionV dst src)); 5569 effect(TEMP dst, TEMP xtmp1); 5570 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %} 5571 ins_encode %{ 5572 int opcode = this->ideal_Opcode(); 5573 int vlen = Matcher::vector_length(this, $src); 5574 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5575 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister); 5576 %} 5577 ins_pipe( pipe_slow ); 5578 %} 5579 5580 instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{ 5581 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5582 Matcher::vector_length(n->in(2)) >= 4); 5583 match(Set dst (MinReductionV dst src)); 5584 match(Set dst (MaxReductionV dst src)); 5585 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 5586 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %} 5587 ins_encode %{ 5588 int opcode = this->ideal_Opcode(); 5589 int vlen = Matcher::vector_length(this, $src); 5590 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5591 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 5592 %} 5593 ins_pipe( pipe_slow ); 5594 %} 5595 5596 // ====================VECTOR ARITHMETIC======================================= 5597 5598 // --------------------------------- ADD -------------------------------------- 5599 5600 // Bytes vector add 5601 instruct vaddB(vec dst, vec src) %{ 5602 predicate(UseAVX == 0); 5603 match(Set dst (AddVB dst src)); 5604 format %{ "paddb $dst,$src\t! add packedB" %} 5605 ins_encode %{ 5606 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5607 %} 5608 ins_pipe( pipe_slow ); 5609 %} 5610 5611 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5612 predicate(UseAVX > 0); 5613 match(Set dst (AddVB src1 src2)); 5614 format %{ "vpaddb $dst,$src1,$src2\t! 
add packedB" %} 5615 ins_encode %{ 5616 int vlen_enc = vector_length_encoding(this); 5617 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5618 %} 5619 ins_pipe( pipe_slow ); 5620 %} 5621 5622 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5623 predicate((UseAVX > 0) && 5624 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5625 match(Set dst (AddVB src (LoadVector mem))); 5626 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5627 ins_encode %{ 5628 int vlen_enc = vector_length_encoding(this); 5629 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5630 %} 5631 ins_pipe( pipe_slow ); 5632 %} 5633 5634 // Shorts/Chars vector add 5635 instruct vaddS(vec dst, vec src) %{ 5636 predicate(UseAVX == 0); 5637 match(Set dst (AddVS dst src)); 5638 format %{ "paddw $dst,$src\t! add packedS" %} 5639 ins_encode %{ 5640 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5641 %} 5642 ins_pipe( pipe_slow ); 5643 %} 5644 5645 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5646 predicate(UseAVX > 0); 5647 match(Set dst (AddVS src1 src2)); 5648 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5649 ins_encode %{ 5650 int vlen_enc = vector_length_encoding(this); 5651 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5652 %} 5653 ins_pipe( pipe_slow ); 5654 %} 5655 5656 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5657 predicate((UseAVX > 0) && 5658 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5659 match(Set dst (AddVS src (LoadVector mem))); 5660 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5661 ins_encode %{ 5662 int vlen_enc = vector_length_encoding(this); 5663 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5664 %} 5665 ins_pipe( pipe_slow ); 5666 %} 5667 5668 // Integers vector add 5669 instruct vaddI(vec dst, vec src) %{ 5670 predicate(UseAVX == 0); 5671 match(Set dst (AddVI dst src)); 5672 format %{ "paddd $dst,$src\t! add packedI" %} 5673 ins_encode %{ 5674 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5675 %} 5676 ins_pipe( pipe_slow ); 5677 %} 5678 5679 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5680 predicate(UseAVX > 0); 5681 match(Set dst (AddVI src1 src2)); 5682 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5683 ins_encode %{ 5684 int vlen_enc = vector_length_encoding(this); 5685 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5686 %} 5687 ins_pipe( pipe_slow ); 5688 %} 5689 5690 5691 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5692 predicate((UseAVX > 0) && 5693 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5694 match(Set dst (AddVI src (LoadVector mem))); 5695 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5696 ins_encode %{ 5697 int vlen_enc = vector_length_encoding(this); 5698 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5699 %} 5700 ins_pipe( pipe_slow ); 5701 %} 5702 5703 // Longs vector add 5704 instruct vaddL(vec dst, vec src) %{ 5705 predicate(UseAVX == 0); 5706 match(Set dst (AddVL dst src)); 5707 format %{ "paddq $dst,$src\t! add packedL" %} 5708 ins_encode %{ 5709 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5710 %} 5711 ins_pipe( pipe_slow ); 5712 %} 5713 5714 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5715 predicate(UseAVX > 0); 5716 match(Set dst (AddVL src1 src2)); 5717 format %{ "vpaddq $dst,$src1,$src2\t! 
add packedL" %} 5718 ins_encode %{ 5719 int vlen_enc = vector_length_encoding(this); 5720 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5721 %} 5722 ins_pipe( pipe_slow ); 5723 %} 5724 5725 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5726 predicate((UseAVX > 0) && 5727 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5728 match(Set dst (AddVL src (LoadVector mem))); 5729 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5730 ins_encode %{ 5731 int vlen_enc = vector_length_encoding(this); 5732 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5733 %} 5734 ins_pipe( pipe_slow ); 5735 %} 5736 5737 // Floats vector add 5738 instruct vaddF(vec dst, vec src) %{ 5739 predicate(UseAVX == 0); 5740 match(Set dst (AddVF dst src)); 5741 format %{ "addps $dst,$src\t! add packedF" %} 5742 ins_encode %{ 5743 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5744 %} 5745 ins_pipe( pipe_slow ); 5746 %} 5747 5748 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5749 predicate(UseAVX > 0); 5750 match(Set dst (AddVF src1 src2)); 5751 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5752 ins_encode %{ 5753 int vlen_enc = vector_length_encoding(this); 5754 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5755 %} 5756 ins_pipe( pipe_slow ); 5757 %} 5758 5759 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5760 predicate((UseAVX > 0) && 5761 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5762 match(Set dst (AddVF src (LoadVector mem))); 5763 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5764 ins_encode %{ 5765 int vlen_enc = vector_length_encoding(this); 5766 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5767 %} 5768 ins_pipe( pipe_slow ); 5769 %} 5770 5771 // Doubles vector add 5772 instruct vaddD(vec dst, vec src) %{ 5773 predicate(UseAVX == 0); 5774 match(Set dst (AddVD dst src)); 5775 format %{ "addpd $dst,$src\t! add packedD" %} 5776 ins_encode %{ 5777 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5778 %} 5779 ins_pipe( pipe_slow ); 5780 %} 5781 5782 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5783 predicate(UseAVX > 0); 5784 match(Set dst (AddVD src1 src2)); 5785 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5786 ins_encode %{ 5787 int vlen_enc = vector_length_encoding(this); 5788 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5789 %} 5790 ins_pipe( pipe_slow ); 5791 %} 5792 5793 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5794 predicate((UseAVX > 0) && 5795 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5796 match(Set dst (AddVD src (LoadVector mem))); 5797 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5798 ins_encode %{ 5799 int vlen_enc = vector_length_encoding(this); 5800 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5801 %} 5802 ins_pipe( pipe_slow ); 5803 %} 5804 5805 // --------------------------------- SUB -------------------------------------- 5806 5807 // Bytes vector sub 5808 instruct vsubB(vec dst, vec src) %{ 5809 predicate(UseAVX == 0); 5810 match(Set dst (SubVB dst src)); 5811 format %{ "psubb $dst,$src\t! sub packedB" %} 5812 ins_encode %{ 5813 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5814 %} 5815 ins_pipe( pipe_slow ); 5816 %} 5817 5818 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5819 predicate(UseAVX > 0); 5820 match(Set dst (SubVB src1 src2)); 5821 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packedB" %} 5822 ins_encode %{ 5823 int vlen_enc = vector_length_encoding(this); 5824 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5825 %} 5826 ins_pipe( pipe_slow ); 5827 %} 5828 5829 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5830 predicate((UseAVX > 0) && 5831 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5832 match(Set dst (SubVB src (LoadVector mem))); 5833 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5834 ins_encode %{ 5835 int vlen_enc = vector_length_encoding(this); 5836 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5837 %} 5838 ins_pipe( pipe_slow ); 5839 %} 5840 5841 // Shorts/Chars vector sub 5842 instruct vsubS(vec dst, vec src) %{ 5843 predicate(UseAVX == 0); 5844 match(Set dst (SubVS dst src)); 5845 format %{ "psubw $dst,$src\t! sub packedS" %} 5846 ins_encode %{ 5847 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5848 %} 5849 ins_pipe( pipe_slow ); 5850 %} 5851 5852 5853 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5854 predicate(UseAVX > 0); 5855 match(Set dst (SubVS src1 src2)); 5856 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5857 ins_encode %{ 5858 int vlen_enc = vector_length_encoding(this); 5859 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5860 %} 5861 ins_pipe( pipe_slow ); 5862 %} 5863 5864 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5865 predicate((UseAVX > 0) && 5866 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5867 match(Set dst (SubVS src (LoadVector mem))); 5868 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5869 ins_encode %{ 5870 int vlen_enc = vector_length_encoding(this); 5871 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5872 %} 5873 ins_pipe( pipe_slow ); 5874 %} 5875 5876 // Integers vector sub 5877 instruct vsubI(vec dst, vec src) %{ 5878 predicate(UseAVX == 0); 5879 match(Set dst (SubVI dst src)); 5880 format %{ "psubd $dst,$src\t! sub packedI" %} 5881 ins_encode %{ 5882 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5883 %} 5884 ins_pipe( pipe_slow ); 5885 %} 5886 5887 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5888 predicate(UseAVX > 0); 5889 match(Set dst (SubVI src1 src2)); 5890 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5891 ins_encode %{ 5892 int vlen_enc = vector_length_encoding(this); 5893 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5894 %} 5895 ins_pipe( pipe_slow ); 5896 %} 5897 5898 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5899 predicate((UseAVX > 0) && 5900 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5901 match(Set dst (SubVI src (LoadVector mem))); 5902 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5903 ins_encode %{ 5904 int vlen_enc = vector_length_encoding(this); 5905 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5906 %} 5907 ins_pipe( pipe_slow ); 5908 %} 5909 5910 // Longs vector sub 5911 instruct vsubL(vec dst, vec src) %{ 5912 predicate(UseAVX == 0); 5913 match(Set dst (SubVL dst src)); 5914 format %{ "psubq $dst,$src\t! sub packedL" %} 5915 ins_encode %{ 5916 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5917 %} 5918 ins_pipe( pipe_slow ); 5919 %} 5920 5921 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5922 predicate(UseAVX > 0); 5923 match(Set dst (SubVL src1 src2)); 5924 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packedL" %} 5925 ins_encode %{ 5926 int vlen_enc = vector_length_encoding(this); 5927 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5928 %} 5929 ins_pipe( pipe_slow ); 5930 %} 5931 5932 5933 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5934 predicate((UseAVX > 0) && 5935 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5936 match(Set dst (SubVL src (LoadVector mem))); 5937 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5938 ins_encode %{ 5939 int vlen_enc = vector_length_encoding(this); 5940 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5941 %} 5942 ins_pipe( pipe_slow ); 5943 %} 5944 5945 // Floats vector sub 5946 instruct vsubF(vec dst, vec src) %{ 5947 predicate(UseAVX == 0); 5948 match(Set dst (SubVF dst src)); 5949 format %{ "subps $dst,$src\t! sub packedF" %} 5950 ins_encode %{ 5951 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5952 %} 5953 ins_pipe( pipe_slow ); 5954 %} 5955 5956 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5957 predicate(UseAVX > 0); 5958 match(Set dst (SubVF src1 src2)); 5959 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5960 ins_encode %{ 5961 int vlen_enc = vector_length_encoding(this); 5962 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5963 %} 5964 ins_pipe( pipe_slow ); 5965 %} 5966 5967 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5968 predicate((UseAVX > 0) && 5969 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5970 match(Set dst (SubVF src (LoadVector mem))); 5971 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5972 ins_encode %{ 5973 int vlen_enc = vector_length_encoding(this); 5974 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5975 %} 5976 ins_pipe( pipe_slow ); 5977 %} 5978 5979 // Doubles vector sub 5980 instruct vsubD(vec dst, vec src) %{ 5981 predicate(UseAVX == 0); 5982 match(Set dst (SubVD dst src)); 5983 format %{ "subpd $dst,$src\t! sub packedD" %} 5984 ins_encode %{ 5985 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 5986 %} 5987 ins_pipe( pipe_slow ); 5988 %} 5989 5990 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 5991 predicate(UseAVX > 0); 5992 match(Set dst (SubVD src1 src2)); 5993 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 5994 ins_encode %{ 5995 int vlen_enc = vector_length_encoding(this); 5996 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5997 %} 5998 ins_pipe( pipe_slow ); 5999 %} 6000 6001 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 6002 predicate((UseAVX > 0) && 6003 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6004 match(Set dst (SubVD src (LoadVector mem))); 6005 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 6006 ins_encode %{ 6007 int vlen_enc = vector_length_encoding(this); 6008 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6009 %} 6010 ins_pipe( pipe_slow ); 6011 %} 6012 6013 // --------------------------------- MUL -------------------------------------- 6014 6015 // Byte vector mul 6016 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 6017 predicate(Matcher::vector_length_in_bytes(n) <= 8); 6018 match(Set dst (MulVB src1 src2)); 6019 effect(TEMP dst, TEMP xtmp); 6020 format %{ "mulVB $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 6021 ins_encode %{ 6022 assert(UseSSE > 3, "required"); 6023 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 6024 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 6025 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6026 __ psllw($dst$$XMMRegister, 8); 6027 __ psrlw($dst$$XMMRegister, 8); 6028 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6029 %} 6030 ins_pipe( pipe_slow ); 6031 %} 6032 6033 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 6034 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 6035 match(Set dst (MulVB src1 src2)); 6036 effect(TEMP dst, TEMP xtmp); 6037 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 6038 ins_encode %{ 6039 assert(UseSSE > 3, "required"); 6040 // Odd-index elements 6041 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 6042 __ psrlw($dst$$XMMRegister, 8); 6043 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 6044 __ psrlw($xtmp$$XMMRegister, 8); 6045 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6046 __ psllw($dst$$XMMRegister, 8); 6047 // Even-index elements 6048 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6049 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 6050 __ psllw($xtmp$$XMMRegister, 8); 6051 __ psrlw($xtmp$$XMMRegister, 8); 6052 // Combine 6053 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 6054 %} 6055 ins_pipe( pipe_slow ); 6056 %} 6057 6058 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6059 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 6060 match(Set dst (MulVB src1 src2)); 6061 effect(TEMP xtmp1, TEMP xtmp2); 6062 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 6063 ins_encode %{ 6064 int vlen_enc = vector_length_encoding(this); 6065 // Odd-index elements 6066 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 6067 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 6068 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6069 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 6070 // Even-index elements 6071 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6072 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6073 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6074 // Combine 6075 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6076 %} 6077 ins_pipe( pipe_slow ); 6078 %} 6079 6080 // Shorts/Chars vector mul 6081 instruct vmulS(vec dst, vec src) %{ 6082 predicate(UseAVX == 0); 6083 match(Set dst (MulVS dst src)); 6084 format %{ "pmullw $dst,$src\t! mul packedS" %} 6085 ins_encode %{ 6086 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6087 %} 6088 ins_pipe( pipe_slow ); 6089 %} 6090 6091 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 6092 predicate(UseAVX > 0); 6093 match(Set dst (MulVS src1 src2)); 6094 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 6095 ins_encode %{ 6096 int vlen_enc = vector_length_encoding(this); 6097 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6098 %} 6099 ins_pipe( pipe_slow ); 6100 %} 6101 6102 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 6103 predicate((UseAVX > 0) && 6104 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6105 match(Set dst (MulVS src (LoadVector mem))); 6106 format %{ "vpmullw $dst,$src,$mem\t! 
mul packedS" %} 6107 ins_encode %{ 6108 int vlen_enc = vector_length_encoding(this); 6109 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6110 %} 6111 ins_pipe( pipe_slow ); 6112 %} 6113 6114 // Integers vector mul 6115 instruct vmulI(vec dst, vec src) %{ 6116 predicate(UseAVX == 0); 6117 match(Set dst (MulVI dst src)); 6118 format %{ "pmulld $dst,$src\t! mul packedI" %} 6119 ins_encode %{ 6120 assert(UseSSE > 3, "required"); 6121 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6122 %} 6123 ins_pipe( pipe_slow ); 6124 %} 6125 6126 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 6127 predicate(UseAVX > 0); 6128 match(Set dst (MulVI src1 src2)); 6129 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 6130 ins_encode %{ 6131 int vlen_enc = vector_length_encoding(this); 6132 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6133 %} 6134 ins_pipe( pipe_slow ); 6135 %} 6136 6137 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 6138 predicate((UseAVX > 0) && 6139 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6140 match(Set dst (MulVI src (LoadVector mem))); 6141 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 6142 ins_encode %{ 6143 int vlen_enc = vector_length_encoding(this); 6144 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6145 %} 6146 ins_pipe( pipe_slow ); 6147 %} 6148 6149 // Longs vector mul 6150 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 6151 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6152 VM_Version::supports_avx512dq()) || 6153 VM_Version::supports_avx512vldq()); 6154 match(Set dst (MulVL src1 src2)); 6155 ins_cost(500); 6156 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %} 6157 ins_encode %{ 6158 assert(UseAVX > 2, "required"); 6159 int vlen_enc = vector_length_encoding(this); 6160 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6161 %} 6162 ins_pipe( pipe_slow ); 6163 %} 6164 6165 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 6166 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6167 VM_Version::supports_avx512dq()) || 6168 (Matcher::vector_length_in_bytes(n) > 8 && 6169 VM_Version::supports_avx512vldq())); 6170 match(Set dst (MulVL src (LoadVector mem))); 6171 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 6172 ins_cost(500); 6173 ins_encode %{ 6174 assert(UseAVX > 2, "required"); 6175 int vlen_enc = vector_length_encoding(this); 6176 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6177 %} 6178 ins_pipe( pipe_slow ); 6179 %} 6180 6181 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 6182 predicate(UseAVX == 0); 6183 match(Set dst (MulVL src1 src2)); 6184 ins_cost(500); 6185 effect(TEMP dst, TEMP xtmp); 6186 format %{ "mulVL $dst, $src1, $src2\t! 
using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only their lower 32 bits are needed
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllq($dst$$XMMRegister, 32);
    // Get the lo-lo products
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  ins_cost(500);
  format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only their lower 32 bits are needed
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
    // Get the lo-lo products
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t!
mul packedF" %} 6267 ins_encode %{ 6268 int vlen_enc = vector_length_encoding(this); 6269 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6270 %} 6271 ins_pipe( pipe_slow ); 6272 %} 6273 6274 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 6275 predicate((UseAVX > 0) && 6276 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6277 match(Set dst (MulVF src (LoadVector mem))); 6278 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 6279 ins_encode %{ 6280 int vlen_enc = vector_length_encoding(this); 6281 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6282 %} 6283 ins_pipe( pipe_slow ); 6284 %} 6285 6286 // Doubles vector mul 6287 instruct vmulD(vec dst, vec src) %{ 6288 predicate(UseAVX == 0); 6289 match(Set dst (MulVD dst src)); 6290 format %{ "mulpd $dst,$src\t! mul packedD" %} 6291 ins_encode %{ 6292 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 6293 %} 6294 ins_pipe( pipe_slow ); 6295 %} 6296 6297 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 6298 predicate(UseAVX > 0); 6299 match(Set dst (MulVD src1 src2)); 6300 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} 6301 ins_encode %{ 6302 int vlen_enc = vector_length_encoding(this); 6303 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6304 %} 6305 ins_pipe( pipe_slow ); 6306 %} 6307 6308 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 6309 predicate((UseAVX > 0) && 6310 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6311 match(Set dst (MulVD src (LoadVector mem))); 6312 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 6313 ins_encode %{ 6314 int vlen_enc = vector_length_encoding(this); 6315 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6316 %} 6317 ins_pipe( pipe_slow ); 6318 %} 6319 6320 // --------------------------------- DIV -------------------------------------- 6321 6322 // Floats vector div 6323 instruct vdivF(vec dst, vec src) %{ 6324 predicate(UseAVX == 0); 6325 match(Set dst (DivVF dst src)); 6326 format %{ "divps $dst,$src\t! div packedF" %} 6327 ins_encode %{ 6328 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6329 %} 6330 ins_pipe( pipe_slow ); 6331 %} 6332 6333 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 6334 predicate(UseAVX > 0); 6335 match(Set dst (DivVF src1 src2)); 6336 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 6337 ins_encode %{ 6338 int vlen_enc = vector_length_encoding(this); 6339 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6340 %} 6341 ins_pipe( pipe_slow ); 6342 %} 6343 6344 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6345 predicate((UseAVX > 0) && 6346 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6347 match(Set dst (DivVF src (LoadVector mem))); 6348 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6349 ins_encode %{ 6350 int vlen_enc = vector_length_encoding(this); 6351 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6352 %} 6353 ins_pipe( pipe_slow ); 6354 %} 6355 6356 // Doubles vector div 6357 instruct vdivD(vec dst, vec src) %{ 6358 predicate(UseAVX == 0); 6359 match(Set dst (DivVD dst src)); 6360 format %{ "divpd $dst,$src\t! div packedD" %} 6361 ins_encode %{ 6362 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6363 %} 6364 ins_pipe( pipe_slow ); 6365 %} 6366 6367 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6368 predicate(UseAVX > 0); 6369 match(Set dst (DivVD src1 src2)); 6370 format %{ "vdivpd $dst,$src1,$src2\t! 
div packedD" %} 6371 ins_encode %{ 6372 int vlen_enc = vector_length_encoding(this); 6373 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6374 %} 6375 ins_pipe( pipe_slow ); 6376 %} 6377 6378 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6379 predicate((UseAVX > 0) && 6380 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6381 match(Set dst (DivVD src (LoadVector mem))); 6382 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 6383 ins_encode %{ 6384 int vlen_enc = vector_length_encoding(this); 6385 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6386 %} 6387 ins_pipe( pipe_slow ); 6388 %} 6389 6390 // ------------------------------ MinMax --------------------------------------- 6391 6392 // Byte, Short, Int vector Min/Max 6393 instruct minmax_reg_sse(vec dst, vec src) %{ 6394 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6395 UseAVX == 0); 6396 match(Set dst (MinV dst src)); 6397 match(Set dst (MaxV dst src)); 6398 format %{ "vector_minmax $dst,$src\t! " %} 6399 ins_encode %{ 6400 assert(UseSSE >= 4, "required"); 6401 6402 int opcode = this->ideal_Opcode(); 6403 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6404 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6405 %} 6406 ins_pipe( pipe_slow ); 6407 %} 6408 6409 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6410 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6411 UseAVX > 0); 6412 match(Set dst (MinV src1 src2)); 6413 match(Set dst (MaxV src1 src2)); 6414 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 6415 ins_encode %{ 6416 int opcode = this->ideal_Opcode(); 6417 int vlen_enc = vector_length_encoding(this); 6418 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6419 6420 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6421 %} 6422 ins_pipe( pipe_slow ); 6423 %} 6424 6425 // Long vector Min/Max 6426 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6427 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6428 UseAVX == 0); 6429 match(Set dst (MinV dst src)); 6430 match(Set dst (MaxV src dst)); 6431 effect(TEMP dst, TEMP tmp); 6432 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6433 ins_encode %{ 6434 assert(UseSSE >= 4, "required"); 6435 6436 int opcode = this->ideal_Opcode(); 6437 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6438 assert(elem_bt == T_LONG, "sanity"); 6439 6440 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6441 %} 6442 ins_pipe( pipe_slow ); 6443 %} 6444 6445 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6446 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6447 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6448 match(Set dst (MinV src1 src2)); 6449 match(Set dst (MaxV src1 src2)); 6450 effect(TEMP dst); 6451 format %{ "vector_minmaxL $dst,$src1,$src2\t! 
" %} 6452 ins_encode %{ 6453 int vlen_enc = vector_length_encoding(this); 6454 int opcode = this->ideal_Opcode(); 6455 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6456 assert(elem_bt == T_LONG, "sanity"); 6457 6458 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6459 %} 6460 ins_pipe( pipe_slow ); 6461 %} 6462 6463 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6464 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6465 Matcher::vector_element_basic_type(n) == T_LONG); 6466 match(Set dst (MinV src1 src2)); 6467 match(Set dst (MaxV src1 src2)); 6468 format %{ "vector_minmaxL $dst,$src1,src2\t! " %} 6469 ins_encode %{ 6470 assert(UseAVX > 2, "required"); 6471 6472 int vlen_enc = vector_length_encoding(this); 6473 int opcode = this->ideal_Opcode(); 6474 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6475 assert(elem_bt == T_LONG, "sanity"); 6476 6477 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6478 %} 6479 ins_pipe( pipe_slow ); 6480 %} 6481 6482 // Float/Double vector Min/Max 6483 instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{ 6484 predicate(VM_Version::supports_avx10_2() && 6485 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6486 match(Set dst (MinV a b)); 6487 match(Set dst (MaxV a b)); 6488 format %{ "vector_minmaxFP $dst, $a, $b" %} 6489 ins_encode %{ 6490 int vlen_enc = vector_length_encoding(this); 6491 int opcode = this->ideal_Opcode(); 6492 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6493 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); 6494 %} 6495 ins_pipe( pipe_slow ); 6496 %} 6497 6498 // Float/Double vector Min/Max 6499 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6500 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 && 6501 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6502 UseAVX > 0); 6503 match(Set dst (MinV a b)); 6504 match(Set dst (MaxV a b)); 6505 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6506 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6507 ins_encode %{ 6508 assert(UseAVX > 0, "required"); 6509 6510 int opcode = this->ideal_Opcode(); 6511 int vlen_enc = vector_length_encoding(this); 6512 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6513 6514 __ vminmax_fp(opcode, elem_bt, 6515 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6516 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6517 %} 6518 ins_pipe( pipe_slow ); 6519 %} 6520 6521 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6522 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 && 6523 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6524 match(Set dst (MinV a b)); 6525 match(Set dst (MaxV a b)); 6526 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6527 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6528 ins_encode %{ 6529 assert(UseAVX > 2, "required"); 6530 6531 int opcode = this->ideal_Opcode(); 6532 int vlen_enc = vector_length_encoding(this); 6533 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6534 6535 __ evminmax_fp(opcode, elem_bt, 
6536 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6537 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6538 %} 6539 ins_pipe( pipe_slow ); 6540 %} 6541 6542 // ------------------------------ Unsigned vector Min/Max ---------------------- 6543 6544 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{ 6545 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6546 match(Set dst (UMinV a b)); 6547 match(Set dst (UMaxV a b)); 6548 format %{ "vector_uminmax $dst,$a,$b\t!" %} 6549 ins_encode %{ 6550 int opcode = this->ideal_Opcode(); 6551 int vlen_enc = vector_length_encoding(this); 6552 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6553 assert(is_integral_type(elem_bt), ""); 6554 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); 6555 %} 6556 ins_pipe( pipe_slow ); 6557 %} 6558 6559 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{ 6560 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6561 match(Set dst (UMinV a (LoadVector b))); 6562 match(Set dst (UMaxV a (LoadVector b))); 6563 format %{ "vector_uminmax $dst,$a,$b\t!" %} 6564 ins_encode %{ 6565 int opcode = this->ideal_Opcode(); 6566 int vlen_enc = vector_length_encoding(this); 6567 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6568 assert(is_integral_type(elem_bt), ""); 6569 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc); 6570 %} 6571 ins_pipe( pipe_slow ); 6572 %} 6573 6574 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{ 6575 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG); 6576 match(Set dst (UMinV a b)); 6577 match(Set dst (UMaxV a b)); 6578 effect(TEMP xtmp1, TEMP xtmp2); 6579 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %} 6580 ins_encode %{ 6581 int opcode = this->ideal_Opcode(); 6582 int vlen_enc = vector_length_encoding(this); 6583 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6584 %} 6585 ins_pipe( pipe_slow ); 6586 %} 6587 6588 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{ 6589 match(Set dst (UMinV (Binary dst src2) mask)); 6590 match(Set dst (UMaxV (Binary dst src2) mask)); 6591 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %} 6592 ins_encode %{ 6593 int vlen_enc = vector_length_encoding(this); 6594 BasicType bt = Matcher::vector_element_basic_type(this); 6595 int opc = this->ideal_Opcode(); 6596 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 6597 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 6598 %} 6599 ins_pipe( pipe_slow ); 6600 %} 6601 6602 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{ 6603 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask)); 6604 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask)); 6605 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! 
umin/max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Signum/CopySign ---------------------------

instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
  match(Set dst (SignumF dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumF $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
  match(Set dst (SignumD dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumD $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------
// For copySign use 0xE4 as writemask for vpternlog
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
// C (xmm2) is set to 0x7FFFFFFF
// Wherever xmm2 is 0, we want to pick from B (sign)
// Wherever xmm2 is 1, we want to pick from A (src)
//
// A B C Result
// 0 0 0 0
// 0 0 1 0
// 0 1 0 1
// 0 1 1 0
// 1 0 0 0
// 1 0 1 1
// 1 1 0 1
// 1 1 1 1
//
// Result, read from the high bit (A=B=C=1) down to the low bit, is 0b11100100 = 0xe4
// ---------------------------------------
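// A quick standalone way to double-check that immediate: the hypothetical C++
// snippet below (not part of this file) rebuilds the vpternlog table from the
// selection rule "take the src (A) bit where the mask (C) bit is 1, otherwise
// take the sign (B) bit" and prints the encoded immediate.
//
//   #include <cstdio>
//   int main() {
//     unsigned imm = 0;
//     for (int idx = 7; idx >= 0; idx--) {   // idx bits are A:B:C
//       int a = (idx >> 2) & 1;
//       int b = (idx >> 1) & 1;
//       int c = idx & 1;
//       imm = (imm << 1) | (c ? a : b);      // high bit (A=B=C=1) first
//     }
//     printf("0x%02X\n", imm);               // prints 0xE4
//     return 0;
//   }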
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ movl($tmp2$$Register, 0x7FFFFFFF);
    __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
    __ movq($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

//----------------------------- CompressBits/ExpandBits ------------------------

instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src mask));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src (LoadI mask)));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src (LoadI mask)));
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Sqrt --------------------------------------

instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrtF_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floating point vector sqrt
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t!
sqrt packedD" %} 6786 ins_encode %{ 6787 assert(UseAVX > 0, "required"); 6788 int vlen_enc = vector_length_encoding(this); 6789 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6790 %} 6791 ins_pipe( pipe_slow ); 6792 %} 6793 6794 instruct vsqrtD_mem(vec dst, memory mem) %{ 6795 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6796 match(Set dst (SqrtVD (LoadVector mem))); 6797 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6798 ins_encode %{ 6799 assert(UseAVX > 0, "required"); 6800 int vlen_enc = vector_length_encoding(this); 6801 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6802 %} 6803 ins_pipe( pipe_slow ); 6804 %} 6805 6806 // ------------------------------ Shift --------------------------------------- 6807 6808 // Left and right shift count vectors are the same on x86 6809 // (only lowest bits of xmm reg are used for count). 6810 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6811 match(Set dst (LShiftCntV cnt)); 6812 match(Set dst (RShiftCntV cnt)); 6813 format %{ "movdl $dst,$cnt\t! load shift count" %} 6814 ins_encode %{ 6815 __ movdl($dst$$XMMRegister, $cnt$$Register); 6816 %} 6817 ins_pipe( pipe_slow ); 6818 %} 6819 6820 // Byte vector shift 6821 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6822 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6823 match(Set dst ( LShiftVB src shift)); 6824 match(Set dst ( RShiftVB src shift)); 6825 match(Set dst (URShiftVB src shift)); 6826 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6827 format %{"vector_byte_shift $dst,$src,$shift" %} 6828 ins_encode %{ 6829 assert(UseSSE > 3, "required"); 6830 int opcode = this->ideal_Opcode(); 6831 bool sign = (opcode != Op_URShiftVB); 6832 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6833 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6834 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6835 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6836 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6837 %} 6838 ins_pipe( pipe_slow ); 6839 %} 6840 6841 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6842 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6843 UseAVX <= 1); 6844 match(Set dst ( LShiftVB src shift)); 6845 match(Set dst ( RShiftVB src shift)); 6846 match(Set dst (URShiftVB src shift)); 6847 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6848 format %{"vector_byte_shift $dst,$src,$shift" %} 6849 ins_encode %{ 6850 assert(UseSSE > 3, "required"); 6851 int opcode = this->ideal_Opcode(); 6852 bool sign = (opcode != Op_URShiftVB); 6853 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6854 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6855 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6856 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6857 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6858 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6859 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6860 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6861 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6862 %} 6863 ins_pipe( pipe_slow ); 6864 %} 6865 6866 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6867 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6868 UseAVX > 1); 6869 match(Set dst ( LShiftVB src shift)); 6870 match(Set dst ( RShiftVB src shift)); 6871 match(Set 
dst (URShiftVB src shift)); 6872 effect(TEMP dst, TEMP tmp); 6873 format %{"vector_byte_shift $dst,$src,$shift" %} 6874 ins_encode %{ 6875 int opcode = this->ideal_Opcode(); 6876 bool sign = (opcode != Op_URShiftVB); 6877 int vlen_enc = Assembler::AVX_256bit; 6878 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6879 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6880 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6881 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6882 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6883 %} 6884 ins_pipe( pipe_slow ); 6885 %} 6886 6887 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6888 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6889 match(Set dst ( LShiftVB src shift)); 6890 match(Set dst ( RShiftVB src shift)); 6891 match(Set dst (URShiftVB src shift)); 6892 effect(TEMP dst, TEMP tmp); 6893 format %{"vector_byte_shift $dst,$src,$shift" %} 6894 ins_encode %{ 6895 assert(UseAVX > 1, "required"); 6896 int opcode = this->ideal_Opcode(); 6897 bool sign = (opcode != Op_URShiftVB); 6898 int vlen_enc = Assembler::AVX_256bit; 6899 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 6900 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6901 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6902 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6903 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6904 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6905 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6906 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6907 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6908 %} 6909 ins_pipe( pipe_slow ); 6910 %} 6911 6912 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6913 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift()); 6914 match(Set dst ( LShiftVB src shift)); 6915 match(Set dst (RShiftVB src shift)); 6916 match(Set dst (URShiftVB src shift)); 6917 effect(TEMP dst, TEMP tmp1, TEMP tmp2); 6918 format %{"vector_byte_shift $dst,$src,$shift" %} 6919 ins_encode %{ 6920 assert(UseAVX > 2, "required"); 6921 int opcode = this->ideal_Opcode(); 6922 bool sign = (opcode != Op_URShiftVB); 6923 int vlen_enc = Assembler::AVX_512bit; 6924 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 6925 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 6926 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6927 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6928 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6929 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6930 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6931 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6932 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6933 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 6934 __ evmovdquq($tmp2$$XMMRegister, 
               ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java converts the short value to an int (with sign
// extension) before shifting: e.g. (short)-1 >>> 1, narrowed back to short,
// is still -1 (0xFFFF), whereas a 16-bit logical shift would yield 0x7FFF.
// Char vectors are fine since chars are unsigned values.
// Shorts/Chars vector left shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else if (vlen == 4) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert (vlen == 8, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left constant shift
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t!
shift packedI" %} 7009 ins_encode %{ 7010 int opcode = this->ideal_Opcode(); 7011 if (UseAVX > 0) { 7012 int vector_len = vector_length_encoding(this); 7013 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 7014 } else { 7015 int vlen = Matcher::vector_length(this); 7016 if (vlen == 2) { 7017 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 7018 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7019 } else { 7020 assert(vlen == 4, "sanity"); 7021 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7022 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7023 } 7024 } 7025 %} 7026 ins_pipe( pipe_slow ); 7027 %} 7028 7029 // Longs vector shift 7030 instruct vshiftL(vec dst, vec src, vec shift) %{ 7031 predicate(!n->as_ShiftV()->is_var_shift()); 7032 match(Set dst ( LShiftVL src shift)); 7033 match(Set dst (URShiftVL src shift)); 7034 effect(TEMP dst, USE src, USE shift); 7035 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %} 7036 ins_encode %{ 7037 int opcode = this->ideal_Opcode(); 7038 if (UseAVX > 0) { 7039 int vlen_enc = vector_length_encoding(this); 7040 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7041 } else { 7042 assert(Matcher::vector_length(this) == 2, ""); 7043 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7044 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 7045 } 7046 %} 7047 ins_pipe( pipe_slow ); 7048 %} 7049 7050 // Longs vector constant shift 7051 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 7052 match(Set dst (LShiftVL src (LShiftCntV shift))); 7053 match(Set dst (URShiftVL src (RShiftCntV shift))); 7054 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 7055 ins_encode %{ 7056 int opcode = this->ideal_Opcode(); 7057 if (UseAVX > 0) { 7058 int vector_len = vector_length_encoding(this); 7059 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 7060 } else { 7061 assert(Matcher::vector_length(this) == 2, ""); 7062 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7063 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7064 } 7065 %} 7066 ins_pipe( pipe_slow ); 7067 %} 7068 7069 // -------------------ArithmeticRightShift ----------------------------------- 7070 // Long vector arithmetic right shift 7071 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 7072 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 7073 match(Set dst (RShiftVL src shift)); 7074 effect(TEMP dst, TEMP tmp); 7075 format %{ "vshiftq $dst,$src,$shift" %} 7076 ins_encode %{ 7077 uint vlen = Matcher::vector_length(this); 7078 if (vlen == 2) { 7079 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7080 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 7081 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 7082 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 7083 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 7084 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 7085 } else { 7086 assert(vlen == 4, "sanity"); 7087 assert(UseAVX > 1, "required"); 7088 int vlen_enc = Assembler::AVX_256bit; 7089 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7090 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 7091 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7092 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 7093 __ vpsubq($dst$$XMMRegister, 
$dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 7094 } 7095 %} 7096 ins_pipe( pipe_slow ); 7097 %} 7098 7099 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 7100 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 7101 match(Set dst (RShiftVL src shift)); 7102 format %{ "vshiftq $dst,$src,$shift" %} 7103 ins_encode %{ 7104 int vlen_enc = vector_length_encoding(this); 7105 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7106 %} 7107 ins_pipe( pipe_slow ); 7108 %} 7109 7110 // ------------------- Variable Shift ----------------------------- 7111 // Byte variable shift 7112 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7113 predicate(Matcher::vector_length(n) <= 8 && 7114 n->as_ShiftV()->is_var_shift() && 7115 !VM_Version::supports_avx512bw()); 7116 match(Set dst ( LShiftVB src shift)); 7117 match(Set dst ( RShiftVB src shift)); 7118 match(Set dst (URShiftVB src shift)); 7119 effect(TEMP dst, TEMP vtmp); 7120 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7121 ins_encode %{ 7122 assert(UseAVX >= 2, "required"); 7123 7124 int opcode = this->ideal_Opcode(); 7125 int vlen_enc = Assembler::AVX_128bit; 7126 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7127 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7128 %} 7129 ins_pipe( pipe_slow ); 7130 %} 7131 7132 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7133 predicate(Matcher::vector_length(n) == 16 && 7134 n->as_ShiftV()->is_var_shift() && 7135 !VM_Version::supports_avx512bw()); 7136 match(Set dst ( LShiftVB src shift)); 7137 match(Set dst ( RShiftVB src shift)); 7138 match(Set dst (URShiftVB src shift)); 7139 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7140 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp1, $vtmp2 as TEMP" %} 7141 ins_encode %{ 7142 assert(UseAVX >= 2, "required"); 7143 7144 int opcode = this->ideal_Opcode(); 7145 int vlen_enc = Assembler::AVX_128bit; 7146 // Shift lower half and get word result in dst 7147 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7148 7149 // Shift upper half and get word result in vtmp1 7150 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7151 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7152 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7153 7154 // Merge and down convert the two word results to byte in dst 7155 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7156 %} 7157 ins_pipe( pipe_slow ); 7158 %} 7159 7160 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 7161 predicate(Matcher::vector_length(n) == 32 && 7162 n->as_ShiftV()->is_var_shift() && 7163 !VM_Version::supports_avx512bw()); 7164 match(Set dst ( LShiftVB src shift)); 7165 match(Set dst ( RShiftVB src shift)); 7166 match(Set dst (URShiftVB src shift)); 7167 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 7168 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 7169 ins_encode %{ 7170 assert(UseAVX >= 2, "required"); 7171 7172 int opcode = this->ideal_Opcode(); 7173 int vlen_enc = Assembler::AVX_128bit; 7174 // Process lower 128 bits and get result in dst 7175 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7176 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7177 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7178 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7179 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7180 7181 // Process higher 128 bits and get result in vtmp3 7182 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7183 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7184 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 7185 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 7186 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 7187 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7188 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 7189 7190 // Merge the two results in dst 7191 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7192 %} 7193 ins_pipe( pipe_slow ); 7194 %} 7195 7196 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ 7197 predicate(Matcher::vector_length(n) <= 32 && 7198 n->as_ShiftV()->is_var_shift() && 7199 VM_Version::supports_avx512bw()); 7200 match(Set dst ( LShiftVB src shift)); 7201 match(Set dst ( RShiftVB src shift)); 7202 match(Set dst (URShiftVB src shift)); 7203 effect(TEMP dst, TEMP vtmp); 7204 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp as TEMP" %} 7205 ins_encode %{ 7206 assert(UseAVX > 2, "required"); 7207 7208 int opcode = this->ideal_Opcode(); 7209 int vlen_enc = vector_length_encoding(this); 7210 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7211 %} 7212 ins_pipe( pipe_slow ); 7213 %} 7214 7215 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7216 predicate(Matcher::vector_length(n) == 64 && 7217 n->as_ShiftV()->is_var_shift() && 7218 VM_Version::supports_avx512bw()); 7219 match(Set dst ( LShiftVB src shift)); 7220 match(Set dst ( RShiftVB src shift)); 7221 match(Set dst (URShiftVB src shift)); 7222 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7223 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7224 ins_encode %{ 7225 assert(UseAVX > 2, "required"); 7226 7227 int opcode = this->ideal_Opcode(); 7228 int vlen_enc = Assembler::AVX_256bit; 7229 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7230 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7231 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7232 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7233 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7234 %} 7235 ins_pipe( pipe_slow ); 7236 %} 7237 7238 // Short variable shift 7239 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7240 predicate(Matcher::vector_length(n) <= 8 && 7241 n->as_ShiftV()->is_var_shift() && 7242 !VM_Version::supports_avx512bw()); 7243 match(Set dst ( LShiftVS src shift)); 7244 match(Set dst ( RShiftVS src shift)); 7245 match(Set dst (URShiftVS src shift)); 7246 effect(TEMP dst, TEMP vtmp); 7247 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7248 ins_encode %{ 7249 assert(UseAVX >= 2, "required"); 7250 7251 int opcode = this->ideal_Opcode(); 7252 bool sign = (opcode != Op_URShiftVS); 7253 int vlen_enc = Assembler::AVX_256bit; 7254 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 7255 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 7256 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 7257 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7258 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 7259 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7260 %} 7261 ins_pipe( pipe_slow ); 7262 %} 7263 7264 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7265 predicate(Matcher::vector_length(n) == 16 && 7266 n->as_ShiftV()->is_var_shift() && 7267 !VM_Version::supports_avx512bw()); 7268 match(Set dst ( LShiftVS src shift)); 7269 match(Set dst ( RShiftVS src shift)); 7270 match(Set dst (URShiftVS src shift)); 7271 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7272 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7273 ins_encode %{ 7274 assert(UseAVX >= 2, "required"); 7275 7276 int opcode = this->ideal_Opcode(); 7277 bool sign = (opcode != Op_URShiftVS); 7278 int vlen_enc = Assembler::AVX_256bit; 7279 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP 7280 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 7281 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7282 __ 
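// Editor's note: without AVX512BW there are no 16-bit per-element variable
// shifts (vpsllvw/vpsravw/vpsrlvw), so each 8-short half is widened to
// dwords, shifted with the dword variable-shift forms, and narrowed back.
// Per element this is roughly (pseudo-code):
//   int wide = sign ? (int)(short)elem : (int)(unsigned short)elem;
//   wide     = shift_op(wide, count);        // vpsllvd / vpsravd / vpsrlvd
//   result   = (short)(wide & 0xFFFF);       // mask, then pack
// The AND with vector_int_to_short_mask() keeps every dword in [0, 0xFFFF],
// so the unsigned-saturating vpackusdw at the end truncates instead of
// clamping.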
varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7283 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7284 7285 // Shift upper half, with result in dst using vtmp1 as TEMP 7286 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 7287 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 7288 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7289 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7290 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7291 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7292 7293 // Merge lower and upper half result into dst 7294 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7295 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 7296 %} 7297 ins_pipe( pipe_slow ); 7298 %} 7299 7300 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 7301 predicate(n->as_ShiftV()->is_var_shift() && 7302 VM_Version::supports_avx512bw()); 7303 match(Set dst ( LShiftVS src shift)); 7304 match(Set dst ( RShiftVS src shift)); 7305 match(Set dst (URShiftVS src shift)); 7306 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 7307 ins_encode %{ 7308 assert(UseAVX > 2, "required"); 7309 7310 int opcode = this->ideal_Opcode(); 7311 int vlen_enc = vector_length_encoding(this); 7312 if (!VM_Version::supports_avx512vl()) { 7313 vlen_enc = Assembler::AVX_512bit; 7314 } 7315 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7316 %} 7317 ins_pipe( pipe_slow ); 7318 %} 7319 7320 //Integer variable shift 7321 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 7322 predicate(n->as_ShiftV()->is_var_shift()); 7323 match(Set dst ( LShiftVI src shift)); 7324 match(Set dst ( RShiftVI src shift)); 7325 match(Set dst (URShiftVI src shift)); 7326 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 7327 ins_encode %{ 7328 assert(UseAVX >= 2, "required"); 7329 7330 int opcode = this->ideal_Opcode(); 7331 int vlen_enc = vector_length_encoding(this); 7332 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7333 %} 7334 ins_pipe( pipe_slow ); 7335 %} 7336 7337 //Long variable shift 7338 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 7339 predicate(n->as_ShiftV()->is_var_shift()); 7340 match(Set dst ( LShiftVL src shift)); 7341 match(Set dst (URShiftVL src shift)); 7342 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 7343 ins_encode %{ 7344 assert(UseAVX >= 2, "required"); 7345 7346 int opcode = this->ideal_Opcode(); 7347 int vlen_enc = vector_length_encoding(this); 7348 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7349 %} 7350 ins_pipe( pipe_slow ); 7351 %} 7352 7353 //Long variable right shift arithmetic 7354 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 7355 predicate(Matcher::vector_length(n) <= 4 && 7356 n->as_ShiftV()->is_var_shift() && 7357 UseAVX == 2); 7358 match(Set dst (RShiftVL src shift)); 7359 effect(TEMP dst, TEMP vtmp); 7360 format %{ "vector_varshift_long $dst,$src,$shift\n\t! 
using $vtmp as TEMP" %} 7361 ins_encode %{ 7362 int opcode = this->ideal_Opcode(); 7363 int vlen_enc = vector_length_encoding(this); 7364 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 7365 $vtmp$$XMMRegister); 7366 %} 7367 ins_pipe( pipe_slow ); 7368 %} 7369 7370 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 7371 predicate(n->as_ShiftV()->is_var_shift() && 7372 UseAVX > 2); 7373 match(Set dst (RShiftVL src shift)); 7374 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %} 7375 ins_encode %{ 7376 int opcode = this->ideal_Opcode(); 7377 int vlen_enc = vector_length_encoding(this); 7378 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7379 %} 7380 ins_pipe( pipe_slow ); 7381 %} 7382 7383 // --------------------------------- AND -------------------------------------- 7384 7385 instruct vand(vec dst, vec src) %{ 7386 predicate(UseAVX == 0); 7387 match(Set dst (AndV dst src)); 7388 format %{ "pand $dst,$src\t! and vectors" %} 7389 ins_encode %{ 7390 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7391 %} 7392 ins_pipe( pipe_slow ); 7393 %} 7394 7395 instruct vand_reg(vec dst, vec src1, vec src2) %{ 7396 predicate(UseAVX > 0); 7397 match(Set dst (AndV src1 src2)); 7398 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 7399 ins_encode %{ 7400 int vlen_enc = vector_length_encoding(this); 7401 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7402 %} 7403 ins_pipe( pipe_slow ); 7404 %} 7405 7406 instruct vand_mem(vec dst, vec src, memory mem) %{ 7407 predicate((UseAVX > 0) && 7408 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7409 match(Set dst (AndV src (LoadVector mem))); 7410 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 7411 ins_encode %{ 7412 int vlen_enc = vector_length_encoding(this); 7413 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7414 %} 7415 ins_pipe( pipe_slow ); 7416 %} 7417 7418 // --------------------------------- OR --------------------------------------- 7419 7420 instruct vor(vec dst, vec src) %{ 7421 predicate(UseAVX == 0); 7422 match(Set dst (OrV dst src)); 7423 format %{ "por $dst,$src\t! or vectors" %} 7424 ins_encode %{ 7425 __ por($dst$$XMMRegister, $src$$XMMRegister); 7426 %} 7427 ins_pipe( pipe_slow ); 7428 %} 7429 7430 instruct vor_reg(vec dst, vec src1, vec src2) %{ 7431 predicate(UseAVX > 0); 7432 match(Set dst (OrV src1 src2)); 7433 format %{ "vpor $dst,$src1,$src2\t! or vectors" %} 7434 ins_encode %{ 7435 int vlen_enc = vector_length_encoding(this); 7436 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7437 %} 7438 ins_pipe( pipe_slow ); 7439 %} 7440 7441 instruct vor_mem(vec dst, vec src, memory mem) %{ 7442 predicate((UseAVX > 0) && 7443 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7444 match(Set dst (OrV src (LoadVector mem))); 7445 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 7446 ins_encode %{ 7447 int vlen_enc = vector_length_encoding(this); 7448 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7449 %} 7450 ins_pipe( pipe_slow ); 7451 %} 7452 7453 // --------------------------------- XOR -------------------------------------- 7454 7455 instruct vxor(vec dst, vec src) %{ 7456 predicate(UseAVX == 0); 7457 match(Set dst (XorV dst src)); 7458 format %{ "pxor $dst,$src\t! 
xor vectors" %} 7459 ins_encode %{ 7460 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7461 %} 7462 ins_pipe( pipe_slow ); 7463 %} 7464 7465 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7466 predicate(UseAVX > 0); 7467 match(Set dst (XorV src1 src2)); 7468 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7469 ins_encode %{ 7470 int vlen_enc = vector_length_encoding(this); 7471 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7472 %} 7473 ins_pipe( pipe_slow ); 7474 %} 7475 7476 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7477 predicate((UseAVX > 0) && 7478 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7479 match(Set dst (XorV src (LoadVector mem))); 7480 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7481 ins_encode %{ 7482 int vlen_enc = vector_length_encoding(this); 7483 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7484 %} 7485 ins_pipe( pipe_slow ); 7486 %} 7487 7488 // --------------------------------- VectorCast -------------------------------------- 7489 7490 instruct vcastBtoX(vec dst, vec src) %{ 7491 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE); 7492 match(Set dst (VectorCastB2X src)); 7493 format %{ "vector_cast_b2x $dst,$src\t!" %} 7494 ins_encode %{ 7495 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7496 int vlen_enc = vector_length_encoding(this); 7497 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7498 %} 7499 ins_pipe( pipe_slow ); 7500 %} 7501 7502 instruct vcastBtoD(legVec dst, legVec src) %{ 7503 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE); 7504 match(Set dst (VectorCastB2X src)); 7505 format %{ "vector_cast_b2x $dst,$src\t!" %} 7506 ins_encode %{ 7507 int vlen_enc = vector_length_encoding(this); 7508 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7509 %} 7510 ins_pipe( pipe_slow ); 7511 %} 7512 7513 instruct castStoX(vec dst, vec src) %{ 7514 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7515 Matcher::vector_length(n->in(1)) <= 8 && // src 7516 Matcher::vector_element_basic_type(n) == T_BYTE); 7517 match(Set dst (VectorCastS2X src)); 7518 format %{ "vector_cast_s2x $dst,$src" %} 7519 ins_encode %{ 7520 assert(UseAVX > 0, "required"); 7521 7522 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7523 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7524 %} 7525 ins_pipe( pipe_slow ); 7526 %} 7527 7528 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7529 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7530 Matcher::vector_length(n->in(1)) == 16 && // src 7531 Matcher::vector_element_basic_type(n) == T_BYTE); 7532 effect(TEMP dst, TEMP vtmp); 7533 match(Set dst (VectorCastS2X src)); 7534 format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp as TEMP" %} 7535 ins_encode %{ 7536 assert(UseAVX > 0, "required"); 7537 7538 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7539 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7540 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7541 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7542 %} 7543 ins_pipe( pipe_slow ); 7544 %} 7545 7546 instruct vcastStoX_evex(vec dst, vec src) %{ 7547 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7548 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7549 match(Set dst (VectorCastS2X src)); 7550 format %{ "vector_cast_s2x $dst,$src\t!" %} 7551 ins_encode %{ 7552 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7553 int src_vlen_enc = vector_length_encoding(this, $src); 7554 int vlen_enc = vector_length_encoding(this); 7555 switch (to_elem_bt) { 7556 case T_BYTE: 7557 if (!VM_Version::supports_avx512vl()) { 7558 vlen_enc = Assembler::AVX_512bit; 7559 } 7560 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7561 break; 7562 case T_INT: 7563 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7564 break; 7565 case T_FLOAT: 7566 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7567 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7568 break; 7569 case T_LONG: 7570 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7571 break; 7572 case T_DOUBLE: { 7573 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7574 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7575 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7576 break; 7577 } 7578 default: 7579 ShouldNotReachHere(); 7580 } 7581 %} 7582 ins_pipe( pipe_slow ); 7583 %} 7584 7585 instruct castItoX(vec dst, vec src) %{ 7586 predicate(UseAVX <= 2 && 7587 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7588 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7589 match(Set dst (VectorCastI2X src)); 7590 format %{ "vector_cast_i2x $dst,$src" %} 7591 ins_encode %{ 7592 assert(UseAVX > 0, "required"); 7593 7594 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7595 int vlen_enc = vector_length_encoding(this, $src); 7596 7597 if (to_elem_bt == T_BYTE) { 7598 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7599 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7600 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7601 } else { 7602 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7603 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7604 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7605 } 7606 %} 7607 ins_pipe( pipe_slow ); 7608 %} 7609 7610 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7611 predicate(UseAVX <= 2 && 7612 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7613 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7614 match(Set dst (VectorCastI2X src)); 7615 format %{ "vector_cast_i2x $dst,$src\t! 
using $vtmp as TEMP" %} 7616 effect(TEMP dst, TEMP vtmp); 7617 ins_encode %{ 7618 assert(UseAVX > 0, "required"); 7619 7620 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7621 int vlen_enc = vector_length_encoding(this, $src); 7622 7623 if (to_elem_bt == T_BYTE) { 7624 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7625 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7626 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7627 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7628 } else { 7629 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7630 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7631 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7632 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7633 } 7634 %} 7635 ins_pipe( pipe_slow ); 7636 %} 7637 7638 instruct vcastItoX_evex(vec dst, vec src) %{ 7639 predicate(UseAVX > 2 || 7640 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7641 match(Set dst (VectorCastI2X src)); 7642 format %{ "vector_cast_i2x $dst,$src\t!" %} 7643 ins_encode %{ 7644 assert(UseAVX > 0, "required"); 7645 7646 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7647 int src_vlen_enc = vector_length_encoding(this, $src); 7648 int dst_vlen_enc = vector_length_encoding(this); 7649 switch (dst_elem_bt) { 7650 case T_BYTE: 7651 if (!VM_Version::supports_avx512vl()) { 7652 src_vlen_enc = Assembler::AVX_512bit; 7653 } 7654 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7655 break; 7656 case T_SHORT: 7657 if (!VM_Version::supports_avx512vl()) { 7658 src_vlen_enc = Assembler::AVX_512bit; 7659 } 7660 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7661 break; 7662 case T_FLOAT: 7663 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7664 break; 7665 case T_LONG: 7666 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7667 break; 7668 case T_DOUBLE: 7669 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7670 break; 7671 default: 7672 ShouldNotReachHere(); 7673 } 7674 %} 7675 ins_pipe( pipe_slow ); 7676 %} 7677 7678 instruct vcastLtoBS(vec dst, vec src) %{ 7679 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7680 UseAVX <= 2); 7681 match(Set dst (VectorCastL2X src)); 7682 format %{ "vector_cast_l2x $dst,$src" %} 7683 ins_encode %{ 7684 assert(UseAVX > 0, "required"); 7685 7686 int vlen = Matcher::vector_length_in_bytes(this, $src); 7687 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7688 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? 
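// Editor's note: a Java cast from long to byte/short keeps only the low bits
// of each element, so this AVX/AVX2 rule (no evpmovq* narrowing available)
// only has to collect the low dword of every 64-bit lane, mask it to the
// target width and pack, i.e. per element:
//   result = (byte)elem;    // == (byte)(elem & 0xFF)
//   result = (short)elem;   // == (short)(elem & 0xFFFF)
// The vpshufd (or vpermilps + vpermpd) shuffles below gather those low
// dwords, and the mask chosen here bounds each dword so that the
// unsigned-saturating vpackusdw/vpackuswb packs cannot clamp.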
ExternalAddress(vector_int_to_byte_mask()) 7689 : ExternalAddress(vector_int_to_short_mask()); 7690 if (vlen <= 16) { 7691 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7692 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7693 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7694 } else { 7695 assert(vlen <= 32, "required"); 7696 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7697 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7698 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7699 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7700 } 7701 if (to_elem_bt == T_BYTE) { 7702 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7703 } 7704 %} 7705 ins_pipe( pipe_slow ); 7706 %} 7707 7708 instruct vcastLtoX_evex(vec dst, vec src) %{ 7709 predicate(UseAVX > 2 || 7710 (Matcher::vector_element_basic_type(n) == T_INT || 7711 Matcher::vector_element_basic_type(n) == T_FLOAT || 7712 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7713 match(Set dst (VectorCastL2X src)); 7714 format %{ "vector_cast_l2x $dst,$src\t!" %} 7715 ins_encode %{ 7716 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7717 int vlen = Matcher::vector_length_in_bytes(this, $src); 7718 int vlen_enc = vector_length_encoding(this, $src); 7719 switch (to_elem_bt) { 7720 case T_BYTE: 7721 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7722 vlen_enc = Assembler::AVX_512bit; 7723 } 7724 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7725 break; 7726 case T_SHORT: 7727 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7728 vlen_enc = Assembler::AVX_512bit; 7729 } 7730 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7731 break; 7732 case T_INT: 7733 if (vlen == 8) { 7734 if ($dst$$XMMRegister != $src$$XMMRegister) { 7735 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7736 } 7737 } else if (vlen == 16) { 7738 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7739 } else if (vlen == 32) { 7740 if (UseAVX > 2) { 7741 if (!VM_Version::supports_avx512vl()) { 7742 vlen_enc = Assembler::AVX_512bit; 7743 } 7744 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7745 } else { 7746 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7747 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7748 } 7749 } else { // vlen == 64 7750 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7751 } 7752 break; 7753 case T_FLOAT: 7754 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7755 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7756 break; 7757 case T_DOUBLE: 7758 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7759 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7760 break; 7761 7762 default: assert(false, "%s", type2name(to_elem_bt)); 7763 } 7764 %} 7765 ins_pipe( pipe_slow ); 7766 %} 7767 7768 instruct vcastFtoD_reg(vec dst, vec src) %{ 7769 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7770 match(Set dst (VectorCastF2X src)); 7771 format %{ "vector_cast_f2d $dst,$src\t!" 
%} 7772 ins_encode %{ 7773 int vlen_enc = vector_length_encoding(this); 7774 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7775 %} 7776 ins_pipe( pipe_slow ); 7777 %} 7778 7779 7780 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7781 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7782 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4); 7783 match(Set dst (VectorCastF2X src)); 7784 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7785 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 7786 ins_encode %{ 7787 int vlen_enc = vector_length_encoding(this, $src); 7788 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7789 // JDK-8292878 removed the need for an explicit scratch register needed to load greater than 7790 // 32 bit addresses for register indirect addressing mode since stub constants 7791 // are part of code cache and there is a cap of 2G on ReservedCodeCacheSize currently. 7792 // However, targets are free to increase this limit, but having a large code cache size 7793 // greater than 2G looks unreasonable in practical scenario, on the hind side with given 7794 // cap we save a temporary register allocation which in limiting case can prevent 7795 // spilling in high register pressure blocks. 7796 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7797 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 7798 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7799 %} 7800 ins_pipe( pipe_slow ); 7801 %} 7802 7803 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7804 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7805 is_integral_type(Matcher::vector_element_basic_type(n))); 7806 match(Set dst (VectorCastF2X src)); 7807 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7808 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7809 ins_encode %{ 7810 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7811 if (to_elem_bt == T_LONG) { 7812 int vlen_enc = vector_length_encoding(this); 7813 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7814 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7815 ExternalAddress(vector_double_signflip()), noreg, vlen_enc); 7816 } else { 7817 int vlen_enc = vector_length_encoding(this, $src); 7818 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7819 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7820 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7821 } 7822 %} 7823 ins_pipe( pipe_slow ); 7824 %} 7825 7826 instruct vcastDtoF_reg(vec dst, vec src) %{ 7827 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 7828 match(Set dst (VectorCastD2X src)); 7829 format %{ "vector_cast_d2x $dst,$src\t!" 
%} 7830 ins_encode %{ 7831 int vlen_enc = vector_length_encoding(this, $src); 7832 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7833 %} 7834 ins_pipe( pipe_slow ); 7835 %} 7836 7837 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{ 7838 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7839 is_integral_type(Matcher::vector_element_basic_type(n))); 7840 match(Set dst (VectorCastD2X src)); 7841 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr); 7842 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %} 7843 ins_encode %{ 7844 int vlen_enc = vector_length_encoding(this, $src); 7845 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7846 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7847 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister, 7848 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7849 %} 7850 ins_pipe( pipe_slow ); 7851 %} 7852 7853 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7854 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7855 is_integral_type(Matcher::vector_element_basic_type(n))); 7856 match(Set dst (VectorCastD2X src)); 7857 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7858 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7859 ins_encode %{ 7860 int vlen_enc = vector_length_encoding(this, $src); 7861 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7862 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) : 7863 ExternalAddress(vector_float_signflip()); 7864 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7865 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc); 7866 %} 7867 ins_pipe( pipe_slow ); 7868 %} 7869 7870 instruct vucast(vec dst, vec src) %{ 7871 match(Set dst (VectorUCastB2X src)); 7872 match(Set dst (VectorUCastS2X src)); 7873 match(Set dst (VectorUCastI2X src)); 7874 format %{ "vector_ucast $dst,$src\t!" %} 7875 ins_encode %{ 7876 assert(UseAVX > 0, "required"); 7877 7878 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7879 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7880 int vlen_enc = vector_length_encoding(this); 7881 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7882 %} 7883 ins_pipe( pipe_slow ); 7884 %} 7885 7886 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7887 predicate(!VM_Version::supports_avx512vl() && 7888 Matcher::vector_length_in_bytes(n) < 64 && 7889 Matcher::vector_element_basic_type(n) == T_INT); 7890 match(Set dst (RoundVF src)); 7891 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7892 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %} 7893 ins_encode %{ 7894 int vlen_enc = vector_length_encoding(this); 7895 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 
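// Editor's note: Math.round(f) is (int)Math.floor(f + 0.5f), so new_mxcsr
// assembled here is a temporary MXCSR value whose rounding-control field
// (bits 14:13) selects round-toward-negative-infinity:
//   0x1F80 = power-on default (all exceptions masked, round-to-nearest)
//   0x3F80 = default | 0x2000 -> RC = 01b (round down)
//   0x3FBF = 0x3F80 with the six exception status flags also pre-set; used
//            when EnableX86ECoreOpts is on, presumably to avoid the cost of
//            the first status-flag update on E-cores.
// The vector_float_sign_flip() table passed alongside it is, as far as I can
// tell, used to fix up lanes where the conversion returns the 0x80000000
// "integer indefinite" value (NaN / out-of-range inputs).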
0x3FBF : 0x3F80)); 7896 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, 7897 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7898 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister); 7899 %} 7900 ins_pipe( pipe_slow ); 7901 %} 7902 7903 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7904 predicate((VM_Version::supports_avx512vl() || 7905 Matcher::vector_length_in_bytes(n) == 64) && 7906 Matcher::vector_element_basic_type(n) == T_INT); 7907 match(Set dst (RoundVF src)); 7908 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7909 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7910 ins_encode %{ 7911 int vlen_enc = vector_length_encoding(this); 7912 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7913 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, 7914 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7915 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7916 %} 7917 ins_pipe( pipe_slow ); 7918 %} 7919 7920 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7921 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7922 match(Set dst (RoundVD src)); 7923 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7924 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7925 ins_encode %{ 7926 int vlen_enc = vector_length_encoding(this); 7927 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7928 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, 7929 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc, 7930 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7931 %} 7932 ins_pipe( pipe_slow ); 7933 %} 7934 7935 // --------------------------------- VectorMaskCmp -------------------------------------- 7936 7937 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7938 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7939 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 7940 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7941 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7942 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7943 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 7944 ins_encode %{ 7945 int vlen_enc = vector_length_encoding(this, $src1); 7946 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7947 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7948 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7949 } else { 7950 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7951 } 7952 %} 7953 ins_pipe( pipe_slow ); 7954 %} 7955 7956 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7957 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 7958 n->bottom_type()->isa_vectmask() == nullptr && 7959 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7960 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7961 effect(TEMP ktmp); 7962 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7963 ins_encode %{ 7964 int vlen_enc = Assembler::AVX_512bit; 7965 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7966 KRegister mask = k0; // The comparison itself is not being masked. 7967 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7968 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7969 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7970 } else { 7971 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7972 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7973 } 7974 %} 7975 ins_pipe( pipe_slow ); 7976 %} 7977 7978 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 7979 predicate(n->bottom_type()->isa_vectmask() && 7980 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7981 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7982 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 7983 ins_encode %{ 7984 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 7985 int vlen_enc = vector_length_encoding(this, $src1); 7986 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7987 KRegister mask = k0; // The comparison itself is not being masked. 7988 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7989 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7990 } else { 7991 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7992 } 7993 %} 7994 ins_pipe( pipe_slow ); 7995 %} 7996 7997 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7998 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7999 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 8000 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 8001 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8002 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 8003 (n->in(2)->get_int() == BoolTest::eq || 8004 n->in(2)->get_int() == BoolTest::lt || 8005 n->in(2)->get_int() == BoolTest::gt)); // cond 8006 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8007 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 8008 ins_encode %{ 8009 int vlen_enc = vector_length_encoding(this, $src1); 8010 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8011 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 8012 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc); 8013 %} 8014 ins_pipe( pipe_slow ); 8015 %} 8016 8017 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 8018 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8019 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 8020 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 8021 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8022 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 8023 (n->in(2)->get_int() == BoolTest::ne || 8024 n->in(2)->get_int() == BoolTest::le || 8025 n->in(2)->get_int() == BoolTest::ge)); // cond 8026 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8027 effect(TEMP dst, TEMP xtmp); 8028 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %} 8029 ins_encode %{ 8030 int vlen_enc = vector_length_encoding(this, $src1); 8031 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8032 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 8033 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 8034 %} 8035 ins_pipe( pipe_slow ); 8036 %} 8037 8038 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 8039 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8040 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 8041 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 8042 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8043 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 8044 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8045 effect(TEMP dst, TEMP xtmp); 8046 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! 
using $xtmp as TEMP" %} 8047 ins_encode %{ 8048 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1))); 8049 int vlen_enc = vector_length_encoding(this, $src1); 8050 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8051 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 8052 8053 if (vlen_enc == Assembler::AVX_128bit) { 8054 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 8055 } else { 8056 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 8057 } 8058 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8059 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8060 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 8061 %} 8062 ins_pipe( pipe_slow ); 8063 %} 8064 8065 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 8066 predicate((n->bottom_type()->isa_vectmask() == nullptr && 8067 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 8068 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 8069 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8070 effect(TEMP ktmp); 8071 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 8072 ins_encode %{ 8073 assert(UseAVX > 2, "required"); 8074 8075 int vlen_enc = vector_length_encoding(this, $src1); 8076 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8077 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 8078 KRegister mask = k0; // The comparison itself is not being masked. 8079 bool merge = false; 8080 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 8081 8082 switch (src1_elem_bt) { 8083 case T_INT: { 8084 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8085 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 8086 break; 8087 } 8088 case T_LONG: { 8089 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8090 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 8091 break; 8092 } 8093 default: assert(false, "%s", type2name(src1_elem_bt)); 8094 } 8095 %} 8096 ins_pipe( pipe_slow ); 8097 %} 8098 8099 8100 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{ 8101 predicate(n->bottom_type()->isa_vectmask() && 8102 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 8103 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8104 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" 
%} 8105 ins_encode %{ 8106 assert(UseAVX > 2, "required"); 8107 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 8108 8109 int vlen_enc = vector_length_encoding(this, $src1); 8110 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8111 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 8112 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 8113 8114 // Comparison i 8115 switch (src1_elem_bt) { 8116 case T_BYTE: { 8117 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8118 break; 8119 } 8120 case T_SHORT: { 8121 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8122 break; 8123 } 8124 case T_INT: { 8125 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8126 break; 8127 } 8128 case T_LONG: { 8129 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8130 break; 8131 } 8132 default: assert(false, "%s", type2name(src1_elem_bt)); 8133 } 8134 %} 8135 ins_pipe( pipe_slow ); 8136 %} 8137 8138 // Extract 8139 8140 instruct extractI(rRegI dst, legVec src, immU8 idx) %{ 8141 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src 8142 match(Set dst (ExtractI src idx)); 8143 match(Set dst (ExtractS src idx)); 8144 match(Set dst (ExtractB src idx)); 8145 format %{ "extractI $dst,$src,$idx\t!" %} 8146 ins_encode %{ 8147 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8148 8149 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8150 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8151 %} 8152 ins_pipe( pipe_slow ); 8153 %} 8154 8155 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ 8156 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src 8157 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src 8158 match(Set dst (ExtractI src idx)); 8159 match(Set dst (ExtractS src idx)); 8160 match(Set dst (ExtractB src idx)); 8161 effect(TEMP vtmp); 8162 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %} 8163 ins_encode %{ 8164 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8165 8166 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8167 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8168 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); 8169 %} 8170 ins_pipe( pipe_slow ); 8171 %} 8172 8173 instruct extractL(rRegL dst, legVec src, immU8 idx) %{ 8174 predicate(Matcher::vector_length(n->in(1)) <= 2); // src 8175 match(Set dst (ExtractL src idx)); 8176 format %{ "extractL $dst,$src,$idx\t!" %} 8177 ins_encode %{ 8178 assert(UseSSE >= 4, "required"); 8179 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8180 8181 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8182 %} 8183 ins_pipe( pipe_slow ); 8184 %} 8185 8186 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ 8187 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8188 Matcher::vector_length(n->in(1)) == 8); // src 8189 match(Set dst (ExtractL src idx)); 8190 effect(TEMP vtmp); 8191 format %{ "vextractL $dst,$src,$idx\t! 
using $vtmp as TEMP" %} 8192 ins_encode %{ 8193 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8194 8195 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8196 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 8197 %} 8198 ins_pipe( pipe_slow ); 8199 %} 8200 8201 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8202 predicate(Matcher::vector_length(n->in(1)) <= 4); 8203 match(Set dst (ExtractF src idx)); 8204 effect(TEMP dst, TEMP vtmp); 8205 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8206 ins_encode %{ 8207 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8208 8209 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 8210 %} 8211 ins_pipe( pipe_slow ); 8212 %} 8213 8214 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8215 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 8216 Matcher::vector_length(n->in(1)/*src*/) == 16); 8217 match(Set dst (ExtractF src idx)); 8218 effect(TEMP vtmp); 8219 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8220 ins_encode %{ 8221 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8222 8223 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8224 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 8225 %} 8226 ins_pipe( pipe_slow ); 8227 %} 8228 8229 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 8230 predicate(Matcher::vector_length(n->in(1)) == 2); // src 8231 match(Set dst (ExtractD src idx)); 8232 format %{ "extractD $dst,$src,$idx\t!" %} 8233 ins_encode %{ 8234 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8235 8236 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8237 %} 8238 ins_pipe( pipe_slow ); 8239 %} 8240 8241 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 8242 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8243 Matcher::vector_length(n->in(1)) == 8); // src 8244 match(Set dst (ExtractD src idx)); 8245 effect(TEMP vtmp); 8246 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 8247 ins_encode %{ 8248 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8249 8250 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8251 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 8252 %} 8253 ins_pipe( pipe_slow ); 8254 %} 8255 8256 // --------------------------------- Vector Blend -------------------------------------- 8257 8258 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 8259 predicate(UseAVX == 0); 8260 match(Set dst (VectorBlend (Binary dst src) mask)); 8261 format %{ "vector_blend $dst,$src,$mask\t! 
using $tmp as TEMP" %} 8262 effect(TEMP tmp); 8263 ins_encode %{ 8264 assert(UseSSE >= 4, "required"); 8265 8266 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 8267 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 8268 } 8269 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 8270 %} 8271 ins_pipe( pipe_slow ); 8272 %} 8273 8274 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8275 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8276 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8277 Matcher::vector_length_in_bytes(n) <= 32 && 8278 is_integral_type(Matcher::vector_element_basic_type(n))); 8279 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8280 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8281 ins_encode %{ 8282 int vlen_enc = vector_length_encoding(this); 8283 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8284 %} 8285 ins_pipe( pipe_slow ); 8286 %} 8287 8288 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8289 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8290 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8291 Matcher::vector_length_in_bytes(n) <= 32 && 8292 !is_integral_type(Matcher::vector_element_basic_type(n))); 8293 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8294 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8295 ins_encode %{ 8296 int vlen_enc = vector_length_encoding(this); 8297 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8298 %} 8299 ins_pipe( pipe_slow ); 8300 %} 8301 8302 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{ 8303 predicate(UseAVX > 0 && EnableX86ECoreOpts && 8304 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8305 Matcher::vector_length_in_bytes(n) <= 32); 8306 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8307 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %} 8308 effect(TEMP vtmp, TEMP dst); 8309 ins_encode %{ 8310 int vlen_enc = vector_length_encoding(this); 8311 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8312 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8313 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8314 %} 8315 ins_pipe( pipe_slow ); 8316 %} 8317 8318 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 8319 predicate(Matcher::vector_length_in_bytes(n) == 64 && 8320 n->in(2)->bottom_type()->isa_vectmask() == nullptr); 8321 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8322 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using k2 as TEMP" %} 8323 effect(TEMP ktmp); 8324 ins_encode %{ 8325 int vlen_enc = Assembler::AVX_512bit; 8326 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8327 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 8328 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8329 %} 8330 ins_pipe( pipe_slow ); 8331 %} 8332 8333 8334 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 8335 predicate(n->in(2)->bottom_type()->isa_vectmask() && 8336 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 8337 VM_Version::supports_avx512bw())); 8338 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8339 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 8340 ins_encode %{ 8341 int vlen_enc = vector_length_encoding(this); 8342 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8343 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8344 %} 8345 ins_pipe( pipe_slow ); 8346 %} 8347 8348 // --------------------------------- ABS -------------------------------------- 8349 // a = |a| 8350 instruct vabsB_reg(vec dst, vec src) %{ 8351 match(Set dst (AbsVB src)); 8352 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 8353 ins_encode %{ 8354 uint vlen = Matcher::vector_length(this); 8355 if (vlen <= 16) { 8356 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8357 } else { 8358 int vlen_enc = vector_length_encoding(this); 8359 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8360 } 8361 %} 8362 ins_pipe( pipe_slow ); 8363 %} 8364 8365 instruct vabsS_reg(vec dst, vec src) %{ 8366 match(Set dst (AbsVS src)); 8367 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 8368 ins_encode %{ 8369 uint vlen = Matcher::vector_length(this); 8370 if (vlen <= 8) { 8371 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8372 } else { 8373 int vlen_enc = vector_length_encoding(this); 8374 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8375 } 8376 %} 8377 ins_pipe( pipe_slow ); 8378 %} 8379 8380 instruct vabsI_reg(vec dst, vec src) %{ 8381 match(Set dst (AbsVI src)); 8382 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 8383 ins_encode %{ 8384 uint vlen = Matcher::vector_length(this); 8385 if (vlen <= 4) { 8386 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8387 } else { 8388 int vlen_enc = vector_length_encoding(this); 8389 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8390 } 8391 %} 8392 ins_pipe( pipe_slow ); 8393 %} 8394 8395 instruct vabsL_reg(vec dst, vec src) %{ 8396 match(Set dst (AbsVL src)); 8397 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 8398 ins_encode %{ 8399 assert(UseAVX > 2, "required"); 8400 int vlen_enc = vector_length_encoding(this); 8401 if (!VM_Version::supports_avx512vl()) { 8402 vlen_enc = Assembler::AVX_512bit; 8403 } 8404 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8405 %} 8406 ins_pipe( pipe_slow ); 8407 %} 8408 8409 // --------------------------------- ABSNEG -------------------------------------- 8410 8411 instruct vabsnegF(vec dst, vec src) %{ 8412 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 8413 match(Set dst (AbsVF src)); 8414 match(Set dst (NegVF src)); 8415 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 8416 ins_cost(150); 8417 ins_encode %{ 8418 int opcode = 
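// Editor's note: AbsVF and NegVF are pure sign-bit manipulations, which is
// why a single rule serves both and dispatches on the ideal opcode; the
// vabsnegf helper applies a bitwise mask per 32-bit lane instead of doing
// any FP arithmetic:
//   abs: bits &= 0x7FFFFFFF;   // clear sign bit (andps with a sign mask)
//   neg: bits ^= 0x80000000;   // flip sign bit  (xorps with a sign flip)
// This matches Java semantics; Math.abs(float) is even specified bit-wise as
// Float.intBitsToFloat(0x7fffffff & Float.floatToRawIntBits(f)), and the
// mask approach handles -0.0f without raising any FP exceptions.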
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(vlen == 8 || vlen == 16, "required");
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4F(vec dst) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsnegD(vec dst, vec src) %{
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- VectorTest --------------------------------------------

instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP vtmp);
  format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16 $src1, $src2\n\t" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
    __ cmpl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8 $src1, $src2\n\t" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- LoadMask --------------------------------------------

instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(in(1));
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
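
// For reference: VectorLoadMask takes a boolean vector stored as 0/1 bytes and widens it into
// a lane mask, either as a vector whose lanes are all-zero/all-one bit patterns (legacy path)
// or as an AVX-512 opmask register (the kReg variants above). A minimal scalar C++ sketch,
// with illustrative variable names:
//
//   for (int i = 0; i < vlen; i++) {
//     dst[i] = boolsrc[i] ? ~0 : 0;   // per-lane all-ones / all-zeros
//   }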
//------------------------------------- StoreMask --------------------------------------------

instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    if (vlen <= 16 && UseAVX <= 2) {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      int src_vlen_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
      __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
    __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
    __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
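
// For reference: VectorStoreMask is the inverse of VectorLoadMask - it narrows a lane mask
// (lanes of all-zero/all-one bits, $size bytes wide) back into a boolean vector of 0/1 bytes.
// The trailing pabsb/vpabsb in the patterns above turns an all-ones (-1) lane into 1.
// Scalar C++ sketch with illustrative names:
//
//   for (int i = 0; i < vlen; i++) {
//     dst_bytes[i] = (mask[i] != 0) ? 1 : 0;
//   }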
instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    int dst_vlen_enc = vector_length_encoding(this);
    __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType dst_bt = Matcher::vector_element_basic_type(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}

//-------------------------------- Load Iota Indices ----------------------------------

instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
  %}
  ins_pipe( pipe_slow );
%}
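
// For reference: the iota load above materializes the constant sequence 0, 1, 2, ... from a
// table, and the PopulateIndex patterns below add a broadcast start index to it. Scalar C++
// sketch (the stride operand is constrained to 1 by the asserts in the encodings):
//
//   for (int i = 0; i < vlen; i++) {
//     dst[i] = src1 + i;   // broadcast(src1) + iota
//   }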
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//-------------------------------- Rearrange ----------------------------------

// LoadShuffle/Rearrange for Byte
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
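
// For reference: VectorRearrange permutes lanes through an index vector, i.e. a scalar
// C++ sketch of the semantics is simply
//
//   for (int i = 0; i < vlen; i++) {
//     dst[i] = src[shuffle[i]];
//   }
//
// AVX2 has no cross-lane byte permute, so the 256-bit variant above builds the result from
// two vpshufb shuffles (same 128-bit lane and swapped lanes) and blends them with a mask
// derived from the shuffle indices; with AVX-512 VBMI a single vpermb suffices.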
// LoadShuffle/Rearrange for Short

instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from short shuffle mask
    // only byte shuffle instruction available on these platforms
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Integer and Float

instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from int shuffle mask
    // only byte shuffle instruction available on these platforms

    // Duplicate and multiply each shuffle by 4
    __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Long and Double

instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from long shuffle mask
    // only double word shuffle instruction available on these platforms

    // Multiply each shuffle by two to get double word index
    __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
// a * b + c

instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
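
// For reference: the FmaVF/FmaVD patterns compute, per lane,
//
//   c[i] = a[i] * b[i] + c[i];   // fused, single rounding, like C++ std::fma(a, b, c)
//
// which is why $c appears both as an input and as the result register in the match rules.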
instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add --------------------------------------

instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------

instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

// --------------------------------- PopCount --------------------------------------

instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
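
// For reference: PopCountVI/PopCountVL count the set bits of every lane, i.e. in scalar C++
//
//   for (int i = 0; i < vlen; i++) {
//     dst[i] = std::popcount((unsigned)src[i]);   // C++20 <bit>
//   }
//
// In the masked EVEX pattern above, $dst is preloaded with $src so that, with merging
// enabled, lanes whose mask bit is clear keep the original source element.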
instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Trailing Zeros Count --------------------------------------

instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
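
// For reference: CountTrailingZerosV is a per-lane tzcnt; a scalar C++ sketch is
//
//   for (int i = 0; i < vlen; i++) {
//     dst[i] = (src[i] == 0) ? lane_bits : std::countr_zero((unsigned)src[i]);   // C++20 <bit>
//   }
//
// where lane_bits is 8/16/32/64 depending on the element type, matching the scalar
// convention that a zero input yields the full lane width.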
instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Bitwise Ternary Logic ----------------------------------

instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Rotation Operations ----------------------------------
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Masked Operations ------------------------------------
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(this);
    __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vector_len = vector_length_encoding(this);
    __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    int vlen_enc = vector_length_encoding(src_node);
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(src_node);
    __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label Lskip;
    // check if masked bits of addr are zero
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, Lskip);
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}
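
// For reference: LoadVectorMasked/StoreVectorMasked only touch lanes whose mask bit is set;
// on the EVEX paths the kReg mask also guards the memory access itself. Scalar C++ sketch
// (illustrative names):
//
//   for (int i = 0; i < vlen; i++) {
//     if (mask[i]) dst[i] = mem[i]; else dst[i] = 0;   // masked load, disabled lanes zeroed
//   }
//   for (int i = 0; i < vlen; i++) {
//     if (mask[i]) mem[i] = src[i];                    // masked store, other memory untouched
//   }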
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    Label DONE;
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);

    __ knotql($ktmp2$$KRegister, $mask$$KRegister);
    __ mov64($dst$$Register, -1L);
    __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
    __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
    __ jccb(Assembler::carrySet, DONE);
    __ kmovql($dst$$Register, $ktmp1$$KRegister);
    __ notq($dst$$Register);
    __ tzcntq($dst$$Register, $dst$$Register);
    __ bind(DONE);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
  ins_encode %{
    __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
  match(Set dst (VectorMaskGen len));
  format %{ "vector_mask_gen $len \t! vector mask generator" %}
  effect(TEMP temp);
  ins_encode %{
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant)));
    __ kmovql($dst$$KRegister, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $dst$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong mask));
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Compress/Expand Operations ---------------------------
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
                                   $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
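
// For reference: CompressV packs the lanes selected by the mask into consecutive low lanes,
// and ExpandV scatters the low lanes back to the selected positions; CompressM performs the
// same packing on the mask bits themselves. Scalar C++ sketch of compress (illustrative
// names; as used here the EVEX encoding is not merging, so leftover high lanes do not keep
// their previous destination values):
//
//   int j = 0;
//   for (int i = 0; i < vlen; i++) {
//     if (mask[i]) dst[j++] = src[i];
//   }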
instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "");
    int mask_len = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// -------------------------------- Bit and Byte Reversal Vector Operations ------------------------

instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
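
// For reference: ReverseV reverses the bit order within every lane and ReverseBytesV reverses
// the byte order (endianness swap) of every lane. The GFNI variant above obtains the per-byte
// bit reversal from one affine transform with the matrix constant 0x8040201008040201.
// Scalar C++ sketch of one byte of ReverseV:
//
//   uint8_t reverse_bits8(uint8_t x) {
//     x = (uint8_t)((x & 0xF0) >> 4 | (x & 0x0F) << 4);
//     x = (uint8_t)((x & 0xCC) >> 2 | (x & 0x33) << 2);
//     x = (uint8_t)((x & 0xAA) >> 1 | (x & 0x55) << 1);
//     return x;
//   }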
$src$$XMMRegister, xnoreg, 9716 xnoreg, xnoreg, k0, noreg, true, vlen_enc); 9717 %} 9718 ins_pipe( pipe_slow ); 9719 %} 9720 9721 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9722 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9723 Matcher::vector_length_in_bytes(n->in(1)))); 9724 match(Set dst (CountLeadingZerosV src mask)); 9725 format %{ "vector_count_leading_zeros $dst, $src, $mask" %} 9726 ins_encode %{ 9727 int vlen_enc = vector_length_encoding(this, $src); 9728 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9729 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9730 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, 9731 xnoreg, $mask$$KRegister, noreg, true, vlen_enc); 9732 %} 9733 ins_pipe( pipe_slow ); 9734 %} 9735 9736 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{ 9737 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9738 VM_Version::supports_avx512cd() && 9739 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9740 match(Set dst (CountLeadingZerosV src)); 9741 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 9742 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %} 9743 ins_encode %{ 9744 int vlen_enc = vector_length_encoding(this, $src); 9745 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9746 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9747 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc); 9748 %} 9749 ins_pipe( pipe_slow ); 9750 %} 9751 9752 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{ 9753 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9754 match(Set dst (CountLeadingZerosV src)); 9755 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 9756 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %} 9757 ins_encode %{ 9758 int vlen_enc = vector_length_encoding(this, $src); 9759 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9760 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9761 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister, 9762 $rtmp$$Register, true, vlen_enc); 9763 %} 9764 ins_pipe( pipe_slow ); 9765 %} 9766 9767 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{ 9768 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT && 9769 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9770 match(Set dst (CountLeadingZerosV src)); 9771 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 9772 format %{ "vector_count_leading_zeros $dst, $src\t! 
using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 9773 ins_encode %{ 9774 int vlen_enc = vector_length_encoding(this, $src); 9775 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9776 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9777 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc); 9778 %} 9779 ins_pipe( pipe_slow ); 9780 %} 9781 9782 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9783 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT && 9784 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9785 match(Set dst (CountLeadingZerosV src)); 9786 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9787 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9788 ins_encode %{ 9789 int vlen_enc = vector_length_encoding(this, $src); 9790 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9791 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9792 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9793 %} 9794 ins_pipe( pipe_slow ); 9795 %} 9796 9797 // ---------------------------------- Vector Masked Operations ------------------------------------ 9798 9799 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ 9800 match(Set dst (AddVB (Binary dst src2) mask)); 9801 match(Set dst (AddVS (Binary dst src2) mask)); 9802 match(Set dst (AddVI (Binary dst src2) mask)); 9803 match(Set dst (AddVL (Binary dst src2) mask)); 9804 match(Set dst (AddVF (Binary dst src2) mask)); 9805 match(Set dst (AddVD (Binary dst src2) mask)); 9806 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9807 ins_encode %{ 9808 int vlen_enc = vector_length_encoding(this); 9809 BasicType bt = Matcher::vector_element_basic_type(this); 9810 int opc = this->ideal_Opcode(); 9811 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9812 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9813 %} 9814 ins_pipe( pipe_slow ); 9815 %} 9816 9817 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ 9818 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); 9819 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); 9820 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); 9821 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); 9822 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); 9823 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); 9824 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9825 ins_encode %{ 9826 int vlen_enc = vector_length_encoding(this); 9827 BasicType bt = Matcher::vector_element_basic_type(this); 9828 int opc = this->ideal_Opcode(); 9829 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9830 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9831 %} 9832 ins_pipe( pipe_slow ); 9833 %} 9834 9835 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ 9836 match(Set dst (XorV (Binary dst src2) mask)); 9837 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! 
xor masked operation" %} 9838 ins_encode %{ 9839 int vlen_enc = vector_length_encoding(this); 9840 BasicType bt = Matcher::vector_element_basic_type(this); 9841 int opc = this->ideal_Opcode(); 9842 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9843 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9844 %} 9845 ins_pipe( pipe_slow ); 9846 %} 9847 9848 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9849 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9850 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9851 ins_encode %{ 9852 int vlen_enc = vector_length_encoding(this); 9853 BasicType bt = Matcher::vector_element_basic_type(this); 9854 int opc = this->ideal_Opcode(); 9855 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9856 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9857 %} 9858 ins_pipe( pipe_slow ); 9859 %} 9860 9861 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9862 match(Set dst (OrV (Binary dst src2) mask)); 9863 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9864 ins_encode %{ 9865 int vlen_enc = vector_length_encoding(this); 9866 BasicType bt = Matcher::vector_element_basic_type(this); 9867 int opc = this->ideal_Opcode(); 9868 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9869 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9870 %} 9871 ins_pipe( pipe_slow ); 9872 %} 9873 9874 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9875 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9876 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9877 ins_encode %{ 9878 int vlen_enc = vector_length_encoding(this); 9879 BasicType bt = Matcher::vector_element_basic_type(this); 9880 int opc = this->ideal_Opcode(); 9881 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9882 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9883 %} 9884 ins_pipe( pipe_slow ); 9885 %} 9886 9887 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9888 match(Set dst (AndV (Binary dst src2) mask)); 9889 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9890 ins_encode %{ 9891 int vlen_enc = vector_length_encoding(this); 9892 BasicType bt = Matcher::vector_element_basic_type(this); 9893 int opc = this->ideal_Opcode(); 9894 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9895 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9896 %} 9897 ins_pipe( pipe_slow ); 9898 %} 9899 9900 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 9901 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 9902 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9903 ins_encode %{ 9904 int vlen_enc = vector_length_encoding(this); 9905 BasicType bt = Matcher::vector_element_basic_type(this); 9906 int opc = this->ideal_Opcode(); 9907 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9908 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9909 %} 9910 ins_pipe( pipe_slow ); 9911 %} 9912 9913 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 9914 match(Set dst (SubVB (Binary dst src2) mask)); 9915 match(Set dst (SubVS (Binary dst src2) mask)); 9916 match(Set dst (SubVI (Binary dst src2) mask)); 9917 match(Set dst (SubVL (Binary dst src2) mask)); 9918 match(Set dst (SubVF (Binary dst src2) mask)); 9919 match(Set dst (SubVD (Binary dst src2) mask)); 9920 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! 
sub masked operation" %} 9921 ins_encode %{ 9922 int vlen_enc = vector_length_encoding(this); 9923 BasicType bt = Matcher::vector_element_basic_type(this); 9924 int opc = this->ideal_Opcode(); 9925 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9926 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9927 %} 9928 ins_pipe( pipe_slow ); 9929 %} 9930 9931 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{ 9932 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask)); 9933 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask)); 9934 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask)); 9935 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask)); 9936 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask)); 9937 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask)); 9938 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 9939 ins_encode %{ 9940 int vlen_enc = vector_length_encoding(this); 9941 BasicType bt = Matcher::vector_element_basic_type(this); 9942 int opc = this->ideal_Opcode(); 9943 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9944 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9945 %} 9946 ins_pipe( pipe_slow ); 9947 %} 9948 9949 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{ 9950 match(Set dst (MulVS (Binary dst src2) mask)); 9951 match(Set dst (MulVI (Binary dst src2) mask)); 9952 match(Set dst (MulVL (Binary dst src2) mask)); 9953 match(Set dst (MulVF (Binary dst src2) mask)); 9954 match(Set dst (MulVD (Binary dst src2) mask)); 9955 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9956 ins_encode %{ 9957 int vlen_enc = vector_length_encoding(this); 9958 BasicType bt = Matcher::vector_element_basic_type(this); 9959 int opc = this->ideal_Opcode(); 9960 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9961 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9962 %} 9963 ins_pipe( pipe_slow ); 9964 %} 9965 9966 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{ 9967 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask)); 9968 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask)); 9969 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask)); 9970 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask)); 9971 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask)); 9972 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9973 ins_encode %{ 9974 int vlen_enc = vector_length_encoding(this); 9975 BasicType bt = Matcher::vector_element_basic_type(this); 9976 int opc = this->ideal_Opcode(); 9977 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9978 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9979 %} 9980 ins_pipe( pipe_slow ); 9981 %} 9982 9983 instruct vsqrt_reg_masked(vec dst, kReg mask) %{ 9984 match(Set dst (SqrtVF dst mask)); 9985 match(Set dst (SqrtVD dst mask)); 9986 format %{ "vpsqrt_masked $dst, $mask\t! 
sqrt masked operation" %} 9987 ins_encode %{ 9988 int vlen_enc = vector_length_encoding(this); 9989 BasicType bt = Matcher::vector_element_basic_type(this); 9990 int opc = this->ideal_Opcode(); 9991 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9992 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 9993 %} 9994 ins_pipe( pipe_slow ); 9995 %} 9996 9997 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{ 9998 match(Set dst (DivVF (Binary dst src2) mask)); 9999 match(Set dst (DivVD (Binary dst src2) mask)); 10000 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 10001 ins_encode %{ 10002 int vlen_enc = vector_length_encoding(this); 10003 BasicType bt = Matcher::vector_element_basic_type(this); 10004 int opc = this->ideal_Opcode(); 10005 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10006 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10007 %} 10008 ins_pipe( pipe_slow ); 10009 %} 10010 10011 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{ 10012 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask)); 10013 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask)); 10014 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 10015 ins_encode %{ 10016 int vlen_enc = vector_length_encoding(this); 10017 BasicType bt = Matcher::vector_element_basic_type(this); 10018 int opc = this->ideal_Opcode(); 10019 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10020 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10021 %} 10022 ins_pipe( pipe_slow ); 10023 %} 10024 10025 10026 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10027 match(Set dst (RotateLeftV (Binary dst shift) mask)); 10028 match(Set dst (RotateRightV (Binary dst shift) mask)); 10029 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %} 10030 ins_encode %{ 10031 int vlen_enc = vector_length_encoding(this); 10032 BasicType bt = Matcher::vector_element_basic_type(this); 10033 int opc = this->ideal_Opcode(); 10034 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10035 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10036 %} 10037 ins_pipe( pipe_slow ); 10038 %} 10039 10040 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{ 10041 match(Set dst (RotateLeftV (Binary dst src2) mask)); 10042 match(Set dst (RotateRightV (Binary dst src2) mask)); 10043 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %} 10044 ins_encode %{ 10045 int vlen_enc = vector_length_encoding(this); 10046 BasicType bt = Matcher::vector_element_basic_type(this); 10047 int opc = this->ideal_Opcode(); 10048 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10049 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10050 %} 10051 ins_pipe( pipe_slow ); 10052 %} 10053 10054 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10055 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask)); 10056 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask)); 10057 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask)); 10058 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! 
lshift masked operation" %} 10059 ins_encode %{ 10060 int vlen_enc = vector_length_encoding(this); 10061 BasicType bt = Matcher::vector_element_basic_type(this); 10062 int opc = this->ideal_Opcode(); 10063 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10064 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10065 %} 10066 ins_pipe( pipe_slow ); 10067 %} 10068 10069 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10070 predicate(!n->as_ShiftV()->is_var_shift()); 10071 match(Set dst (LShiftVS (Binary dst src2) mask)); 10072 match(Set dst (LShiftVI (Binary dst src2) mask)); 10073 match(Set dst (LShiftVL (Binary dst src2) mask)); 10074 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 10075 ins_encode %{ 10076 int vlen_enc = vector_length_encoding(this); 10077 BasicType bt = Matcher::vector_element_basic_type(this); 10078 int opc = this->ideal_Opcode(); 10079 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10080 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10081 %} 10082 ins_pipe( pipe_slow ); 10083 %} 10084 10085 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10086 predicate(n->as_ShiftV()->is_var_shift()); 10087 match(Set dst (LShiftVS (Binary dst src2) mask)); 10088 match(Set dst (LShiftVI (Binary dst src2) mask)); 10089 match(Set dst (LShiftVL (Binary dst src2) mask)); 10090 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 10091 ins_encode %{ 10092 int vlen_enc = vector_length_encoding(this); 10093 BasicType bt = Matcher::vector_element_basic_type(this); 10094 int opc = this->ideal_Opcode(); 10095 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10096 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10097 %} 10098 ins_pipe( pipe_slow ); 10099 %} 10100 10101 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10102 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask)); 10103 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask)); 10104 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask)); 10105 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %} 10106 ins_encode %{ 10107 int vlen_enc = vector_length_encoding(this); 10108 BasicType bt = Matcher::vector_element_basic_type(this); 10109 int opc = this->ideal_Opcode(); 10110 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10111 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10112 %} 10113 ins_pipe( pipe_slow ); 10114 %} 10115 10116 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10117 predicate(!n->as_ShiftV()->is_var_shift()); 10118 match(Set dst (RShiftVS (Binary dst src2) mask)); 10119 match(Set dst (RShiftVI (Binary dst src2) mask)); 10120 match(Set dst (RShiftVL (Binary dst src2) mask)); 10121 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! 
rshift masked operation" %} 10122 ins_encode %{ 10123 int vlen_enc = vector_length_encoding(this); 10124 BasicType bt = Matcher::vector_element_basic_type(this); 10125 int opc = this->ideal_Opcode(); 10126 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10127 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10128 %} 10129 ins_pipe( pipe_slow ); 10130 %} 10131 10132 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10133 predicate(n->as_ShiftV()->is_var_shift()); 10134 match(Set dst (RShiftVS (Binary dst src2) mask)); 10135 match(Set dst (RShiftVI (Binary dst src2) mask)); 10136 match(Set dst (RShiftVL (Binary dst src2) mask)); 10137 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 10138 ins_encode %{ 10139 int vlen_enc = vector_length_encoding(this); 10140 BasicType bt = Matcher::vector_element_basic_type(this); 10141 int opc = this->ideal_Opcode(); 10142 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10143 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10144 %} 10145 ins_pipe( pipe_slow ); 10146 %} 10147 10148 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10149 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask)); 10150 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask)); 10151 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask)); 10152 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %} 10153 ins_encode %{ 10154 int vlen_enc = vector_length_encoding(this); 10155 BasicType bt = Matcher::vector_element_basic_type(this); 10156 int opc = this->ideal_Opcode(); 10157 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10158 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10159 %} 10160 ins_pipe( pipe_slow ); 10161 %} 10162 10163 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10164 predicate(!n->as_ShiftV()->is_var_shift()); 10165 match(Set dst (URShiftVS (Binary dst src2) mask)); 10166 match(Set dst (URShiftVI (Binary dst src2) mask)); 10167 match(Set dst (URShiftVL (Binary dst src2) mask)); 10168 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 10169 ins_encode %{ 10170 int vlen_enc = vector_length_encoding(this); 10171 BasicType bt = Matcher::vector_element_basic_type(this); 10172 int opc = this->ideal_Opcode(); 10173 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10174 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10175 %} 10176 ins_pipe( pipe_slow ); 10177 %} 10178 10179 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10180 predicate(n->as_ShiftV()->is_var_shift()); 10181 match(Set dst (URShiftVS (Binary dst src2) mask)); 10182 match(Set dst (URShiftVI (Binary dst src2) mask)); 10183 match(Set dst (URShiftVL (Binary dst src2) mask)); 10184 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 10185 ins_encode %{ 10186 int vlen_enc = vector_length_encoding(this); 10187 BasicType bt = Matcher::vector_element_basic_type(this); 10188 int opc = this->ideal_Opcode(); 10189 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10190 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10191 %} 10192 ins_pipe( pipe_slow ); 10193 %} 10194 10195 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{ 10196 match(Set dst (MaxV (Binary dst src2) mask)); 10197 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! 
max masked operation" %} 10198 ins_encode %{ 10199 int vlen_enc = vector_length_encoding(this); 10200 BasicType bt = Matcher::vector_element_basic_type(this); 10201 int opc = this->ideal_Opcode(); 10202 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10203 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10204 %} 10205 ins_pipe( pipe_slow ); 10206 %} 10207 10208 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{ 10209 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask)); 10210 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} 10211 ins_encode %{ 10212 int vlen_enc = vector_length_encoding(this); 10213 BasicType bt = Matcher::vector_element_basic_type(this); 10214 int opc = this->ideal_Opcode(); 10215 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10216 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10217 %} 10218 ins_pipe( pipe_slow ); 10219 %} 10220 10221 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{ 10222 match(Set dst (MinV (Binary dst src2) mask)); 10223 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 10224 ins_encode %{ 10225 int vlen_enc = vector_length_encoding(this); 10226 BasicType bt = Matcher::vector_element_basic_type(this); 10227 int opc = this->ideal_Opcode(); 10228 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10229 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10230 %} 10231 ins_pipe( pipe_slow ); 10232 %} 10233 10234 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{ 10235 match(Set dst (MinV (Binary dst (LoadVector src2)) mask)); 10236 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 10237 ins_encode %{ 10238 int vlen_enc = vector_length_encoding(this); 10239 BasicType bt = Matcher::vector_element_basic_type(this); 10240 int opc = this->ideal_Opcode(); 10241 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10242 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10243 %} 10244 ins_pipe( pipe_slow ); 10245 %} 10246 10247 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{ 10248 match(Set dst (VectorRearrange (Binary dst src2) mask)); 10249 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %} 10250 ins_encode %{ 10251 int vlen_enc = vector_length_encoding(this); 10252 BasicType bt = Matcher::vector_element_basic_type(this); 10253 int opc = this->ideal_Opcode(); 10254 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10255 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 10256 %} 10257 ins_pipe( pipe_slow ); 10258 %} 10259 10260 instruct vabs_masked(vec dst, kReg mask) %{ 10261 match(Set dst (AbsVB dst mask)); 10262 match(Set dst (AbsVS dst mask)); 10263 match(Set dst (AbsVI dst mask)); 10264 match(Set dst (AbsVL dst mask)); 10265 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %} 10266 ins_encode %{ 10267 int vlen_enc = vector_length_encoding(this); 10268 BasicType bt = Matcher::vector_element_basic_type(this); 10269 int opc = this->ideal_Opcode(); 10270 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10271 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 10272 %} 10273 ins_pipe( pipe_slow ); 10274 %} 10275 10276 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{ 10277 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask))); 10278 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask))); 10279 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! 
vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Select the comparison instruction and predicate encoding based on the
    // element type of src1.
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false,
"%s", type2name(src1_elem_bt)); break; 10351 } 10352 %} 10353 ins_pipe( pipe_slow ); 10354 %} 10355 10356 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{ 10357 predicate(Matcher::vector_length(n) <= 32); 10358 match(Set dst (MaskAll src)); 10359 format %{ "mask_all_evexI_LE32 $dst, $src \t" %} 10360 ins_encode %{ 10361 int mask_len = Matcher::vector_length(this); 10362 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); 10363 %} 10364 ins_pipe( pipe_slow ); 10365 %} 10366 10367 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ 10368 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); 10369 match(Set dst (XorVMask src (MaskAll cnt))); 10370 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp); 10371 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %} 10372 ins_encode %{ 10373 uint masklen = Matcher::vector_length(this); 10374 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register); 10375 %} 10376 ins_pipe( pipe_slow ); 10377 %} 10378 10379 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{ 10380 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) || 10381 (Matcher::vector_length(n) == 16) || 10382 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw())); 10383 match(Set dst (XorVMask src (MaskAll cnt))); 10384 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %} 10385 ins_encode %{ 10386 uint masklen = Matcher::vector_length(this); 10387 __ knot(masklen, $dst$$KRegister, $src$$KRegister); 10388 %} 10389 ins_pipe( pipe_slow ); 10390 %} 10391 10392 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{ 10393 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8); 10394 match(Set dst (VectorLongToMask src)); 10395 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp); 10396 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %} 10397 ins_encode %{ 10398 int mask_len = Matcher::vector_length(this); 10399 int vec_enc = vector_length_encoding(mask_len); 10400 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10401 $rtmp2$$Register, xnoreg, mask_len, vec_enc); 10402 %} 10403 ins_pipe( pipe_slow ); 10404 %} 10405 10406 10407 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{ 10408 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8); 10409 match(Set dst (VectorLongToMask src)); 10410 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr); 10411 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %} 10412 ins_encode %{ 10413 int mask_len = Matcher::vector_length(this); 10414 assert(mask_len <= 32, "invalid mask length"); 10415 int vec_enc = vector_length_encoding(mask_len); 10416 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10417 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc); 10418 %} 10419 ins_pipe( pipe_slow ); 10420 %} 10421 10422 instruct long_to_mask_evex(kReg dst, rRegL src) %{ 10423 predicate(n->bottom_type()->isa_vectmask()); 10424 match(Set dst (VectorLongToMask src)); 10425 format %{ "long_to_mask_evex $dst, $src\t!" 
%} 10426 ins_encode %{ 10427 __ kmov($dst$$KRegister, $src$$Register); 10428 %} 10429 ins_pipe( pipe_slow ); 10430 %} 10431 10432 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{ 10433 match(Set dst (AndVMask src1 src2)); 10434 match(Set dst (OrVMask src1 src2)); 10435 match(Set dst (XorVMask src1 src2)); 10436 effect(TEMP kscratch); 10437 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %} 10438 ins_encode %{ 10439 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 10440 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 10441 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal"); 10442 uint masklen = Matcher::vector_length(this); 10443 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen; 10444 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister); 10445 %} 10446 ins_pipe( pipe_slow ); 10447 %} 10448 10449 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{ 10450 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 10451 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %} 10452 ins_encode %{ 10453 int vlen_enc = vector_length_encoding(this); 10454 BasicType bt = Matcher::vector_element_basic_type(this); 10455 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 10456 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc); 10457 %} 10458 ins_pipe( pipe_slow ); 10459 %} 10460 10461 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{ 10462 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 10463 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! 
vternlog masked operation" %} 10464 ins_encode %{ 10465 int vlen_enc = vector_length_encoding(this); 10466 BasicType bt = Matcher::vector_element_basic_type(this); 10467 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 10468 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc); 10469 %} 10470 ins_pipe( pipe_slow ); 10471 %} 10472 10473 instruct castMM(kReg dst) 10474 %{ 10475 match(Set dst (CastVV dst)); 10476 10477 size(0); 10478 format %{ "# castVV of $dst" %} 10479 ins_encode(/* empty encoding */); 10480 ins_cost(0); 10481 ins_pipe(empty); 10482 %} 10483 10484 instruct castVV(vec dst) 10485 %{ 10486 match(Set dst (CastVV dst)); 10487 10488 size(0); 10489 format %{ "# castVV of $dst" %} 10490 ins_encode(/* empty encoding */); 10491 ins_cost(0); 10492 ins_pipe(empty); 10493 %} 10494 10495 instruct castVVLeg(legVec dst) 10496 %{ 10497 match(Set dst (CastVV dst)); 10498 10499 size(0); 10500 format %{ "# castVV of $dst" %} 10501 ins_encode(/* empty encoding */); 10502 ins_cost(0); 10503 ins_pipe(empty); 10504 %} 10505 10506 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr) 10507 %{ 10508 match(Set dst (IsInfiniteF src)); 10509 effect(TEMP ktmp, KILL cr); 10510 format %{ "float_class_check $dst, $src" %} 10511 ins_encode %{ 10512 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18); 10513 __ kmovbl($dst$$Register, $ktmp$$KRegister); 10514 %} 10515 ins_pipe(pipe_slow); 10516 %} 10517 10518 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr) 10519 %{ 10520 match(Set dst (IsInfiniteD src)); 10521 effect(TEMP ktmp, KILL cr); 10522 format %{ "double_class_check $dst, $src" %} 10523 ins_encode %{ 10524 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18); 10525 __ kmovbl($dst$$Register, $ktmp$$KRegister); 10526 %} 10527 ins_pipe(pipe_slow); 10528 %} 10529 10530 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2) 10531 %{ 10532 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10533 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10534 match(Set dst (SaturatingAddV src1 src2)); 10535 match(Set dst (SaturatingSubV src1 src2)); 10536 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %} 10537 ins_encode %{ 10538 int vlen_enc = vector_length_encoding(this); 10539 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10540 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10541 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 10542 %} 10543 ins_pipe(pipe_slow); 10544 %} 10545 10546 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2) 10547 %{ 10548 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10549 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10550 match(Set dst (SaturatingAddV src1 src2)); 10551 match(Set dst (SaturatingSubV src1 src2)); 10552 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %} 10553 ins_encode %{ 10554 int vlen_enc = vector_length_encoding(this); 10555 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10556 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10557 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10558 %} 10559 ins_pipe(pipe_slow); 10560 %} 10561 10562 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2) 10563 %{ 10564 
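  // Signed saturating add/sub over int/long lanes on EVEX targets (full 512-bit
  // vectors, or narrower when AVX512VL is available). The two vector and two
  // opmask temporaries are used by the saturation helper (see
  // vector_addsub_dq_saturating_evex in the C2 macro assembler) to detect
  // overflow and clamp the result.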
predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10565 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() && 10566 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10567 match(Set dst (SaturatingAddV src1 src2)); 10568 match(Set dst (SaturatingSubV src1 src2)); 10569 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2); 10570 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 10571 ins_encode %{ 10572 int vlen_enc = vector_length_encoding(this); 10573 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10574 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10575 $src1$$XMMRegister, $src2$$XMMRegister, 10576 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 10577 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc); 10578 %} 10579 ins_pipe(pipe_slow); 10580 %} 10581 10582 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4) 10583 %{ 10584 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10585 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() && 10586 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10587 match(Set dst (SaturatingAddV src1 src2)); 10588 match(Set dst (SaturatingSubV src1 src2)); 10589 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4); 10590 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 10591 ins_encode %{ 10592 int vlen_enc = vector_length_encoding(this); 10593 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10594 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, 10595 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 10596 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc); 10597 %} 10598 ins_pipe(pipe_slow); 10599 %} 10600 10601 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp) 10602 %{ 10603 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10604 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10605 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10606 match(Set dst (SaturatingAddV src1 src2)); 10607 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp); 10608 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! 
using $xtmp1, $xtmp2 and $ktmp as TEMP" %} 10609 ins_encode %{ 10610 int vlen_enc = vector_length_encoding(this); 10611 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10612 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10613 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc); 10614 %} 10615 ins_pipe(pipe_slow); 10616 %} 10617 10618 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3) 10619 %{ 10620 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10621 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10622 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10623 match(Set dst (SaturatingAddV src1 src2)); 10624 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 10625 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 10626 ins_encode %{ 10627 int vlen_enc = vector_length_encoding(this); 10628 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10629 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10630 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc); 10631 %} 10632 ins_pipe(pipe_slow); 10633 %} 10634 10635 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp) 10636 %{ 10637 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10638 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10639 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10640 match(Set dst (SaturatingSubV src1 src2)); 10641 effect(TEMP ktmp); 10642 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %} 10643 ins_encode %{ 10644 int vlen_enc = vector_length_encoding(this); 10645 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10646 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, 10647 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc); 10648 %} 10649 ins_pipe(pipe_slow); 10650 %} 10651 10652 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) 10653 %{ 10654 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10655 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10656 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10657 match(Set dst (SaturatingSubV src1 src2)); 10658 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 10659 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! 
using $xtmp1 and $xtmp2 as TEMP" %} 10660 ins_encode %{ 10661 int vlen_enc = vector_length_encoding(this); 10662 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10663 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10664 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 10665 %} 10666 ins_pipe(pipe_slow); 10667 %} 10668 10669 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2) 10670 %{ 10671 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10672 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10673 match(Set dst (SaturatingAddV src1 (LoadVector src2))); 10674 match(Set dst (SaturatingSubV src1 (LoadVector src2))); 10675 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %} 10676 ins_encode %{ 10677 int vlen_enc = vector_length_encoding(this); 10678 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10679 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10680 $src1$$XMMRegister, $src2$$Address, false, vlen_enc); 10681 %} 10682 ins_pipe(pipe_slow); 10683 %} 10684 10685 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2) 10686 %{ 10687 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10688 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10689 match(Set dst (SaturatingAddV src1 (LoadVector src2))); 10690 match(Set dst (SaturatingSubV src1 (LoadVector src2))); 10691 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %} 10692 ins_encode %{ 10693 int vlen_enc = vector_length_encoding(this); 10694 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10695 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10696 $src1$$XMMRegister, $src2$$Address, true, vlen_enc); 10697 %} 10698 ins_pipe(pipe_slow); 10699 %} 10700 10701 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{ 10702 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10703 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10704 match(Set dst (SaturatingAddV (Binary dst src) mask)); 10705 match(Set dst (SaturatingSubV (Binary dst src) mask)); 10706 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %} 10707 ins_encode %{ 10708 int vlen_enc = vector_length_encoding(this); 10709 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10710 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10711 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc); 10712 %} 10713 ins_pipe( pipe_slow ); 10714 %} 10715 10716 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{ 10717 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10718 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10719 match(Set dst (SaturatingAddV (Binary dst src) mask)); 10720 match(Set dst (SaturatingSubV (Binary dst src) mask)); 10721 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %} 10722 ins_encode %{ 10723 int vlen_enc = vector_length_encoding(this); 10724 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10725 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10726 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc); 10727 %} 10728 ins_pipe( 
pipe_slow ); 10729 %} 10730 10731 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{ 10732 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10733 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10734 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask)); 10735 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask)); 10736 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %} 10737 ins_encode %{ 10738 int vlen_enc = vector_length_encoding(this); 10739 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10740 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10741 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc); 10742 %} 10743 ins_pipe( pipe_slow ); 10744 %} 10745 10746 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{ 10747 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10748 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10749 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask)); 10750 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask)); 10751 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %} 10752 ins_encode %{ 10753 int vlen_enc = vector_length_encoding(this); 10754 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10755 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10756 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc); 10757 %} 10758 ins_pipe( pipe_slow ); 10759 %} 10760 10761 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2) 10762 %{ 10763 match(Set index (SelectFromTwoVector (Binary index src1) src2)); 10764 format %{ "select_from_two_vector $index, $src1, $src2 \t!" 
%} 10765 ins_encode %{ 10766 int vlen_enc = vector_length_encoding(this); 10767 BasicType bt = Matcher::vector_element_basic_type(this); 10768 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 10769 %} 10770 ins_pipe(pipe_slow); 10771 %} 10772 10773 instruct reinterpretS2HF(regF dst, rRegI src) 10774 %{ 10775 match(Set dst (ReinterpretS2HF src)); 10776 format %{ "vmovw $dst, $src" %} 10777 ins_encode %{ 10778 __ vmovw($dst$$XMMRegister, $src$$Register); 10779 %} 10780 ins_pipe(pipe_slow); 10781 %} 10782 10783 instruct reinterpretHF2S(rRegI dst, regF src) 10784 %{ 10785 match(Set dst (ReinterpretHF2S src)); 10786 format %{ "vmovw $dst, $src" %} 10787 ins_encode %{ 10788 __ vmovw($dst$$Register, $src$$XMMRegister); 10789 %} 10790 ins_pipe(pipe_slow); 10791 %} 10792 10793 instruct convF2HFAndS2HF(regF dst, regF src) 10794 %{ 10795 match(Set dst (ReinterpretS2HF (ConvF2HF src))); 10796 format %{ "convF2HFAndS2HF $dst, $src" %} 10797 ins_encode %{ 10798 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 10799 %} 10800 ins_pipe(pipe_slow); 10801 %} 10802 10803 instruct convHF2SAndHF2F(regF dst, regF src) 10804 %{ 10805 match(Set dst (ConvHF2F (ReinterpretHF2S src))); 10806 format %{ "convHF2SAndHF2F $dst, $src" %} 10807 ins_encode %{ 10808 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit); 10809 %} 10810 ins_pipe(pipe_slow); 10811 %} 10812 10813 instruct scalar_sqrt_HF_reg(regF dst, regF src) 10814 %{ 10815 match(Set dst (SqrtHF src)); 10816 format %{ "scalar_sqrt_fp16 $dst, $src" %} 10817 ins_encode %{ 10818 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister); 10819 %} 10820 ins_pipe(pipe_slow); 10821 %} 10822 10823 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2) 10824 %{ 10825 match(Set dst (AddHF src1 src2)); 10826 match(Set dst (DivHF src1 src2)); 10827 match(Set dst (MulHF src1 src2)); 10828 match(Set dst (SubHF src1 src2)); 10829 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %} 10830 ins_encode %{ 10831 int opcode = this->ideal_Opcode(); 10832 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 10833 %} 10834 ins_pipe(pipe_slow); 10835 %} 10836 10837 instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2) 10838 %{ 10839 predicate(VM_Version::supports_avx10_2()); 10840 match(Set dst (MaxHF src1 src2)); 10841 match(Set dst (MinHF src1 src2)); 10842 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %} 10843 ins_encode %{ 10844 int function = this->ideal_Opcode() == Op_MinHF ? 
AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN; 10845 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function); 10846 %} 10847 ins_pipe( pipe_slow ); 10848 %} 10849 10850 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2) 10851 %{ 10852 predicate(!VM_Version::supports_avx10_2()); 10853 match(Set dst (MaxHF src1 src2)); 10854 match(Set dst (MinHF src1 src2)); 10855 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2); 10856 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %} 10857 ins_encode %{ 10858 int opcode = this->ideal_Opcode(); 10859 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister, 10860 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); 10861 %} 10862 ins_pipe( pipe_slow ); 10863 %} 10864 10865 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2) 10866 %{ 10867 match(Set dst (FmaHF src2 (Binary dst src1))); 10868 effect(DEF dst); 10869 format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %} 10870 ins_encode %{ 10871 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister); 10872 %} 10873 ins_pipe( pipe_slow ); 10874 %} 10875 10876 10877 instruct vector_sqrt_HF_reg(vec dst, vec src) 10878 %{ 10879 match(Set dst (SqrtVHF src)); 10880 format %{ "vector_sqrt_fp16 $dst, $src" %} 10881 ins_encode %{ 10882 int vlen_enc = vector_length_encoding(this); 10883 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 10884 %} 10885 ins_pipe(pipe_slow); 10886 %} 10887 10888 instruct vector_sqrt_HF_mem(vec dst, memory src) 10889 %{ 10890 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src)))); 10891 format %{ "vector_sqrt_fp16_mem $dst, $src" %} 10892 ins_encode %{ 10893 int vlen_enc = vector_length_encoding(this); 10894 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc); 10895 %} 10896 ins_pipe(pipe_slow); 10897 %} 10898 10899 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2) 10900 %{ 10901 match(Set dst (AddVHF src1 src2)); 10902 match(Set dst (DivVHF src1 src2)); 10903 match(Set dst (MulVHF src1 src2)); 10904 match(Set dst (SubVHF src1 src2)); 10905 format %{ "vector_binop_fp16 $dst, $src1, $src2" %} 10906 ins_encode %{ 10907 int vlen_enc = vector_length_encoding(this); 10908 int opcode = this->ideal_Opcode(); 10909 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 10910 %} 10911 ins_pipe(pipe_slow); 10912 %} 10913 10914 10915 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2) 10916 %{ 10917 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2)))); 10918 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2)))); 10919 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2)))); 10920 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2)))); 10921 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %} 10922 ins_encode %{ 10923 int vlen_enc = vector_length_encoding(this); 10924 int opcode = this->ideal_Opcode(); 10925 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc); 10926 %} 10927 ins_pipe(pipe_slow); 10928 %} 10929 10930 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2) 10931 %{ 10932 match(Set dst (FmaVHF src2 (Binary dst src1))); 10933 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %} 10934 ins_encode %{ 10935 int vlen_enc = vector_length_encoding(this); 10936 
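    // vfmadd132ph computes $dst = $dst * $src1 + $src2, matching the
    // "(FmaVHF src2 (Binary dst src1))" match rule and the format comment above:
    // the addend is passed as the second assembler operand, the multiplier as the third.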
__ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc); 10937 %} 10938 ins_pipe( pipe_slow ); 10939 %} 10940 10941 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2) 10942 %{ 10943 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1))))); 10944 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %} 10945 ins_encode %{ 10946 int vlen_enc = vector_length_encoding(this); 10947 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc); 10948 %} 10949 ins_pipe( pipe_slow ); 10950 %} 10951 10952 instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2) 10953 %{ 10954 predicate(VM_Version::supports_avx10_2()); 10955 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2)))); 10956 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2)))); 10957 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %} 10958 ins_encode %{ 10959 int vlen_enc = vector_length_encoding(this); 10960 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN; 10961 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc); 10962 %} 10963 ins_pipe( pipe_slow ); 10964 %} 10965 10966 instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2) 10967 %{ 10968 predicate(VM_Version::supports_avx10_2()); 10969 match(Set dst (MinVHF src1 src2)); 10970 match(Set dst (MaxVHF src1 src2)); 10971 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %} 10972 ins_encode %{ 10973 int vlen_enc = vector_length_encoding(this); 10974 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN; 10975 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc); 10976 %} 10977 ins_pipe( pipe_slow ); 10978 %} 10979 10980 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2) 10981 %{ 10982 predicate(!VM_Version::supports_avx10_2()); 10983 match(Set dst (MinVHF src1 src2)); 10984 match(Set dst (MaxVHF src1 src2)); 10985 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2); 10986 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %} 10987 ins_encode %{ 10988 int vlen_enc = vector_length_encoding(this); 10989 int opcode = this->ideal_Opcode(); 10990 __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister, 10991 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 10992 %} 10993 ins_pipe( pipe_slow ); 10994 %}
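
// Note on the FP16 min/max rules above: AVX10.2 targets use the VMINMAXSH/VMINMAXPH
// encodings directly (the AVX10_MINMAX_*_COMPARE_SIGN function selects sign-aware
// min/max), while the pre-AVX10.2 fallbacks go through scalar_max_min_fp16 /
// vector_max_min_fp16 with mask and vector temporaries, presumably to reproduce the
// Java Math.min/max semantics for NaN and signed zero in the same way the existing
// float/double min/max rules do.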